summaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
author Dave Chinner <david@fromorbit.com> 2016-06-21 10:10:38 +1000
committer Dave Chinner <david@fromorbit.com> 2016-06-21 10:10:38 +1000
commit 9b7fad20760b8f47730f0353459dd39a89c415b9 (patch)
tree a0465f6d4bb6dceaf3a6412e1c30b31096fdf712 /fs/xfs
parent 07931b7be70916055b882c6a379a3016f5772681 (diff)
parent 3c2bdc912a1cc050db7e858aabe564cb382c9c30 (diff)
download linux-stable-9b7fad20760b8f47730f0353459dd39a89c415b9.tar.gz
download linux-stable-9b7fad20760b8f47730f0353459dd39a89c415b9.tar.bz2
download linux-stable-9b7fad20760b8f47730f0353459dd39a89c415b9.zip
Merge branch 'xfs-4.8-iomap-write' into for-next
Diffstat (limited to 'fs/xfs')
-rw-r--r-- fs/xfs/Kconfig 1
-rw-r--r-- fs/xfs/xfs_aops.c 283
-rw-r--r-- fs/xfs/xfs_bmap_util.c 343
-rw-r--r-- fs/xfs/xfs_file.c 193
-rw-r--r-- fs/xfs/xfs_inode.h 3
-rw-r--r-- fs/xfs/xfs_iomap.c 171
-rw-r--r-- fs/xfs/xfs_iomap.h 7
-rw-r--r-- fs/xfs/xfs_iops.c 113
-rw-r--r-- fs/xfs/xfs_pnfs.c 26
-rw-r--r-- fs/xfs/xfs_trace.h 3
10 files changed, 367 insertions, 776 deletions
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 5d47b4df61ea..35faf128f36d 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -4,6 +4,7 @@ config XFS_FS
depends on (64BIT || LBDAF)
select EXPORTFS
select LIBCRC32C
+ select FS_IOMAP
help
XFS is a high performance journaling filesystem which originated
on the SGI IRIX platform. It is completely multi-threaded, can
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4c463b99fe57..80714ebd54c0 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1143,6 +1143,8 @@ __xfs_get_blocks(
ssize_t size;
int new = 0;
+ BUG_ON(create && !direct);
+
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
@@ -1150,22 +1152,14 @@ __xfs_get_blocks(
ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
size = bh_result->b_size;
- if (!create && direct && offset >= i_size_read(inode))
+ if (!create && offset >= i_size_read(inode))
return 0;
/*
* Direct I/O is usually done on preallocated files, so try getting
- * a block mapping without an exclusive lock first. For buffered
- * writes we already have the exclusive iolock anyway, so avoiding
- * a lock roundtrip here by taking the ilock exclusive from the
- * beginning is a useful micro optimization.
+ * a block mapping without an exclusive lock first.
*/
- if (create && !direct) {
- lockmode = XFS_ILOCK_EXCL;
- xfs_ilock(ip, lockmode);
- } else {
- lockmode = xfs_ilock_data_map_shared(ip);
- }
+ lockmode = xfs_ilock_data_map_shared(ip);
ASSERT(offset <= mp->m_super->s_maxbytes);
if (offset + size > mp->m_super->s_maxbytes)
@@ -1184,37 +1178,19 @@ __xfs_get_blocks(
(imap.br_startblock == HOLESTARTBLOCK ||
imap.br_startblock == DELAYSTARTBLOCK) ||
(IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
- if (direct || xfs_get_extsz_hint(ip)) {
- /*
- * xfs_iomap_write_direct() expects the shared lock. It
- * is unlocked on return.
- */
- if (lockmode == XFS_ILOCK_EXCL)
- xfs_ilock_demote(ip, lockmode);
-
- error = xfs_iomap_write_direct(ip, offset, size,
- &imap, nimaps);
- if (error)
- return error;
- new = 1;
+ /*
+ * xfs_iomap_write_direct() expects the shared lock. It
+ * is unlocked on return.
+ */
+ if (lockmode == XFS_ILOCK_EXCL)
+ xfs_ilock_demote(ip, lockmode);
- } else {
- /*
- * Delalloc reservations do not require a transaction,
- * we can go on without dropping the lock here. If we
- * are allocating a new delalloc block, make sure that
- * we set the new flag so that we mark the buffer new so
- * that we know that it is newly allocated if the write
- * fails.
- */
- if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
- new = 1;
- error = xfs_iomap_write_delay(ip, offset, size, &imap);
- if (error)
- goto out_unlock;
+ error = xfs_iomap_write_direct(ip, offset, size,
+ &imap, nimaps);
+ if (error)
+ return error;
+ new = 1;
- xfs_iunlock(ip, lockmode);
- }
trace_xfs_get_blocks_alloc(ip, offset, size,
ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
: XFS_IO_DELALLOC, &imap);
@@ -1235,9 +1211,7 @@ __xfs_get_blocks(
}
/* trim mapping down to size requested */
- if (direct || size > (1 << inode->i_blkbits))
- xfs_map_trim_size(inode, iblock, bh_result,
- &imap, offset, size);
+ xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
/*
* For unwritten extents do not report a disk address in the buffered
@@ -1250,7 +1224,7 @@ __xfs_get_blocks(
if (ISUNWRITTEN(&imap))
set_buffer_unwritten(bh_result);
/* direct IO needs special help */
- if (create && direct) {
+ if (create) {
if (dax_fault)
ASSERT(!ISUNWRITTEN(&imap));
else
@@ -1279,14 +1253,7 @@ __xfs_get_blocks(
(new || ISUNWRITTEN(&imap))))
set_buffer_new(bh_result);
- if (imap.br_startblock == DELAYSTARTBLOCK) {
- BUG_ON(direct);
- if (create) {
- set_buffer_uptodate(bh_result);
- set_buffer_mapped(bh_result);
- set_buffer_delay(bh_result);
- }
- }
+ BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK);
return 0;
@@ -1427,216 +1394,6 @@ xfs_vm_direct_IO(
xfs_get_blocks_direct, endio, NULL, flags);
}
-/*
- * Punch out the delalloc blocks we have already allocated.
- *
- * Don't bother with xfs_setattr given that nothing can have made it to disk yet
- * as the page is still locked at this point.
- */
-STATIC void
-xfs_vm_kill_delalloc_range(
- struct inode *inode,
- loff_t start,
- loff_t end)
-{
- struct xfs_inode *ip = XFS_I(inode);
- xfs_fileoff_t start_fsb;
- xfs_fileoff_t end_fsb;
- int error;
-
- start_fsb = XFS_B_TO_FSB(ip->i_mount, start);
- end_fsb = XFS_B_TO_FSB(ip->i_mount, end);
- if (end_fsb <= start_fsb)
- return;
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
- end_fsb - start_fsb);
- if (error) {
- /* something screwed, just bail */
- if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- xfs_alert(ip->i_mount,
- "xfs_vm_write_failed: unable to clean up ino %lld",
- ip->i_ino);
- }
- }
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-}
-
-STATIC void
-xfs_vm_write_failed(
- struct inode *inode,
- struct page *page,
- loff_t pos,
- unsigned len)
-{
- loff_t block_offset;
- loff_t block_start;
- loff_t block_end;
- loff_t from = pos & (PAGE_SIZE - 1);
- loff_t to = from + len;
- struct buffer_head *bh, *head;
- struct xfs_mount *mp = XFS_I(inode)->i_mount;
-
- /*
- * The request pos offset might be 32 or 64 bit, this is all fine
- * on 64-bit platform. However, for 64-bit pos request on 32-bit
- * platform, the high 32-bit will be masked off if we evaluate the
- * block_offset via (pos & PAGE_MASK) because the PAGE_MASK is
- * 0xfffff000 as an unsigned long, hence the result is incorrect
- * which could cause the following ASSERT failed in most cases.
- * In order to avoid this, we can evaluate the block_offset of the
- * start of the page by using shifts rather than masks the mismatch
- * problem.
- */
- block_offset = (pos >> PAGE_SHIFT) << PAGE_SHIFT;
-
- ASSERT(block_offset + from == pos);
-
- head = page_buffers(page);
- block_start = 0;
- for (bh = head; bh != head || !block_start;
- bh = bh->b_this_page, block_start = block_end,
- block_offset += bh->b_size) {
- block_end = block_start + bh->b_size;
-
- /* skip buffers before the write */
- if (block_end <= from)
- continue;
-
- /* if the buffer is after the write, we're done */
- if (block_start >= to)
- break;
-
- /*
- * Process delalloc and unwritten buffers beyond EOF. We can
- * encounter unwritten buffers in the event that a file has
- * post-EOF unwritten extents and an extending write happens to
- * fail (e.g., an unaligned write that also involves a delalloc
- * to the same page).
- */
- if (!buffer_delay(bh) && !buffer_unwritten(bh))
- continue;
-
- if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) &&
- block_offset < i_size_read(inode))
- continue;
-
- if (buffer_delay(bh))
- xfs_vm_kill_delalloc_range(inode, block_offset,
- block_offset + bh->b_size);
-
- /*
- * This buffer does not contain data anymore. make sure anyone
- * who finds it knows that for certain.
- */
- clear_buffer_delay(bh);
- clear_buffer_uptodate(bh);
- clear_buffer_mapped(bh);
- clear_buffer_new(bh);
- clear_buffer_dirty(bh);
- clear_buffer_unwritten(bh);
- }
-
-}
-
-/*
- * This used to call block_write_begin(), but it unlocks and releases the page
- * on error, and we need that page to be able to punch stale delalloc blocks out
- * on failure. hence we copy-n-waste it here and call xfs_vm_write_failed() at
- * the appropriate point.
- */
-STATIC int
-xfs_vm_write_begin(
- struct file *file,
- struct address_space *mapping,
- loff_t pos,
- unsigned len,
- unsigned flags,
- struct page **pagep,
- void **fsdata)
-{
- pgoff_t index = pos >> PAGE_SHIFT;
- struct page *page;
- int status;
- struct xfs_mount *mp = XFS_I(mapping->host)->i_mount;
-
- ASSERT(len <= PAGE_SIZE);
-
- page = grab_cache_page_write_begin(mapping, index, flags);
- if (!page)
- return -ENOMEM;
-
- status = __block_write_begin(page, pos, len, xfs_get_blocks);
- if (xfs_mp_fail_writes(mp))
- status = -EIO;
- if (unlikely(status)) {
- struct inode *inode = mapping->host;
- size_t isize = i_size_read(inode);
-
- xfs_vm_write_failed(inode, page, pos, len);
- unlock_page(page);
-
- /*
- * If the write is beyond EOF, we only want to kill blocks
- * allocated in this write, not blocks that were previously
- * written successfully.
- */
- if (xfs_mp_fail_writes(mp))
- isize = 0;
- if (pos + len > isize) {
- ssize_t start = max_t(ssize_t, pos, isize);
-
- truncate_pagecache_range(inode, start, pos + len);
- }
-
- put_page(page);
- page = NULL;
- }
-
- *pagep = page;
- return status;
-}
-
-/*
- * On failure, we only need to kill delalloc blocks beyond EOF in the range of
- * this specific write because they will never be written. Previous writes
- * beyond EOF where block allocation succeeded do not need to be trashed, so
- * only new blocks from this write should be trashed. For blocks within
- * EOF, generic_write_end() zeros them so they are safe to leave alone and be
- * written with all the other valid data.
- */
-STATIC int
-xfs_vm_write_end(
- struct file *file,
- struct address_space *mapping,
- loff_t pos,
- unsigned len,
- unsigned copied,
- struct page *page,
- void *fsdata)
-{
- int ret;
-
- ASSERT(len <= PAGE_SIZE);
-
- ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
- if (unlikely(ret < len)) {
- struct inode *inode = mapping->host;
- size_t isize = i_size_read(inode);
- loff_t to = pos + len;
-
- if (to > isize) {
- /* only kill blocks in this write beyond EOF */
- if (pos > isize)
- isize = pos;
- xfs_vm_kill_delalloc_range(inode, isize, to);
- truncate_pagecache_range(inode, isize, to);
- }
- }
- return ret;
-}
-
STATIC sector_t
xfs_vm_bmap(
struct address_space *mapping,
@@ -1747,8 +1504,6 @@ const struct address_space_operations xfs_address_space_operations = {
.set_page_dirty = xfs_vm_set_page_dirty,
.releasepage = xfs_vm_releasepage,
.invalidatepage = xfs_vm_invalidatepage,
- .write_begin = xfs_vm_write_begin,
- .write_end = xfs_vm_write_end,
.bmap = xfs_vm_bmap,
.direct_IO = xfs_vm_direct_IO,
.migratepage = buffer_migrate_page,
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 28c42fb0c12a..91bee2db3207 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1087,99 +1087,120 @@ error1: /* Just cancel transaction */
return error;
}
-/*
- * Zero file bytes between startoff and endoff inclusive.
- * The iolock is held exclusive and no blocks are buffered.
- *
- * This function is used by xfs_free_file_space() to zero
- * partial blocks when the range to free is not block aligned.
- * When unreserving space with boundaries that are not block
- * aligned we round up the start and round down the end
- * boundaries and then use this function to zero the parts of
- * the blocks that got dropped during the rounding.
- */
-STATIC int
-xfs_zero_remaining_bytes(
- xfs_inode_t *ip,
- xfs_off_t startoff,
- xfs_off_t endoff)
+static int
+xfs_unmap_extent(
+ struct xfs_inode *ip,
+ xfs_fileoff_t startoffset_fsb,
+ xfs_filblks_t len_fsb,
+ int *done)
{
- xfs_bmbt_irec_t imap;
- xfs_fileoff_t offset_fsb;
- xfs_off_t lastoffset;
- xfs_off_t offset;
- xfs_buf_t *bp;
- xfs_mount_t *mp = ip->i_mount;
- int nimap;
- int error = 0;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ struct xfs_bmap_free free_list;
+ xfs_fsblock_t firstfsb;
+ uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+ int error;
- /*
- * Avoid doing I/O beyond eof - it's not necessary
- * since nothing can read beyond eof. The space will
- * be zeroed when the file is extended anyway.
- */
- if (startoff >= XFS_ISIZE(ip))
- return 0;
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
+ if (error) {
+ ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
+ return error;
+ }
- if (endoff > XFS_ISIZE(ip))
- endoff = XFS_ISIZE(ip);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, ip->i_gdquot,
+ ip->i_pdquot, resblks, 0, XFS_QMOPT_RES_REGBLKS);
+ if (error)
+ goto out_trans_cancel;
- for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
- uint lock_mode;
+ xfs_trans_ijoin(tp, ip, 0);
- offset_fsb = XFS_B_TO_FSBT(mp, offset);
- nimap = 1;
+ xfs_bmap_init(&free_list, &firstfsb);
+ error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb,
+ &free_list, done);
+ if (error)
+ goto out_bmap_cancel;
- lock_mode = xfs_ilock_data_map_shared(ip);
- error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0);
- xfs_iunlock(ip, lock_mode);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
+ if (error)
+ goto out_bmap_cancel;
- if (error || nimap < 1)
- break;
- ASSERT(imap.br_blockcount >= 1);
- ASSERT(imap.br_startoff == offset_fsb);
- ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
+ error = xfs_trans_commit(tp);
+out_unlock:
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
- if (imap.br_startblock == HOLESTARTBLOCK ||
- imap.br_state == XFS_EXT_UNWRITTEN) {
- /* skip the entire extent */
- lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff +
- imap.br_blockcount) - 1;
- continue;
- }
+out_bmap_cancel:
+ xfs_bmap_cancel(&free_list);
+out_trans_cancel:
+ xfs_trans_cancel(tp);
+ goto out_unlock;
+}
- lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
- if (lastoffset > endoff)
- lastoffset = endoff;
+static int
+xfs_adjust_extent_unmap_boundaries(
+ struct xfs_inode *ip,
+ xfs_fileoff_t *startoffset_fsb,
+ xfs_fileoff_t *endoffset_fsb)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_bmbt_irec imap;
+ int nimap, error;
+ xfs_extlen_t mod = 0;
- /* DAX can just zero the backing device directly */
- if (IS_DAX(VFS_I(ip))) {
- error = dax_zero_page_range(VFS_I(ip), offset,
- lastoffset - offset + 1,
- xfs_get_blocks_direct);
- if (error)
- return error;
- continue;
- }
+ nimap = 1;
+ error = xfs_bmapi_read(ip, *startoffset_fsb, 1, &imap, &nimap, 0);
+ if (error)
+ return error;
- error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ?
- mp->m_rtdev_targp : mp->m_ddev_targp,
- xfs_fsb_to_db(ip, imap.br_startblock),
- BTOBB(mp->m_sb.sb_blocksize),
- 0, &bp, NULL);
- if (error)
- return error;
+ if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
+ xfs_daddr_t block;
- memset(bp->b_addr +
- (offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
- 0, lastoffset - offset + 1);
+ ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
+ block = imap.br_startblock;
+ mod = do_div(block, mp->m_sb.sb_rextsize);
+ if (mod)
+ *startoffset_fsb += mp->m_sb.sb_rextsize - mod;
+ }
- error = xfs_bwrite(bp);
- xfs_buf_relse(bp);
- if (error)
- return error;
+ nimap = 1;
+ error = xfs_bmapi_read(ip, *endoffset_fsb - 1, 1, &imap, &nimap, 0);
+ if (error)
+ return error;
+
+ if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
+ ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
+ mod++;
+ if (mod && mod != mp->m_sb.sb_rextsize)
+ *endoffset_fsb -= mod;
}
- return error;
+
+ return 0;
+}
+
+static int
+xfs_flush_unmap_range(
+ struct xfs_inode *ip,
+ xfs_off_t offset,
+ xfs_off_t len)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct inode *inode = VFS_I(ip);
+ xfs_off_t rounding, start, end;
+ int error;
+
+ /* wait for the completion of any pending DIOs */
+ inode_dio_wait(inode);
+
+ rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE);
+ start = round_down(offset, rounding);
+ end = round_up(offset + len, rounding) - 1;
+
+ error = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (error)
+ return error;
+ truncate_pagecache_range(inode, start, end);
+ return 0;
}
int
@@ -1188,24 +1209,10 @@ xfs_free_file_space(
xfs_off_t offset,
xfs_off_t len)
{
- int done;
- xfs_fileoff_t endoffset_fsb;
- int error;
- xfs_fsblock_t firstfsb;
- xfs_bmap_free_t free_list;
- xfs_bmbt_irec_t imap;
- xfs_off_t ioffset;
- xfs_off_t iendoffset;
- xfs_extlen_t mod=0;
- xfs_mount_t *mp;
- int nimap;
- uint resblks;
- xfs_off_t rounding;
- int rt;
+ struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t startoffset_fsb;
- xfs_trans_t *tp;
-
- mp = ip->i_mount;
+ xfs_fileoff_t endoffset_fsb;
+ int done = 0, error;
trace_xfs_free_file_space(ip);
@@ -1213,135 +1220,45 @@ xfs_free_file_space(
if (error)
return error;
- error = 0;
if (len <= 0) /* if nothing being freed */
- return error;
- rt = XFS_IS_REALTIME_INODE(ip);
- startoffset_fsb = XFS_B_TO_FSB(mp, offset);
- endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
-
- /* wait for the completion of any pending DIOs */
- inode_dio_wait(VFS_I(ip));
+ return 0;
- rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE);
- ioffset = round_down(offset, rounding);
- iendoffset = round_up(offset + len, rounding) - 1;
- error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset,
- iendoffset);
+ error = xfs_flush_unmap_range(ip, offset, len);
if (error)
- goto out;
- truncate_pagecache_range(VFS_I(ip), ioffset, iendoffset);
+ return error;
+
+ startoffset_fsb = XFS_B_TO_FSB(mp, offset);
+ endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
/*
- * Need to zero the stuff we're not freeing, on disk.
- * If it's a realtime file & can't use unwritten extents then we
- * actually need to zero the extent edges. Otherwise xfs_bunmapi
- * will take care of it for us.
+ * Need to zero the stuff we're not freeing, on disk. If it's a RT file
+ * and we can't use unwritten extents then we actually need to ensure
+ * to zero the whole extent, otherwise we just need to take care of block
+ * boundaries, and xfs_bunmapi will handle the rest.
*/
- if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
- nimap = 1;
- error = xfs_bmapi_read(ip, startoffset_fsb, 1,
- &imap, &nimap, 0);
+ if (XFS_IS_REALTIME_INODE(ip) &&
+ !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
+ error = xfs_adjust_extent_unmap_boundaries(ip, &startoffset_fsb,
+ &endoffset_fsb);
if (error)
- goto out;
- ASSERT(nimap == 0 || nimap == 1);
- if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
- xfs_daddr_t block;
-
- ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
- block = imap.br_startblock;
- mod = do_div(block, mp->m_sb.sb_rextsize);
- if (mod)
- startoffset_fsb += mp->m_sb.sb_rextsize - mod;
- }
- nimap = 1;
- error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1,
- &imap, &nimap, 0);
- if (error)
- goto out;
- ASSERT(nimap == 0 || nimap == 1);
- if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
- ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
- mod++;
- if (mod && (mod != mp->m_sb.sb_rextsize))
- endoffset_fsb -= mod;
- }
- }
- if ((done = (endoffset_fsb <= startoffset_fsb)))
- /*
- * One contiguous piece to clear
- */
- error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1);
- else {
- /*
- * Some full blocks, possibly two pieces to clear
- */
- if (offset < XFS_FSB_TO_B(mp, startoffset_fsb))
- error = xfs_zero_remaining_bytes(ip, offset,
- XFS_FSB_TO_B(mp, startoffset_fsb) - 1);
- if (!error &&
- XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len)
- error = xfs_zero_remaining_bytes(ip,
- XFS_FSB_TO_B(mp, endoffset_fsb),
- offset + len - 1);
+ return error;
}
- /*
- * free file space until done or until there is an error
- */
- resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
- while (!error && !done) {
-
- /*
- * allocate and setup the transaction. Allow this
- * transaction to dip into the reserve blocks to ensure
- * the freeing of the space succeeds at ENOSPC.
- */
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
- &tp);
- if (error) {
- ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
- break;
+ if (endoffset_fsb > startoffset_fsb) {
+ while (!done) {
+ error = xfs_unmap_extent(ip, startoffset_fsb,
+ endoffset_fsb - startoffset_fsb, &done);
+ if (error)
+ return error;
}
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- error = xfs_trans_reserve_quota(tp, mp,
- ip->i_udquot, ip->i_gdquot, ip->i_pdquot,
- resblks, 0, XFS_QMOPT_RES_REGBLKS);
- if (error)
- goto error1;
-
- xfs_trans_ijoin(tp, ip, 0);
-
- /*
- * issue the bunmapi() call to free the blocks
- */
- xfs_bmap_init(&free_list, &firstfsb);
- error = xfs_bunmapi(tp, ip, startoffset_fsb,
- endoffset_fsb - startoffset_fsb,
- 0, 2, &firstfsb, &free_list, &done);
- if (error)
- goto error0;
-
- /*
- * complete the transaction
- */
- error = xfs_bmap_finish(&tp, &free_list, NULL);
- if (error)
- goto error0;
-
- error = xfs_trans_commit(tp);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
- out:
- return error;
-
- error0:
- xfs_bmap_cancel(&free_list);
- error1:
- xfs_trans_cancel(tp);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- goto out;
+ /*
+ * Now that we've unmapped all full blocks we'll have to zero out any
+ * partial block at the beginning and/or end. xfs_zero_range is
+ * smart enough to skip any holes, including those we just created.
+ */
+ return xfs_zero_range(ip, offset, len, NULL);
}
/*
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 47fc63295422..713991c22781 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -37,6 +37,7 @@
#include "xfs_log.h"
#include "xfs_icache.h"
#include "xfs_pnfs.h"
+#include "xfs_iomap.h"
#include <linux/dcache.h>
#include <linux/falloc.h>
@@ -80,61 +81,17 @@ xfs_rw_ilock_demote(
}
/*
- * xfs_iozero clears the specified range supplied via the page cache (except in
- * the DAX case). Writes through the page cache will allocate blocks over holes,
- * though the callers usually map the holes first and avoid them. If a block is
- * not completely zeroed, then it will be read from disk before being partially
- * zeroed.
- *
- * In the DAX case, we can just directly write to the underlying pages. This
- * will not allocate blocks, but will avoid holes and unwritten extents and so
- * not do unnecessary work.
+ * Clear the specified ranges to zero through either the pagecache or DAX.
+ * Holes and unwritten extents will be left as-is as they already are zeroed.
*/
int
-xfs_iozero(
- struct xfs_inode *ip, /* inode */
- loff_t pos, /* offset in file */
- size_t count) /* size of data to zero */
+xfs_zero_range(
+ struct xfs_inode *ip,
+ xfs_off_t pos,
+ xfs_off_t count,
+ bool *did_zero)
{
- struct page *page;
- struct address_space *mapping;
- int status = 0;
-
-
- mapping = VFS_I(ip)->i_mapping;
- do {
- unsigned offset, bytes;
- void *fsdata;
-
- offset = (pos & (PAGE_SIZE -1)); /* Within page */
- bytes = PAGE_SIZE - offset;
- if (bytes > count)
- bytes = count;
-
- if (IS_DAX(VFS_I(ip))) {
- status = dax_zero_page_range(VFS_I(ip), pos, bytes,
- xfs_get_blocks_direct);
- if (status)
- break;
- } else {
- status = pagecache_write_begin(NULL, mapping, pos, bytes,
- AOP_FLAG_UNINTERRUPTIBLE,
- &page, &fsdata);
- if (status)
- break;
-
- zero_user(page, offset, bytes);
-
- status = pagecache_write_end(NULL, mapping, pos, bytes,
- bytes, page, fsdata);
- WARN_ON(status <= 0); /* can't return less than zero! */
- status = 0;
- }
- pos += bytes;
- count -= bytes;
- } while (count);
-
- return status;
+ return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops);
}
int
@@ -424,49 +381,6 @@ out:
}
/*
- * This routine is called to handle zeroing any space in the last block of the
- * file that is beyond the EOF. We do this since the size is being increased
- * without writing anything to that block and we don't want to read the
- * garbage on the disk.
- */
-STATIC int /* error (positive) */
-xfs_zero_last_block(
- struct xfs_inode *ip,
- xfs_fsize_t offset,
- xfs_fsize_t isize,
- bool *did_zeroing)
-{
- struct xfs_mount *mp = ip->i_mount;
- xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize);
- int zero_offset = XFS_B_FSB_OFFSET(mp, isize);
- int zero_len;
- int nimaps = 1;
- int error = 0;
- struct xfs_bmbt_irec imap;
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- if (error)
- return error;
-
- ASSERT(nimaps > 0);
-
- /*
- * If the block underlying isize is just a hole, then there
- * is nothing to zero.
- */
- if (imap.br_startblock == HOLESTARTBLOCK)
- return 0;
-
- zero_len = mp->m_sb.sb_blocksize - zero_offset;
- if (isize + zero_len > offset)
- zero_len = offset - isize;
- *did_zeroing = true;
- return xfs_iozero(ip, isize, zero_len);
-}
-
-/*
* Zero any on disk space between the current EOF and the new, larger EOF.
*
* This handles the normal case of zeroing the remainder of the last block in
@@ -484,94 +398,11 @@ xfs_zero_eof(
xfs_fsize_t isize, /* current inode size */
bool *did_zeroing)
{
- struct xfs_mount *mp = ip->i_mount;
- xfs_fileoff_t start_zero_fsb;
- xfs_fileoff_t end_zero_fsb;
- xfs_fileoff_t zero_count_fsb;
- xfs_fileoff_t last_fsb;
- xfs_fileoff_t zero_off;
- xfs_fsize_t zero_len;
- int nimaps;
- int error = 0;
- struct xfs_bmbt_irec imap;
-
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT(offset > isize);
trace_xfs_zero_eof(ip, isize, offset - isize);
-
- /*
- * First handle zeroing the block on which isize resides.
- *
- * We only zero a part of that block so it is handled specially.
- */
- if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
- error = xfs_zero_last_block(ip, offset, isize, did_zeroing);
- if (error)
- return error;
- }
-
- /*
- * Calculate the range between the new size and the old where blocks
- * needing to be zeroed may exist.
- *
- * To get the block where the last byte in the file currently resides,
- * we need to subtract one from the size and truncate back to a block
- * boundary. We subtract 1 in case the size is exactly on a block
- * boundary.
- */
- last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
- start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
- end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
- ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
- if (last_fsb == end_zero_fsb) {
- /*
- * The size was only incremented on its last block.
- * We took care of that above, so just return.
- */
- return 0;
- }
-
- ASSERT(start_zero_fsb <= end_zero_fsb);
- while (start_zero_fsb <= end_zero_fsb) {
- nimaps = 1;
- zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb,
- &imap, &nimaps, 0);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- if (error)
- return error;
-
- ASSERT(nimaps > 0);
-
- if (imap.br_state == XFS_EXT_UNWRITTEN ||
- imap.br_startblock == HOLESTARTBLOCK) {
- start_zero_fsb = imap.br_startoff + imap.br_blockcount;
- ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
- continue;
- }
-
- /*
- * There are blocks we need to zero.
- */
- zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
- zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
-
- if ((zero_off + zero_len) > offset)
- zero_len = offset - zero_off;
-
- error = xfs_iozero(ip, zero_off, zero_len);
- if (error)
- return error;
-
- *did_zeroing = true;
- start_zero_fsb = imap.br_startoff + imap.br_blockcount;
- ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
- }
-
- return 0;
+ return xfs_zero_range(ip, isize, offset - isize, did_zeroing);
}
/*
@@ -841,7 +672,7 @@ xfs_file_buffered_aio_write(
write_retry:
trace_xfs_file_buffered_write(ip, iov_iter_count(from),
iocb->ki_pos, 0);
- ret = generic_perform_write(file, from, iocb->ki_pos);
+ ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
if (likely(ret >= 0))
iocb->ki_pos += ret;
@@ -1553,7 +1384,7 @@ xfs_filemap_page_mkwrite(
if (IS_DAX(inode)) {
ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
} else {
- ret = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+ ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
ret = block_page_mkwrite_return(ret);
}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 99d75223ff2e..0c19d3d05a91 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -427,7 +427,8 @@ int xfs_update_prealloc_flags(struct xfs_inode *ip,
enum xfs_prealloc_flags flags);
int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
xfs_fsize_t isize, bool *did_zeroing);
-int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
+int xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count,
+ bool *did_zero);
loff_t __xfs_seek_hole_data(struct inode *inode, loff_t start,
loff_t eof, int whence);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 58391355a44d..620fc9120444 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -15,6 +15,7 @@
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include <linux/iomap.h>
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
@@ -940,3 +941,173 @@ error_on_bmapi_transaction:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
+
+void
+xfs_bmbt_to_iomap(
+ struct xfs_inode *ip,
+ struct iomap *iomap,
+ struct xfs_bmbt_irec *imap)
+{
+ struct xfs_mount *mp = ip->i_mount;
+
+ if (imap->br_startblock == HOLESTARTBLOCK) {
+ iomap->blkno = IOMAP_NULL_BLOCK;
+ iomap->type = IOMAP_HOLE;
+ } else if (imap->br_startblock == DELAYSTARTBLOCK) {
+ iomap->blkno = IOMAP_NULL_BLOCK;
+ iomap->type = IOMAP_DELALLOC;
+ } else {
+ iomap->blkno = xfs_fsb_to_db(ip, imap->br_startblock);
+ if (imap->br_state == XFS_EXT_UNWRITTEN)
+ iomap->type = IOMAP_UNWRITTEN;
+ else
+ iomap->type = IOMAP_MAPPED;
+ }
+ iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
+ iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
+ iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
+}
+
+static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps)
+{
+ return !nimaps ||
+ imap->br_startblock == HOLESTARTBLOCK ||
+ imap->br_startblock == DELAYSTARTBLOCK;
+}
+
+static int
+xfs_file_iomap_begin(
+ struct inode *inode,
+ loff_t offset,
+ loff_t length,
+ unsigned flags,
+ struct iomap *iomap)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_bmbt_irec imap;
+ xfs_fileoff_t offset_fsb, end_fsb;
+ int nimaps = 1, error = 0;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -EIO;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+ ASSERT(offset <= mp->m_super->s_maxbytes);
+ if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
+ length = mp->m_super->s_maxbytes - offset;
+ offset_fsb = XFS_B_TO_FSBT(mp, offset);
+ end_fsb = XFS_B_TO_FSB(mp, offset + length);
+
+ error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+ &nimaps, XFS_BMAPI_ENTIRE);
+ if (error) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+ }
+
+ if ((flags & IOMAP_WRITE) && imap_needs_alloc(&imap, nimaps)) {
+ /*
+ * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
+ * pages to keep the chunks of work done here somewhat symmetric
+ * with the work writeback does. This is a completely arbitrary
+ * number pulled out of thin air as a best guess for initial
+ * testing.
+ *
+ * Note that the value needs to be less than 32 bits wide until
+ * the lower level functions are updated.
+ */
+ length = min_t(loff_t, length, 1024 * PAGE_SIZE);
+ if (xfs_get_extsz_hint(ip)) {
+ /*
+ * xfs_iomap_write_direct() expects the shared lock. It
+ * is unlocked on return.
+ */
+ xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+ error = xfs_iomap_write_direct(ip, offset, length, &imap,
+ nimaps);
+ } else {
+ error = xfs_iomap_write_delay(ip, offset, length, &imap);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ }
+
+ if (error)
+ return error;
+
+ trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
+ xfs_bmbt_to_iomap(ip, iomap, &imap);
+ } else if (nimaps) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ trace_xfs_iomap_found(ip, offset, length, 0, &imap);
+ xfs_bmbt_to_iomap(ip, iomap, &imap);
+ } else {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ trace_xfs_iomap_not_found(ip, offset, length, 0, &imap);
+ iomap->blkno = IOMAP_NULL_BLOCK;
+ iomap->type = IOMAP_HOLE;
+ iomap->offset = offset;
+ iomap->length = length;
+ }
+
+ return 0;
+}
+
+static int
+xfs_file_iomap_end_delalloc(
+ struct xfs_inode *ip,
+ loff_t offset,
+ loff_t length,
+ ssize_t written)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_fileoff_t start_fsb;
+ xfs_fileoff_t end_fsb;
+ int error = 0;
+
+ start_fsb = XFS_B_TO_FSB(mp, offset + written);
+ end_fsb = XFS_B_TO_FSB(mp, offset + length);
+
+ /*
+ * Trim back delalloc blocks if we didn't manage to write the whole
+ * range reserved.
+ *
+ * We don't need to care about racing delalloc as we hold i_mutex
+ * across the reserve/allocate/unreserve calls. If there are delalloc
+ * blocks in the range, they are ours.
+ */
+ if (start_fsb < end_fsb) {
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+ end_fsb - start_fsb);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+ if (error && !XFS_FORCED_SHUTDOWN(mp)) {
+ xfs_alert(mp, "%s: unable to clean up ino %lld",
+ __func__, ip->i_ino);
+ return error;
+ }
+ }
+
+ return 0;
+}
+
+static int
+xfs_file_iomap_end(
+ struct inode *inode,
+ loff_t offset,
+ loff_t length,
+ ssize_t written,
+ unsigned flags,
+ struct iomap *iomap)
+{
+ if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
+ return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
+ length, written);
+ return 0;
+}
+
+struct iomap_ops xfs_iomap_ops = {
+ .iomap_begin = xfs_file_iomap_begin,
+ .iomap_end = xfs_file_iomap_end,
+};
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 8688e663d744..e066d045e2ff 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -18,6 +18,8 @@
#ifndef __XFS_IOMAP_H__
#define __XFS_IOMAP_H__
+#include <linux/iomap.h>
+
struct xfs_inode;
struct xfs_bmbt_irec;
@@ -29,4 +31,9 @@ int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t,
struct xfs_bmbt_irec *);
int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
+void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
+ struct xfs_bmbt_irec *);
+
+extern struct iomap_ops xfs_iomap_ops;
+
#endif /* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index c5d4eba6972e..ab820f84ed50 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -38,12 +38,13 @@
#include "xfs_dir2.h"
#include "xfs_trans_space.h"
#include "xfs_pnfs.h"
+#include "xfs_iomap.h"
#include <linux/capability.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include <linux/security.h>
-#include <linux/fiemap.h>
+#include <linux/iomap.h>
#include <linux/slab.h>
/*
@@ -801,20 +802,30 @@ xfs_setattr_size(
return error;
/*
+ * Wait for all direct I/O to complete.
+ */
+ inode_dio_wait(inode);
+
+ /*
* File data changes must be complete before we start the transaction to
* modify the inode. This needs to be done before joining the inode to
* the transaction because the inode cannot be unlocked once it is a
* part of the transaction.
*
- * Start with zeroing any data block beyond EOF that we may expose on
- * file extension.
+ * Start with zeroing any data beyond EOF that we may expose on file
+ * extension, or zeroing out the rest of the block on a downward
+ * truncate.
*/
if (newsize > oldsize) {
error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
- if (error)
- return error;
+ } else {
+ error = iomap_truncate_page(inode, newsize, &did_zeroing,
+ &xfs_iomap_ops);
}
+ if (error)
+ return error;
+
/*
* We are going to log the inode size change in this transaction so
* any previous writes that are beyond the on disk EOF and the new
@@ -823,17 +834,14 @@ xfs_setattr_size(
* problem. Note that this includes any block zeroing we did above;
* otherwise those blocks may not be zeroed after a crash.
*/
- if (newsize > ip->i_d.di_size &&
- (oldsize != ip->i_d.di_size || did_zeroing)) {
+ if (did_zeroing ||
+ (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) {
error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
ip->i_d.di_size, newsize);
if (error)
return error;
}
- /* Now wait for all direct I/O to complete. */
- inode_dio_wait(inode);
-
/*
* We've already locked out new page faults, so now we can safely remove
* pages from the page cache knowing they won't get refaulted until we
@@ -851,13 +859,6 @@ xfs_setattr_size(
* to hope that the caller sees ENOMEM and retries the truncate
* operation.
*/
- if (IS_DAX(inode))
- error = dax_truncate_page(inode, newsize, xfs_get_blocks_direct);
- else
- error = block_truncate_page(inode->i_mapping, newsize,
- xfs_get_blocks);
- if (error)
- return error;
truncate_setsize(inode, newsize);
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
@@ -998,51 +999,6 @@ xfs_vn_update_time(
return xfs_trans_commit(tp);
}
-#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
-
-/*
- * Call fiemap helper to fill in user data.
- * Returns positive errors to xfs_getbmap.
- */
-STATIC int
-xfs_fiemap_format(
- void **arg,
- struct getbmapx *bmv,
- int *full)
-{
- int error;
- struct fiemap_extent_info *fieinfo = *arg;
- u32 fiemap_flags = 0;
- u64 logical, physical, length;
-
- /* Do nothing for a hole */
- if (bmv->bmv_block == -1LL)
- return 0;
-
- logical = BBTOB(bmv->bmv_offset);
- physical = BBTOB(bmv->bmv_block);
- length = BBTOB(bmv->bmv_length);
-
- if (bmv->bmv_oflags & BMV_OF_PREALLOC)
- fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
- else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
- fiemap_flags |= (FIEMAP_EXTENT_DELALLOC |
- FIEMAP_EXTENT_UNKNOWN);
- physical = 0; /* no block yet */
- }
- if (bmv->bmv_oflags & BMV_OF_LAST)
- fiemap_flags |= FIEMAP_EXTENT_LAST;
-
- error = fiemap_fill_next_extent(fieinfo, logical, physical,
- length, fiemap_flags);
- if (error > 0) {
- error = 0;
- *full = 1; /* user array now full */
- }
-
- return error;
-}
-
STATIC int
xfs_vn_fiemap(
struct inode *inode,
@@ -1050,38 +1006,13 @@ xfs_vn_fiemap(
u64 start,
u64 length)
{
- xfs_inode_t *ip = XFS_I(inode);
- struct getbmapx bm;
int error;
- error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
- if (error)
- return error;
-
- /* Set up bmap header for xfs internal routine */
- bm.bmv_offset = BTOBBT(start);
- /* Special case for whole file */
- if (length == FIEMAP_MAX_OFFSET)
- bm.bmv_length = -1LL;
- else
- bm.bmv_length = BTOBB(start + length) - bm.bmv_offset;
-
- /* We add one because in getbmap world count includes the header */
- bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
- fieinfo->fi_extents_max + 1;
- bm.bmv_count = min_t(__s32, bm.bmv_count,
- (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
- bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
- if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
- bm.bmv_iflags |= BMV_IF_ATTRFORK;
- if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
- bm.bmv_iflags |= BMV_IF_DELALLOC;
-
- error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
- if (error)
- return error;
+ xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED);
+ error = iomap_fiemap(inode, fieinfo, start, length, &xfs_iomap_ops);
+ xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED);
- return 0;
+ return error;
}
STATIC int
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index db3c7df52e30..0f14b2e4bf6c 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -80,32 +80,6 @@ xfs_fs_get_uuid(
return 0;
}
-static void
-xfs_bmbt_to_iomap(
- struct xfs_inode *ip,
- struct iomap *iomap,
- struct xfs_bmbt_irec *imap)
-{
- struct xfs_mount *mp = ip->i_mount;
-
- if (imap->br_startblock == HOLESTARTBLOCK) {
- iomap->blkno = IOMAP_NULL_BLOCK;
- iomap->type = IOMAP_HOLE;
- } else if (imap->br_startblock == DELAYSTARTBLOCK) {
- iomap->blkno = IOMAP_NULL_BLOCK;
- iomap->type = IOMAP_DELALLOC;
- } else {
- iomap->blkno =
- XFS_FSB_TO_DADDR(ip->i_mount, imap->br_startblock);
- if (imap->br_state == XFS_EXT_UNWRITTEN)
- iomap->type = IOMAP_UNWRITTEN;
- else
- iomap->type = IOMAP_MAPPED;
- }
- iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
- iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
-}
-
/*
* Get a layout for the pNFS client.
*/
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ea94ee0fe5ea..bb24ce7b0280 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1295,6 +1295,9 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
+DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
+DEFINE_IOMAP_EVENT(xfs_iomap_found);
+DEFINE_IOMAP_EVENT(xfs_iomap_not_found);
DECLARE_EVENT_CLASS(xfs_simple_io_class,
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),