vfs: pull btrfs clone API to vfs layer

The btrfs clone ioctls are now adopted by other file systems, with NFS and CIFS already having support for them, and XFS being under active development. To avoid growth of various slightly incompatible implementations, add one to the VFS. Note that clones are different from file copies in several ways: - they are atomic vs other writers - they support whole file clones - they support 64-bit length clones - they do not allow partial success (aka short writes) - clones are expected to be a fast metadata operation Because of that it would be rather cumbersome to try to piggyback them on top of the recent copy_file_range infrastructure. The converse isn't true and the copy_file_range system call could try clone file range as a first attempt to copy, something that further patches will enable. Based on earlier work from Peng Tao. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
author: Christoph Hellwig <hch@lst.de> 2015-12-03 12:59:50 +0100
committer: Al Viro <viro@zeniv.linux.org.uk> 2015-12-07 23:11:33 -0500
commit: 04b38d601239b4d9be641b412cf4b7456a041c67 (patch)
tree: 196b5fa72848de2a98e09af86099d99da70f2833 /fs/read_write.c
parent: acc15575e78e534c12549d8057a692f490a50f61 (diff)
download: linux-stable-04b38d601239b4d9be641b412cf4b7456a041c67.tar.gz
linux-stable-04b38d601239b4d9be641b412cf4b7456a041c67.tar.bz2
linux-stable-04b38d601239b4d9be641b412cf4b7456a041c67.zip
1 files changed, 72 insertions, 0 deletions
diff --git a/fs/read_write.c b/fs/read_write.c
index 6cfad4761fd8..c75d02cb13ec 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1451,3 +1451,75 @@ out1:
 out2:
 	return ret;
 }
+
+/*
+ * clone_verify_area - validate one file's byte range for a clone
+ * @file:  file whose range is checked
+ * @pos:   first byte offset of the range
+ * @len:   length of the range in bytes
+ * @write: true to check for write access, false to check for read access
+ *
+ * Returns 0 when the range is acceptable, -EINVAL when @pos is negative or
+ * @pos + @len does not fit in an loff_t, and otherwise the error reported by
+ * locks_mandatory_area() or security_file_permission().
+ */
+static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
+{
+	struct inode *inode = file_inode(file);
+
+	if (unlikely(pos < 0))
+		return -EINVAL;
+
+	/*
+	 * Bound @len by the room left above @pos instead of testing the sign
+	 * of pos + len: that sum is evaluated as u64, so a huge @len can wrap
+	 * it back to a small non-negative value that would pass a sign test.
+	 * pos >= 0 was established above, so OFFSET_MAX - pos cannot overflow.
+	 */
+	if (unlikely(len > (u64)(OFFSET_MAX - pos)))
+		return -EINVAL;
+
+	if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
+		/* A zero @len checks everything from @pos up to OFFSET_MAX. */
+		loff_t end = len ? pos + len - 1 : OFFSET_MAX;
+		int retval;
+
+		retval = locks_mandatory_area(inode, file, pos, end,
+				write ? F_WRLCK : F_RDLCK);
+		if (retval < 0)
+			return retval;
+	}
+
+	return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
+}
+/**
+ * vfs_clone_file_range - clone a byte range from one file into another
+ * @file_in:  file to clone from; must be open for reading
+ * @pos_in:   byte offset in @file_in where the range starts
+ * @file_out: file to clone into; must be open for writing, without O_APPEND
+ * @pos_out:  byte offset in @file_out where the range is placed
+ * @len:      number of bytes to clone
+ *
+ * Validates both files and both ranges, calls mnt_want_write_file() on
+ * @file_out, and hands the request to the ->clone_file_range() method of
+ * @file_in.  On success fsnotify_access() is called for @file_in and
+ * fsnotify_modify() for @file_out.
+ *
+ * Return: 0 on success; -EXDEV if the files are on different superblocks or
+ * mounts; -EISDIR if either is a directory; -EOPNOTSUPP if either is not a
+ * regular file; -EBADF for wrong open modes, O_APPEND on @file_out, or a
+ * missing ->clone_file_range(); -EINVAL if the source range ends beyond the
+ * size of @file_in; otherwise whatever clone_verify_area(),
+ * mnt_want_write_file() or ->clone_file_range() returned.
+ */
+int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+		struct file *file_out, loff_t pos_out, u64 len)
+{
+	struct inode *inode_in = file_inode(file_in);
+	struct inode *inode_out = file_inode(file_out);
+	int ret;
+	/* Source and destination must share a superblock and a mount. */
+	if (inode_in->i_sb != inode_out->i_sb ||
+	    file_in->f_path.mnt != file_out->f_path.mnt)
+		return -EXDEV;
+	/* Only regular files qualify; directories get their own errno. */
+	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+		return -EISDIR;
+	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+		return -EOPNOTSUPP;
+	/* Check the open modes and that the source file has a clone method. */
+	if (!(file_in->f_mode & FMODE_READ) ||
+	    !(file_out->f_mode & FMODE_WRITE) ||
+	    (file_out->f_flags & O_APPEND) ||
+	    !file_in->f_op->clone_file_range)
+		return -EBADF;
+	/* Verify the source range for reading ... */
+	ret = clone_verify_area(file_in, pos_in, len, false);
+	if (ret)
+		return ret;
+	/* ... and the destination range for writing. */
+	ret = clone_verify_area(file_out, pos_out, len, true);
+	if (ret)
+		return ret;
+	/*
+	 * The source range must end within the current size of @file_in.
+	 * NOTE(review): pos_in + len is evaluated as u64, so an extremely
+	 * large len could wrap this sum; confirm earlier checks exclude it.
+	 */
+	if (pos_in + len > i_size_read(inode_in))
+		return -EINVAL;
+	/* Every return after this point goes through mnt_drop_write_file(). */
+	ret = mnt_want_write_file(file_out);
+	if (ret)
+		return ret;
+	/* The method of the source file carries out the actual clone. */
+	ret = file_in->f_op->clone_file_range(file_in, pos_in,
+			file_out, pos_out, len);
+	if (!ret) {
+		fsnotify_access(file_in);
+		fsnotify_modify(file_out);
+	}
+	/* Reached on both the success and the failure path of the clone. */
+	mnt_drop_write_file(file_out);
+	return ret;
+}
+EXPORT_SYMBOL(vfs_clone_file_range);
author	Christoph Hellwig <hch@lst.de>	2015-12-03 12:59:50 +0100
committer	Al Viro <viro@zeniv.linux.org.uk>	2015-12-07 23:11:33 -0500
commit	04b38d601239b4d9be641b412cf4b7456a041c67 (patch)
tree	196b5fa72848de2a98e09af86099d99da70f2833 /fs/read_write.c
parent	acc15575e78e534c12549d8057a692f490a50f61 (diff)
download	linux-stable-04b38d601239b4d9be641b412cf4b7456a041c67.tar.gz linux-stable-04b38d601239b4d9be641b412cf4b7456a041c67.tar.bz2 linux-stable-04b38d601239b4d9be641b412cf4b7456a041c67.zip