diff options
Diffstat (limited to 'fs')
218 files changed, 3940 insertions, 2841 deletions
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index b83ebfbf3fdc..5a0db6dec8d1 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -68,14 +68,10 @@ int v9fs_file_open(struct inode *inode, struct file *file); void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat); int v9fs_uflags2omode(int uflags, int extended); -ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64); -ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64); void v9fs_blank_wstat(struct p9_wstat *wstat); int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *); int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end, int datasync); -ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *, - const char __user *, size_t, loff_t *, int); int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode); int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode); static inline void v9fs_invalidate_inode_attr(struct inode *inode) diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index eb14e055ea83..2e38f9a5b472 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -33,7 +33,7 @@ #include <linux/pagemap.h> #include <linux/idr.h> #include <linux/sched.h> -#include <linux/aio.h> +#include <linux/uio.h> #include <net/9p/9p.h> #include <net/9p/client.h> @@ -51,12 +51,11 @@ */ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page) { - int retval; - loff_t offset; - char *buffer; - struct inode *inode; + struct inode *inode = page->mapping->host; + struct bio_vec bvec = {.bv_page = page, .bv_len = PAGE_SIZE}; + struct iov_iter to; + int retval, err; - inode = page->mapping->host; p9_debug(P9_DEBUG_VFS, "\n"); BUG_ON(!PageLocked(page)); @@ -65,16 +64,16 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page) if (retval == 0) return retval; - buffer = kmap(page); - offset = page_offset(page); + iov_iter_bvec(&to, ITER_BVEC | READ, &bvec, 1, PAGE_SIZE); - retval = v9fs_fid_readn(fid, buffer, NULL, PAGE_CACHE_SIZE, offset); - if (retval < 0) { + retval = p9_client_read(fid, page_offset(page), &to, &err); + if (err) { v9fs_uncache_page(inode, page); + retval = err; goto done; } - memset(buffer + retval, 0, PAGE_CACHE_SIZE - retval); + zero_user(page, retval, PAGE_SIZE - retval); flush_dcache_page(page); SetPageUptodate(page); @@ -82,7 +81,6 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page) retval = 0; done: - kunmap(page); unlock_page(page); return retval; } @@ -161,41 +159,32 @@ static void v9fs_invalidate_page(struct page *page, unsigned int offset, static int v9fs_vfs_writepage_locked(struct page *page) { - char *buffer; - int retval, len; - loff_t offset, size; - mm_segment_t old_fs; - struct v9fs_inode *v9inode; struct inode *inode = page->mapping->host; + struct v9fs_inode *v9inode = V9FS_I(inode); + loff_t size = i_size_read(inode); + struct iov_iter from; + struct bio_vec bvec; + int err, len; - v9inode = V9FS_I(inode); - size = i_size_read(inode); if (page->index == size >> PAGE_CACHE_SHIFT) len = size & ~PAGE_CACHE_MASK; else len = PAGE_CACHE_SIZE; - set_page_writeback(page); - - buffer = kmap(page); - offset = page_offset(page); + bvec.bv_page = page; + bvec.bv_offset = 0; + bvec.bv_len = len; + iov_iter_bvec(&from, ITER_BVEC | WRITE, &bvec, 1, len); - old_fs = get_fs(); - set_fs(get_ds()); /* We should have writeback_fid always set */ BUG_ON(!v9inode->writeback_fid); - retval = v9fs_file_write_internal(inode, - v9inode->writeback_fid, - (__force const char __user *)buffer, - len, &offset, 0); - if (retval > 0) - retval = 0; + set_page_writeback(page); + + p9_client_write(v9inode->writeback_fid, page_offset(page), &from, &err); - set_fs(old_fs); - kunmap(page); end_page_writeback(page); - return retval; + return err; } static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc) @@ -261,16 +250,21 @@ static int v9fs_launder_page(struct page *page) static ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { - /* - * FIXME - * Now that we do caching with cache mode enabled, We need - * to support direct IO - */ - p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%pD) off/no(%lld/%lu) EINVAL\n", - iocb->ki_filp, - (long long)pos, iter->nr_segs); - - return -EINVAL; + struct file *file = iocb->ki_filp; + ssize_t n; + int err = 0; + if (rw & WRITE) { + n = p9_client_write(file->private_data, pos, iter, &err); + if (n) { + struct inode *inode = file_inode(file); + loff_t i_size = i_size_read(inode); + if (pos + n > i_size) + inode_add_bytes(inode, pos + n - i_size); + } + } else { + n = p9_client_read(file->private_data, pos, iter, &err); + } + return n ? n : err; } static int v9fs_write_begin(struct file *filp, struct address_space *mapping, diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 4f1151088ebe..76c3b1ab6361 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -33,6 +33,7 @@ #include <linux/inet.h> #include <linux/idr.h> #include <linux/slab.h> +#include <linux/uio.h> #include <net/9p/9p.h> #include <net/9p/client.h> @@ -115,6 +116,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) int buflen; int reclen = 0; struct p9_rdir *rdir; + struct kvec kvec; p9_debug(P9_DEBUG_VFS, "name %pD\n", file); fid = file->private_data; @@ -124,16 +126,21 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) rdir = v9fs_alloc_rdir_buf(file, buflen); if (!rdir) return -ENOMEM; + kvec.iov_base = rdir->buf; + kvec.iov_len = buflen; while (1) { if (rdir->tail == rdir->head) { - err = v9fs_file_readn(file, rdir->buf, NULL, - buflen, ctx->pos); - if (err <= 0) + struct iov_iter to; + int n; + iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buflen); + n = p9_client_read(file->private_data, ctx->pos, &to, + &err); + if (err) return err; rdir->head = 0; - rdir->tail = err; + rdir->tail = n; } while (rdir->head < rdir->tail) { p9stat_init(&st); diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index b40133796b87..d7fcb775311e 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -36,6 +36,8 @@ #include <linux/utsname.h> #include <asm/uaccess.h> #include <linux/idr.h> +#include <linux/uio.h> +#include <linux/slab.h> #include <net/9p/9p.h> #include <net/9p/client.h> @@ -285,6 +287,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl) fl->fl_end = glock.start + glock.length - 1; fl->fl_pid = glock.proc_id; } + kfree(glock.client_id); return res; } @@ -364,63 +367,6 @@ out_err: } /** - * v9fs_fid_readn - read from a fid - * @fid: fid to read - * @data: data buffer to read data into - * @udata: user data buffer to read data into - * @count: size of buffer - * @offset: offset at which to read data - * - */ -ssize_t -v9fs_fid_readn(struct p9_fid *fid, char *data, char __user *udata, u32 count, - u64 offset) -{ - int n, total, size; - - p9_debug(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", - fid->fid, (long long unsigned)offset, count); - n = 0; - total = 0; - size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ; - do { - n = p9_client_read(fid, data, udata, offset, count); - if (n <= 0) - break; - - if (data) - data += n; - if (udata) - udata += n; - - offset += n; - count -= n; - total += n; - } while (count > 0 && n == size); - - if (n < 0) - total = n; - - return total; -} - -/** - * v9fs_file_readn - read from a file - * @filp: file pointer to read - * @data: data buffer to read data into - * @udata: user data buffer to read data into - * @count: size of buffer - * @offset: offset at which to read data - * - */ -ssize_t -v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count, - u64 offset) -{ - return v9fs_fid_readn(filp->private_data, data, udata, count, offset); -} - -/** * v9fs_file_read - read from a file * @filp: file pointer to read * @udata: user data buffer to read data into @@ -430,69 +376,22 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count, */ static ssize_t -v9fs_file_read(struct file *filp, char __user *udata, size_t count, - loff_t * offset) +v9fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { - int ret; - struct p9_fid *fid; - size_t size; + struct p9_fid *fid = iocb->ki_filp->private_data; + int ret, err; - p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset); - fid = filp->private_data; + p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n", + iov_iter_count(to), iocb->ki_pos); - size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ; - if (count > size) - ret = v9fs_file_readn(filp, NULL, udata, count, *offset); - else - ret = p9_client_read(fid, NULL, udata, *offset, count); - - if (ret > 0) - *offset += ret; + ret = p9_client_read(fid, iocb->ki_pos, to, &err); + if (!ret) + return err; + iocb->ki_pos += ret; return ret; } -ssize_t -v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid, - const char __user *data, size_t count, - loff_t *offset, int invalidate) -{ - int n; - loff_t i_size; - size_t total = 0; - loff_t origin = *offset; - unsigned long pg_start, pg_end; - - p9_debug(P9_DEBUG_VFS, "data %p count %d offset %x\n", - data, (int)count, (int)*offset); - - do { - n = p9_client_write(fid, NULL, data+total, origin+total, count); - if (n <= 0) - break; - count -= n; - total += n; - } while (count > 0); - - if (invalidate && (total > 0)) { - pg_start = origin >> PAGE_CACHE_SHIFT; - pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT; - if (inode->i_mapping && inode->i_mapping->nrpages) - invalidate_inode_pages2_range(inode->i_mapping, - pg_start, pg_end); - *offset += total; - i_size = i_size_read(inode); - if (*offset > i_size) { - inode_add_bytes(inode, *offset - i_size); - i_size_write(inode, *offset); - } - } - if (n < 0) - return n; - - return total; -} - /** * v9fs_file_write - write to a file * @filp: file pointer to write @@ -502,35 +401,45 @@ v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid, * */ static ssize_t -v9fs_file_write(struct file *filp, const char __user * data, - size_t count, loff_t *offset) +v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { + struct file *file = iocb->ki_filp; ssize_t retval = 0; - loff_t origin = *offset; + loff_t origin = iocb->ki_pos; + size_t count = iov_iter_count(from); + int err = 0; - - retval = generic_write_checks(filp, &origin, &count, 0); + retval = generic_write_checks(file, &origin, &count, 0); if (retval) - goto out; + return retval; + + iov_iter_truncate(from, count); - retval = -EINVAL; - if ((ssize_t) count < 0) - goto out; - retval = 0; if (!count) - goto out; + return 0; - retval = v9fs_file_write_internal(file_inode(filp), - filp->private_data, - data, count, &origin, 1); - /* update offset on successful write */ - if (retval > 0) - *offset = origin; -out: - return retval; + retval = p9_client_write(file->private_data, origin, from, &err); + if (retval > 0) { + struct inode *inode = file_inode(file); + loff_t i_size; + unsigned long pg_start, pg_end; + pg_start = origin >> PAGE_CACHE_SHIFT; + pg_end = (origin + retval - 1) >> PAGE_CACHE_SHIFT; + if (inode->i_mapping && inode->i_mapping->nrpages) + invalidate_inode_pages2_range(inode->i_mapping, + pg_start, pg_end); + origin += retval; + i_size = i_size_read(inode); + iocb->ki_pos = origin; + if (origin > i_size) { + inode_add_bytes(inode, origin - i_size); + i_size_write(inode, origin); + } + return retval; + } + return err; } - static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) { @@ -657,44 +566,6 @@ out_unlock: return VM_FAULT_NOPAGE; } -static ssize_t -v9fs_direct_read(struct file *filp, char __user *udata, size_t count, - loff_t *offsetp) -{ - loff_t size, offset; - struct inode *inode; - struct address_space *mapping; - - offset = *offsetp; - mapping = filp->f_mapping; - inode = mapping->host; - if (!count) - return 0; - size = i_size_read(inode); - if (offset < size) - filemap_write_and_wait_range(mapping, offset, - offset + count - 1); - - return v9fs_file_read(filp, udata, count, offsetp); -} - -/** - * v9fs_cached_file_read - read from a file - * @filp: file pointer to read - * @data: user data buffer to read data into - * @count: size of buffer - * @offset: offset at which to read data - * - */ -static ssize_t -v9fs_cached_file_read(struct file *filp, char __user *data, size_t count, - loff_t *offset) -{ - if (filp->f_flags & O_DIRECT) - return v9fs_direct_read(filp, data, count, offset); - return new_sync_read(filp, data, count, offset); -} - /** * v9fs_mmap_file_read - read from a file * @filp: file pointer to read @@ -704,84 +575,12 @@ v9fs_cached_file_read(struct file *filp, char __user *data, size_t count, * */ static ssize_t -v9fs_mmap_file_read(struct file *filp, char __user *data, size_t count, - loff_t *offset) +v9fs_mmap_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { /* TODO: Check if there are dirty pages */ - return v9fs_file_read(filp, data, count, offset); + return v9fs_file_read_iter(iocb, to); } -static ssize_t -v9fs_direct_write(struct file *filp, const char __user * data, - size_t count, loff_t *offsetp) -{ - loff_t offset; - ssize_t retval; - struct inode *inode; - struct address_space *mapping; - - offset = *offsetp; - mapping = filp->f_mapping; - inode = mapping->host; - if (!count) - return 0; - - mutex_lock(&inode->i_mutex); - retval = filemap_write_and_wait_range(mapping, offset, - offset + count - 1); - if (retval) - goto err_out; - /* - * After a write we want buffered reads to be sure to go to disk to get - * the new data. We invalidate clean cached page from the region we're - * about to write. We do this *before* the write so that if we fail - * here we fall back to buffered write - */ - if (mapping->nrpages) { - pgoff_t pg_start = offset >> PAGE_CACHE_SHIFT; - pgoff_t pg_end = (offset + count - 1) >> PAGE_CACHE_SHIFT; - - retval = invalidate_inode_pages2_range(mapping, - pg_start, pg_end); - /* - * If a page can not be invalidated, fall back - * to buffered write. - */ - if (retval) { - if (retval == -EBUSY) - goto buff_write; - goto err_out; - } - } - retval = v9fs_file_write(filp, data, count, offsetp); -err_out: - mutex_unlock(&inode->i_mutex); - return retval; - -buff_write: - mutex_unlock(&inode->i_mutex); - return new_sync_write(filp, data, count, offsetp); -} - -/** - * v9fs_cached_file_write - write to a file - * @filp: file pointer to write - * @data: data buffer to write data from - * @count: size of buffer - * @offset: offset at which to write data - * - */ -static ssize_t -v9fs_cached_file_write(struct file *filp, const char __user * data, - size_t count, loff_t *offset) -{ - - if (filp->f_flags & O_DIRECT) - return v9fs_direct_write(filp, data, count, offset); - return new_sync_write(filp, data, count, offset); -} - - /** * v9fs_mmap_file_write - write to a file * @filp: file pointer to write @@ -791,14 +590,13 @@ v9fs_cached_file_write(struct file *filp, const char __user * data, * */ static ssize_t -v9fs_mmap_file_write(struct file *filp, const char __user *data, - size_t count, loff_t *offset) +v9fs_mmap_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { /* * TODO: invalidate mmaps on filp's inode between * offset and offset+count */ - return v9fs_file_write(filp, data, count, offset); + return v9fs_file_write_iter(iocb, from); } static void v9fs_mmap_vm_close(struct vm_area_struct *vma) @@ -843,8 +641,6 @@ static const struct vm_operations_struct v9fs_mmap_file_vm_ops = { const struct file_operations v9fs_cached_file_operations = { .llseek = generic_file_llseek, - .read = v9fs_cached_file_read, - .write = v9fs_cached_file_write, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .open = v9fs_file_open, @@ -856,8 +652,6 @@ const struct file_operations v9fs_cached_file_operations = { const struct file_operations v9fs_cached_file_operations_dotl = { .llseek = generic_file_llseek, - .read = v9fs_cached_file_read, - .write = v9fs_cached_file_write, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .open = v9fs_file_open, @@ -870,8 +664,8 @@ const struct file_operations v9fs_cached_file_operations_dotl = { const struct file_operations v9fs_file_operations = { .llseek = generic_file_llseek, - .read = v9fs_file_read, - .write = v9fs_file_write, + .read_iter = v9fs_file_read_iter, + .write_iter = v9fs_file_write_iter, .open = v9fs_file_open, .release = v9fs_dir_release, .lock = v9fs_file_lock, @@ -881,8 +675,8 @@ const struct file_operations v9fs_file_operations = { const struct file_operations v9fs_file_operations_dotl = { .llseek = generic_file_llseek, - .read = v9fs_file_read, - .write = v9fs_file_write, + .read_iter = v9fs_file_read_iter, + .write_iter = v9fs_file_write_iter, .open = v9fs_file_open, .release = v9fs_dir_release, .lock = v9fs_file_lock_dotl, @@ -893,8 +687,8 @@ const struct file_operations v9fs_file_operations_dotl = { const struct file_operations v9fs_mmap_file_operations = { .llseek = generic_file_llseek, - .read = v9fs_mmap_file_read, - .write = v9fs_mmap_file_write, + .read_iter = v9fs_mmap_file_read_iter, + .write_iter = v9fs_mmap_file_write_iter, .open = v9fs_file_open, .release = v9fs_dir_release, .lock = v9fs_file_lock, @@ -904,8 +698,8 @@ const struct file_operations v9fs_mmap_file_operations = { const struct file_operations v9fs_mmap_file_operations_dotl = { .llseek = generic_file_llseek, - .read = v9fs_mmap_file_read, - .write = v9fs_mmap_file_write, + .read_iter = v9fs_mmap_file_read_iter, + .write_iter = v9fs_mmap_file_write_iter, .open = v9fs_file_open, .release = v9fs_dir_release, .lock = v9fs_file_lock_dotl, diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index f95e01e058e4..0cf44b6cccd6 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -15,6 +15,7 @@ #include <linux/module.h> #include <linux/fs.h> #include <linux/sched.h> +#include <linux/uio.h> #include <net/9p/9p.h> #include <net/9p/client.h> @@ -25,50 +26,34 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name, void *buffer, size_t buffer_size) { ssize_t retval; - int msize, read_count; - u64 offset = 0, attr_size; + u64 attr_size; struct p9_fid *attr_fid; + struct kvec kvec = {.iov_base = buffer, .iov_len = buffer_size}; + struct iov_iter to; + int err; + + iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buffer_size); attr_fid = p9_client_xattrwalk(fid, name, &attr_size); if (IS_ERR(attr_fid)) { retval = PTR_ERR(attr_fid); p9_debug(P9_DEBUG_VFS, "p9_client_attrwalk failed %zd\n", retval); - attr_fid = NULL; - goto error; - } - if (!buffer_size) { - /* request to get the attr_size */ - retval = attr_size; - goto error; + return retval; } if (attr_size > buffer_size) { - retval = -ERANGE; - goto error; - } - msize = attr_fid->clnt->msize; - while (attr_size) { - if (attr_size > (msize - P9_IOHDRSZ)) - read_count = msize - P9_IOHDRSZ; + if (!buffer_size) /* request to get the attr_size */ + retval = attr_size; else - read_count = attr_size; - read_count = p9_client_read(attr_fid, ((char *)buffer)+offset, - NULL, offset, read_count); - if (read_count < 0) { - /* error in xattr read */ - retval = read_count; - goto error; - } - offset += read_count; - attr_size -= read_count; + retval = -ERANGE; + } else { + iov_iter_truncate(&to, attr_size); + retval = p9_client_read(attr_fid, 0, &to, &err); + if (err) + retval = err; } - /* Total read xattr bytes */ - retval = offset; -error: - if (attr_fid) - p9_client_clunk(attr_fid); + p9_client_clunk(attr_fid); return retval; - } @@ -120,8 +105,11 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name, int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name, const void *value, size_t value_len, int flags) { - u64 offset = 0; - int retval, msize, write_count; + struct kvec kvec = {.iov_base = (void *)value, .iov_len = value_len}; + struct iov_iter from; + int retval; + + iov_iter_kvec(&from, WRITE | ITER_KVEC, &kvec, 1, value_len); p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n", name, value_len, flags); @@ -135,29 +123,11 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name, * On success fid points to xattr */ retval = p9_client_xattrcreate(fid, name, value_len, flags); - if (retval < 0) { + if (retval < 0) p9_debug(P9_DEBUG_VFS, "p9_client_xattrcreate failed %d\n", retval); - goto err; - } - msize = fid->clnt->msize; - while (value_len) { - if (value_len > (msize - P9_IOHDRSZ)) - write_count = msize - P9_IOHDRSZ; - else - write_count = value_len; - write_count = p9_client_write(fid, ((char *)value)+offset, - NULL, offset, write_count); - if (write_count < 0) { - /* error in xattr write */ - retval = write_count; - goto err; - } - offset += write_count; - value_len -= write_count; - } - retval = 0; -err: + else + p9_client_write(fid, 0, &from, &retval); p9_client_clunk(fid); return retval; } diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 270c48148f79..2d0cbbd14cfc 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -27,9 +27,6 @@ config COMPAT_BINFMT_ELF bool depends on COMPAT && BINFMT_ELF -config ARCH_BINFMT_ELF_RANDOMIZE_PIE - bool - config ARCH_BINFMT_ELF_STATE bool diff --git a/fs/Makefile b/fs/Makefile index a88ac4838c9e..cb92fd4c3172 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -118,6 +118,7 @@ obj-$(CONFIG_HOSTFS) += hostfs/ obj-$(CONFIG_HPPFS) += hppfs/ obj-$(CONFIG_CACHEFILES) += cachefiles/ obj-$(CONFIG_DEBUG_FS) += debugfs/ +obj-$(CONFIG_TRACING) += tracefs/ obj-$(CONFIG_OCFS2_FS) += ocfs2/ obj-$(CONFIG_BTRFS_FS) += btrfs/ obj-$(CONFIG_GFS2_FS) += gfs2/ diff --git a/fs/adfs/file.c b/fs/adfs/file.c index 07c9edce5aa7..46c0d5671cd5 100644 --- a/fs/adfs/file.c +++ b/fs/adfs/file.c @@ -23,11 +23,9 @@ const struct file_operations adfs_file_operations = { .llseek = generic_file_llseek, - .read = new_sync_read, .read_iter = generic_file_read_iter, .mmap = generic_file_mmap, .fsync = generic_file_fsync, - .write = new_sync_write, .write_iter = generic_file_write_iter, .splice_read = generic_file_splice_read, }; diff --git a/fs/affs/file.c b/fs/affs/file.c index d2468bf95669..7c1a3d4c19c2 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -12,7 +12,7 @@ * affs regular file handling primitives */ -#include <linux/aio.h> +#include <linux/uio.h> #include "affs.h" static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext); @@ -699,8 +699,10 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, boff = tmp % bsize; if (boff) { bh = affs_bread_ino(inode, bidx, 0); - if (IS_ERR(bh)) - return PTR_ERR(bh); + if (IS_ERR(bh)) { + written = PTR_ERR(bh); + goto err_first_bh; + } tmp = min(bsize - boff, to - from); BUG_ON(boff + tmp > bsize || tmp > bsize); memcpy(AFFS_DATA(bh) + boff, data + from, tmp); @@ -712,14 +714,16 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, bidx++; } else if (bidx) { bh = affs_bread_ino(inode, bidx - 1, 0); - if (IS_ERR(bh)) - return PTR_ERR(bh); + if (IS_ERR(bh)) { + written = PTR_ERR(bh); + goto err_first_bh; + } } while (from + bsize <= to) { prev_bh = bh; bh = affs_getemptyblk_ino(inode, bidx); if (IS_ERR(bh)) - goto out; + goto err_bh; memcpy(AFFS_DATA(bh), data + from, bsize); if (buffer_new(bh)) { AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); @@ -751,7 +755,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, prev_bh = bh; bh = affs_bread_ino(inode, bidx, 1); if (IS_ERR(bh)) - goto out; + goto err_bh; tmp = min(bsize, to - from); BUG_ON(tmp > bsize); memcpy(AFFS_DATA(bh), data + from, tmp); @@ -790,12 +794,13 @@ done: if (tmp > inode->i_size) inode->i_size = AFFS_I(inode)->mmu_private = tmp; +err_first_bh: unlock_page(page); page_cache_release(page); return written; -out: +err_bh: bh = prev_bh; if (!written) written = PTR_ERR(bh); @@ -964,9 +969,7 @@ int affs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) } const struct file_operations affs_file_operations = { .llseek = generic_file_llseek, - .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = new_sync_write, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .open = affs_file_open, diff --git a/fs/afs/file.c b/fs/afs/file.c index 932ce07948b3..999bc3caec92 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -31,8 +31,6 @@ const struct file_operations afs_file_operations = { .open = afs_open, .release = afs_release, .llseek = generic_file_llseek, - .read = new_sync_read, - .write = new_sync_write, .read_iter = generic_file_read_iter, .write_iter = afs_file_write, .mmap = generic_file_readonly_mmap, diff --git a/fs/afs/misc.c b/fs/afs/misc.c index 0dd4dafee10b..91ea1aa0d8b3 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -22,9 +22,12 @@ int afs_abort_to_error(u32 abort_code) { switch (abort_code) { + /* low errno codes inserted into abort namespace */ case 13: return -EACCES; case 27: return -EFBIG; case 30: return -EROFS; + + /* VICE "special error" codes; 101 - 111 */ case VSALVAGE: return -EIO; case VNOVNODE: return -ENOENT; case VNOVOL: return -ENOMEDIUM; @@ -36,11 +39,18 @@ int afs_abort_to_error(u32 abort_code) case VOVERQUOTA: return -EDQUOT; case VBUSY: return -EBUSY; case VMOVED: return -ENXIO; - case 0x2f6df0a: return -EWOULDBLOCK; + + /* Unified AFS error table; ET "uae" == 0x2f6df00 */ + case 0x2f6df00: return -EPERM; + case 0x2f6df01: return -ENOENT; + case 0x2f6df04: return -EIO; + case 0x2f6df0a: return -EAGAIN; + case 0x2f6df0b: return -ENOMEM; case 0x2f6df0c: return -EACCES; case 0x2f6df0f: return -EBUSY; case 0x2f6df10: return -EEXIST; case 0x2f6df11: return -EXDEV; + case 0x2f6df12: return -ENODEV; case 0x2f6df13: return -ENOTDIR; case 0x2f6df14: return -EISDIR; case 0x2f6df15: return -EINVAL; @@ -54,8 +64,12 @@ int afs_abort_to_error(u32 abort_code) case 0x2f6df23: return -ENAMETOOLONG; case 0x2f6df24: return -ENOLCK; case 0x2f6df26: return -ENOTEMPTY; + case 0x2f6df28: return -EWOULDBLOCK; + case 0x2f6df69: return -ENOTCONN; + case 0x2f6df6c: return -ETIMEDOUT; case 0x2f6df78: return -EDQUOT; + /* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */ case RXKADINCONSISTENCY: return -EPROTO; case RXKADPACKETSHORT: return -EPROTO; case RXKADLEVELFAIL: return -EKEYREJECTED; diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index dbc732e9a5c0..3a57a1b0fb51 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -770,15 +770,12 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, void afs_send_empty_reply(struct afs_call *call) { struct msghdr msg; - struct kvec iov[1]; _enter(""); - iov[0].iov_base = NULL; - iov[0].iov_len = 0; msg.msg_name = NULL; msg.msg_namelen = 0; - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 0, 0); /* WTF? */ + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, NULL, 0, 0); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; diff --git a/fs/afs/write.c b/fs/afs/write.c index c13cb08964ed..0714abcd7f32 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -14,7 +14,6 @@ #include <linux/pagemap.h> #include <linux/writeback.h> #include <linux/pagevec.h> -#include <linux/aio.h> #include "internal.h" static int afs_write_back_from_locked_page(struct afs_writeback *wb, @@ -77,6 +77,11 @@ struct kioctx_cpu { unsigned reqs_available; }; +struct ctx_rq_wait { + struct completion comp; + atomic_t count; +}; + struct kioctx { struct percpu_ref users; atomic_t dead; @@ -115,7 +120,7 @@ struct kioctx { /* * signals when all in-flight requests are done */ - struct completion *requests_done; + struct ctx_rq_wait *rq_wait; struct { /* @@ -151,6 +156,38 @@ struct kioctx { unsigned id; }; +/* + * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either + * cancelled or completed (this makes a certain amount of sense because + * successful cancellation - io_cancel() - does deliver the completion to + * userspace). + * + * And since most things don't implement kiocb cancellation and we'd really like + * kiocb completion to be lockless when possible, we use ki_cancel to + * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED + * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel(). + */ +#define KIOCB_CANCELLED ((void *) (~0ULL)) + +struct aio_kiocb { + struct kiocb common; + + struct kioctx *ki_ctx; + kiocb_cancel_fn *ki_cancel; + + struct iocb __user *ki_user_iocb; /* user's aiocb */ + __u64 ki_user_data; /* user's data for completion */ + + struct list_head ki_list; /* the aio core uses this + * for cancellation */ + + /* + * If the aio_resfd field of the userspace iocb is not zero, + * this is the underlying eventfd context to deliver events to. + */ + struct eventfd_ctx *ki_eventfd; +}; + /*------ sysctl variables----*/ static DEFINE_SPINLOCK(aio_nr_lock); unsigned long aio_nr; /* current system wide number of aio requests */ @@ -220,7 +257,7 @@ static int __init aio_setup(void) if (IS_ERR(aio_mnt)) panic("Failed to create aio fs mount."); - kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); + kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page)); @@ -278,11 +315,11 @@ static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -static void aio_ring_remap(struct file *file, struct vm_area_struct *vma) +static int aio_ring_remap(struct file *file, struct vm_area_struct *vma) { struct mm_struct *mm = vma->vm_mm; struct kioctx_table *table; - int i; + int i, res = -EINVAL; spin_lock(&mm->ioctx_lock); rcu_read_lock(); @@ -292,13 +329,17 @@ static void aio_ring_remap(struct file *file, struct vm_area_struct *vma) ctx = table->table[i]; if (ctx && ctx->aio_ring_file == file) { - ctx->user_id = ctx->mmap_base = vma->vm_start; + if (!atomic_read(&ctx->dead)) { + ctx->user_id = ctx->mmap_base = vma->vm_start; + res = 0; + } break; } } rcu_read_unlock(); spin_unlock(&mm->ioctx_lock); + return res; } static const struct file_operations aio_ring_fops = { @@ -480,8 +521,9 @@ static int aio_setup_ring(struct kioctx *ctx) #define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event)) #define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE) -void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) +void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) { + struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common); struct kioctx *ctx = req->ki_ctx; unsigned long flags; @@ -496,7 +538,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) } EXPORT_SYMBOL(kiocb_set_cancel_fn); -static int kiocb_cancel(struct kiocb *kiocb) +static int kiocb_cancel(struct aio_kiocb *kiocb) { kiocb_cancel_fn *old, *cancel; @@ -514,7 +556,7 @@ static int kiocb_cancel(struct kiocb *kiocb) cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED); } while (cancel != old); - return cancel(kiocb); + return cancel(&kiocb->common); } static void free_ioctx(struct work_struct *work) @@ -535,8 +577,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref) struct kioctx *ctx = container_of(ref, struct kioctx, reqs); /* At this point we know that there are no any in-flight requests */ - if (ctx->requests_done) - complete(ctx->requests_done); + if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count)) + complete(&ctx->rq_wait->comp); INIT_WORK(&ctx->free_work, free_ioctx); schedule_work(&ctx->free_work); @@ -550,13 +592,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref) static void free_ioctx_users(struct percpu_ref *ref) { struct kioctx *ctx = container_of(ref, struct kioctx, users); - struct kiocb *req; + struct aio_kiocb *req; spin_lock_irq(&ctx->ctx_lock); while (!list_empty(&ctx->active_reqs)) { req = list_first_entry(&ctx->active_reqs, - struct kiocb, ki_list); + struct aio_kiocb, ki_list); list_del_init(&req->ki_list); kiocb_cancel(req); @@ -655,8 +697,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) nr_events *= 2; /* Prevent overflows */ - if ((nr_events > (0x10000000U / sizeof(struct io_event))) || - (nr_events > (0x10000000U / sizeof(struct kiocb)))) { + if (nr_events > (0x10000000U / sizeof(struct io_event))) { pr_debug("ENOMEM: nr_events too high\n"); return ERR_PTR(-EINVAL); } @@ -727,6 +768,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) err_cleanup: aio_nr_sub(ctx->max_reqs); err_ctx: + atomic_set(&ctx->dead, 1); + if (ctx->mmap_size) + vm_munmap(ctx->mmap_base, ctx->mmap_size); aio_free_ring(ctx); err: mutex_unlock(&ctx->ring_lock); @@ -744,15 +788,16 @@ err: * the rapid destruction of the kioctx. */ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, - struct completion *requests_done) + struct ctx_rq_wait *wait) { struct kioctx_table *table; - if (atomic_xchg(&ctx->dead, 1)) + spin_lock(&mm->ioctx_lock); + if (atomic_xchg(&ctx->dead, 1)) { + spin_unlock(&mm->ioctx_lock); return -EINVAL; + } - - spin_lock(&mm->ioctx_lock); table = rcu_dereference_raw(mm->ioctx_table); WARN_ON(ctx != table->table[ctx->id]); table->table[ctx->id] = NULL; @@ -773,27 +818,11 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, if (ctx->mmap_size) vm_munmap(ctx->mmap_base, ctx->mmap_size); - ctx->requests_done = requests_done; + ctx->rq_wait = wait; percpu_ref_kill(&ctx->users); return 0; } -/* wait_on_sync_kiocb: - * Waits on the given sync kiocb to complete. - */ -ssize_t wait_on_sync_kiocb(struct kiocb *req) -{ - while (!req->ki_ctx) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (req->ki_ctx) - break; - io_schedule(); - } - __set_current_state(TASK_RUNNING); - return req->ki_user_data; -} -EXPORT_SYMBOL(wait_on_sync_kiocb); - /* * exit_aio: called when the last user of mm goes away. At this point, there is * no way for any new requests to be submited or any of the io_* syscalls to be @@ -805,18 +834,24 @@ EXPORT_SYMBOL(wait_on_sync_kiocb); void exit_aio(struct mm_struct *mm) { struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table); - int i; + struct ctx_rq_wait wait; + int i, skipped; if (!table) return; + atomic_set(&wait.count, table->nr); + init_completion(&wait.comp); + + skipped = 0; for (i = 0; i < table->nr; ++i) { struct kioctx *ctx = table->table[i]; - struct completion requests_done = - COMPLETION_INITIALIZER_ONSTACK(requests_done); - if (!ctx) + if (!ctx) { + skipped++; continue; + } + /* * We don't need to bother with munmap() here - exit_mmap(mm) * is coming and it'll unmap everything. And we simply can't, @@ -825,10 +860,12 @@ void exit_aio(struct mm_struct *mm) * that it needs to unmap the area, just set it to 0. */ ctx->mmap_size = 0; - kill_ioctx(mm, ctx, &requests_done); + kill_ioctx(mm, ctx, &wait); + } + if (!atomic_sub_and_test(skipped, &wait.count)) { /* Wait until all IO for the context are done. */ - wait_for_completion(&requests_done); + wait_for_completion(&wait.comp); } RCU_INIT_POINTER(mm->ioctx_table, NULL); @@ -948,9 +985,9 @@ static void user_refill_reqs_available(struct kioctx *ctx) * Allocate a slot for an aio request. * Returns NULL if no requests are free. */ -static inline struct kiocb *aio_get_req(struct kioctx *ctx) +static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) { - struct kiocb *req; + struct aio_kiocb *req; if (!get_reqs_available(ctx)) { user_refill_reqs_available(ctx); @@ -971,10 +1008,10 @@ out_put: return NULL; } -static void kiocb_free(struct kiocb *req) +static void kiocb_free(struct aio_kiocb *req) { - if (req->ki_filp) - fput(req->ki_filp); + if (req->common.ki_filp) + fput(req->common.ki_filp); if (req->ki_eventfd != NULL) eventfd_ctx_put(req->ki_eventfd); kmem_cache_free(kiocb_cachep, req); @@ -1010,8 +1047,9 @@ out: /* aio_complete * Called when the io request on the given iocb is complete. */ -void aio_complete(struct kiocb *iocb, long res, long res2) +static void aio_complete(struct kiocb *kiocb, long res, long res2) { + struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common); struct kioctx *ctx = iocb->ki_ctx; struct aio_ring *ring; struct io_event *ev_page, *event; @@ -1025,13 +1063,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) * ref, no other paths have a way to get another ref * - the sync task helpfully left a reference to itself in the iocb */ - if (is_sync_kiocb(iocb)) { - iocb->ki_user_data = res; - smp_wmb(); - iocb->ki_ctx = ERR_PTR(-EXDEV); - wake_up_process(iocb->ki_obj.tsk); - return; - } + BUG_ON(is_sync_kiocb(kiocb)); if (iocb->ki_list.next) { unsigned long flags; @@ -1057,7 +1089,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); event = ev_page + pos % AIO_EVENTS_PER_PAGE; - event->obj = (u64)(unsigned long)iocb->ki_obj.user; + event->obj = (u64)(unsigned long)iocb->ki_user_iocb; event->data = iocb->ki_user_data; event->res = res; event->res2 = res2; @@ -1066,7 +1098,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n", - ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data, + ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data, res, res2); /* after flagging the request as done, we @@ -1113,7 +1145,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2) percpu_ref_put(&ctx->reqs); } -EXPORT_SYMBOL(aio_complete); /* aio_read_events_ring * Pull an event off of the ioctx's event ring. Returns the number of @@ -1313,15 +1344,17 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) { struct kioctx *ioctx = lookup_ioctx(ctx); if (likely(NULL != ioctx)) { - struct completion requests_done = - COMPLETION_INITIALIZER_ONSTACK(requests_done); + struct ctx_rq_wait wait; int ret; + init_completion(&wait.comp); + atomic_set(&wait.count, 1); + /* Pass requests_done to kill_ioctx() where it can be set * in a thread-safe way. If we try to set it here then we have * a race condition if two io_destroy() called simultaneously. */ - ret = kill_ioctx(current->mm, ioctx, &requests_done); + ret = kill_ioctx(current->mm, ioctx, &wait); percpu_ref_put(&ioctx->users); /* Wait until all IO for the context are done. Otherwise kernel @@ -1329,7 +1362,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) * is destroyed. */ if (!ret) - wait_for_completion(&requests_done); + wait_for_completion(&wait.comp); return ret; } @@ -1337,50 +1370,21 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) return -EINVAL; } -typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *, - unsigned long, loff_t); typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *); -static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, - int rw, char __user *buf, - unsigned long *nr_segs, - struct iovec **iovec, - bool compat) +static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len, + struct iovec **iovec, + bool compat, + struct iov_iter *iter) { - ssize_t ret; - - *nr_segs = kiocb->ki_nbytes; - #ifdef CONFIG_COMPAT if (compat) - ret = compat_rw_copy_check_uvector(rw, + return compat_import_iovec(rw, (struct compat_iovec __user *)buf, - *nr_segs, UIO_FASTIOV, *iovec, iovec); - else + len, UIO_FASTIOV, iovec, iter); #endif - ret = rw_copy_check_uvector(rw, - (struct iovec __user *)buf, - *nr_segs, UIO_FASTIOV, *iovec, iovec); - if (ret < 0) - return ret; - - /* ki_nbytes now reflect bytes instead of segs */ - kiocb->ki_nbytes = ret; - return 0; -} - -static ssize_t aio_setup_single_vector(struct kiocb *kiocb, - int rw, char __user *buf, - unsigned long *nr_segs, - struct iovec *iovec) -{ - if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes))) - return -EFAULT; - - iovec->iov_base = buf; - iovec->iov_len = kiocb->ki_nbytes; - *nr_segs = 1; - return 0; + return import_iovec(rw, (struct iovec __user *)buf, + len, UIO_FASTIOV, iovec, iter); } /* @@ -1388,14 +1392,12 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb, * Performs the initial checks and io submission. */ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, - char __user *buf, bool compat) + char __user *buf, size_t len, bool compat) { struct file *file = req->ki_filp; ssize_t ret; - unsigned long nr_segs; int rw; fmode_t mode; - aio_rw_op *rw_op; rw_iter_op *iter_op; struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct iov_iter iter; @@ -1405,7 +1407,6 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, case IOCB_CMD_PREADV: mode = FMODE_READ; rw = READ; - rw_op = file->f_op->aio_read; iter_op = file->f_op->read_iter; goto rw_common; @@ -1413,51 +1414,40 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, case IOCB_CMD_PWRITEV: mode = FMODE_WRITE; rw = WRITE; - rw_op = file->f_op->aio_write; iter_op = file->f_op->write_iter; goto rw_common; rw_common: if (unlikely(!(file->f_mode & mode))) return -EBADF; - if (!rw_op && !iter_op) + if (!iter_op) return -EINVAL; - ret = (opcode == IOCB_CMD_PREADV || - opcode == IOCB_CMD_PWRITEV) - ? aio_setup_vectored_rw(req, rw, buf, &nr_segs, - &iovec, compat) - : aio_setup_single_vector(req, rw, buf, &nr_segs, - iovec); + if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV) + ret = aio_setup_vectored_rw(rw, buf, len, + &iovec, compat, &iter); + else { + ret = import_single_range(rw, buf, len, iovec, &iter); + iovec = NULL; + } if (!ret) - ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); + ret = rw_verify_area(rw, file, &req->ki_pos, + iov_iter_count(&iter)); if (ret < 0) { - if (iovec != inline_vecs) - kfree(iovec); + kfree(iovec); return ret; } - req->ki_nbytes = ret; - - /* XXX: move/kill - rw_verify_area()? */ - /* This matches the pread()/pwrite() logic */ - if (req->ki_pos < 0) { - ret = -EINVAL; - break; - } + len = ret; if (rw == WRITE) file_start_write(file); - if (iter_op) { - iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes); - ret = iter_op(req, &iter); - } else { - ret = rw_op(req, iovec, nr_segs, req->ki_pos); - } + ret = iter_op(req, &iter); if (rw == WRITE) file_end_write(file); + kfree(iovec); break; case IOCB_CMD_FDSYNC: @@ -1479,9 +1469,6 @@ rw_common: return -EINVAL; } - if (iovec != inline_vecs) - kfree(iovec); - if (ret != -EIOCBQUEUED) { /* * There's no easy way to restart the syscall since other AIO's @@ -1500,7 +1487,7 @@ rw_common: static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, struct iocb *iocb, bool compat) { - struct kiocb *req; + struct aio_kiocb *req; ssize_t ret; /* enforce forwards compatibility on users */ @@ -1523,11 +1510,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, if (unlikely(!req)) return -EAGAIN; - req->ki_filp = fget(iocb->aio_fildes); - if (unlikely(!req->ki_filp)) { + req->common.ki_filp = fget(iocb->aio_fildes); + if (unlikely(!req->common.ki_filp)) { ret = -EBADF; goto out_put_req; } + req->common.ki_pos = iocb->aio_offset; + req->common.ki_complete = aio_complete; + req->common.ki_flags = 0; if (iocb->aio_flags & IOCB_FLAG_RESFD) { /* @@ -1542,6 +1532,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, req->ki_eventfd = NULL; goto out_put_req; } + + req->common.ki_flags |= IOCB_EVENTFD; } ret = put_user(KIOCB_KEY, &user_iocb->aio_key); @@ -1550,13 +1542,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, goto out_put_req; } - req->ki_obj.user = user_iocb; + req->ki_user_iocb = user_iocb; req->ki_user_data = iocb->aio_data; - req->ki_pos = iocb->aio_offset; - req->ki_nbytes = iocb->aio_nbytes; - ret = aio_run_iocb(req, iocb->aio_lio_opcode, + ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode, (char __user *)(unsigned long)iocb->aio_buf, + iocb->aio_nbytes, compat); if (ret) goto out_put_req; @@ -1643,10 +1634,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, /* lookup_kiocb * Finds a given iocb for cancellation. */ -static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, - u32 key) +static struct aio_kiocb * +lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key) { - struct list_head *pos; + struct aio_kiocb *kiocb; assert_spin_locked(&ctx->ctx_lock); @@ -1654,9 +1645,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, return NULL; /* TODO: use a hash or array, this sucks. */ - list_for_each(pos, &ctx->active_reqs) { - struct kiocb *kiocb = list_kiocb(pos); - if (kiocb->ki_obj.user == iocb) + list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) { + if (kiocb->ki_user_iocb == iocb) return kiocb; } return NULL; @@ -1676,7 +1666,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result) { struct kioctx *ctx; - struct kiocb *kiocb; + struct aio_kiocb *kiocb; u32 key; int ret; diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 8e98cf954bab..d10e619632ab 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -213,7 +213,7 @@ void autofs4_clean_ino(struct autofs_info *); static inline int autofs_prepare_pipe(struct file *pipe) { - if (!pipe->f_op->write) + if (!(pipe->f_mode & FMODE_CAN_WRITE)) return -EINVAL; if (!S_ISFIFO(file_inode(pipe)->i_mode)) return -EINVAL; diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 116fd38ee472..2ad05ab93db8 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -70,7 +70,7 @@ static int autofs4_write(struct autofs_sb_info *sbi, mutex_lock(&sbi->pipe_mutex); while (bytes && - (wr = file->f_op->write(file,data,bytes,&file->f_pos)) > 0) { + (wr = __vfs_write(file,data,bytes,&file->f_pos)) > 0) { data += wr; bytes -= wr; } diff --git a/fs/bfs/file.c b/fs/bfs/file.c index e7f88ace1a25..97f1b5160155 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -23,9 +23,7 @@ const struct file_operations bfs_file_operations = { .llseek = generic_file_llseek, - .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = new_sync_write, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .splice_read = generic_file_splice_read, diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 90bc079d9982..fdcb4d69f430 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -15,6 +15,7 @@ #include <linux/buffer_head.h> #include <linux/vfs.h> #include <linux/writeback.h> +#include <linux/uio.h> #include <asm/uaccess.h> #include "bfs.h" diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 995986b8e36b..241ef68d2893 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -31,6 +31,7 @@ #include <linux/security.h> #include <linux/random.h> #include <linux/elf.h> +#include <linux/elf-randomize.h> #include <linux/utsname.h> #include <linux/coredump.h> #include <linux/sched.h> @@ -862,6 +863,7 @@ static int load_elf_binary(struct linux_binprm *bprm) i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { int elf_prot = 0, elf_flags; unsigned long k, vaddr; + unsigned long total_size = 0; if (elf_ppnt->p_type != PT_LOAD) continue; @@ -909,25 +911,20 @@ static int load_elf_binary(struct linux_binprm *bprm) * default mmap base, as well as whatever program they * might try to exec. This is because the brk will * follow the loader, and is not movable. */ -#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE - /* Memory randomization might have been switched off - * in runtime via sysctl or explicit setting of - * personality flags. - * If that is the case, retain the original non-zero - * load_bias value in order to establish proper - * non-randomized mappings. - */ + load_bias = ELF_ET_DYN_BASE - vaddr; if (current->flags & PF_RANDOMIZE) - load_bias = 0; - else - load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); -#else - load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); -#endif + load_bias += arch_mmap_rnd(); + load_bias = ELF_PAGESTART(load_bias); + total_size = total_mapping_size(elf_phdata, + loc->elf_ex.e_phnum); + if (!total_size) { + error = -EINVAL; + goto out_free_dentry; + } } error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, - elf_prot, elf_flags, 0); + elf_prot, elf_flags, total_size); if (BAD_ADDR(error)) { retval = IS_ERR((void *)error) ? PTR_ERR((void*)error) : -EINVAL; @@ -1053,15 +1050,13 @@ static int load_elf_binary(struct linux_binprm *bprm) current->mm->end_data = end_data; current->mm->start_stack = bprm->p; -#ifdef arch_randomize_brk if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) { current->mm->brk = current->mm->start_brk = arch_randomize_brk(current->mm); -#ifdef CONFIG_COMPAT_BRK +#ifdef compat_brk_randomized current->brk_randomized = 1; #endif } -#endif if (current->personality & MMAP_PAGE_ZERO) { /* Why this, you ask??? Well SVr4 maps page 0 as read-only, diff --git a/fs/block_dev.c b/fs/block_dev.c index 975266be67d3..b5e87896f517 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -27,7 +27,6 @@ #include <linux/namei.h> #include <linux/log2.h> #include <linux/cleancache.h> -#include <linux/aio.h> #include <asm/uaccess.h> #include "internal.h" @@ -1660,8 +1659,6 @@ const struct file_operations def_blk_fops = { .open = blkdev_open, .release = blkdev_close, .llseek = block_llseek, - .read = new_sync_read, - .write = new_sync_write, .read_iter = blkdev_read_iter, .write_iter = blkdev_write_iter, .mmap = generic_file_mmap, diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 993642199326..6d67f32e648d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1645,14 +1645,14 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, parent_nritems = btrfs_header_nritems(parent); blocksize = root->nodesize; - end_slot = parent_nritems; + end_slot = parent_nritems - 1; - if (parent_nritems == 1) + if (parent_nritems <= 1) return 0; btrfs_set_lock_blocking(parent); - for (i = start_slot; i < end_slot; i++) { + for (i = start_slot; i <= end_slot; i++) { int close = 1; btrfs_node_key(parent, &disk_key, i); @@ -1669,7 +1669,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, other = btrfs_node_blockptr(parent, i - 1); close = close_blocks(blocknr, other, blocksize); } - if (!close && i < end_slot - 2) { + if (!close && i < end_slot) { other = btrfs_node_blockptr(parent, i + 1); close = close_blocks(blocknr, other, blocksize); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 84c3b00f3de8..f9c89cae39ee 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3387,6 +3387,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_setup_space_cache(struct btrfs_trans_handle *trans, + struct btrfs_root *root); int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); @@ -3909,6 +3911,9 @@ int btrfs_prealloc_file_range_trans(struct inode *inode, loff_t actual_len, u64 *alloc_hint); int btrfs_inode_check_errors(struct inode *inode); extern const struct dentry_operations btrfs_dentry_operations; +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +void btrfs_test_inode_set_ops(struct inode *inode); +#endif /* ioctl.c */ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f79f38542a73..639f2663ed3f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3921,7 +3921,7 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, } if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key) + sizeof(struct btrfs_chunk)) { - printk(KERN_ERR "BTRFS: system chunk array too small %u < %lu\n", + printk(KERN_ERR "BTRFS: system chunk array too small %u < %zu\n", btrfs_super_sys_array_size(sb), sizeof(struct btrfs_disk_key) + sizeof(struct btrfs_chunk)); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 571f402d3fc4..8b353ad02f03 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3208,6 +3208,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, return 0; } + if (trans->aborted) + return 0; again: inode = lookup_free_space_inode(root, block_group, path); if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { @@ -3243,6 +3245,20 @@ again: */ BTRFS_I(inode)->generation = 0; ret = btrfs_update_inode(trans, root, inode); + if (ret) { + /* + * So theoretically we could recover from this, simply set the + * super cache generation to 0 so we know to invalidate the + * cache, but then we'd have to keep track of the block groups + * that fail this way so we know we _have_ to reset this cache + * before the next commit or risk reading stale cache. So to + * limit our exposure to horrible edge cases lets just abort the + * transaction, this only happens in really bad situations + * anyway. + */ + btrfs_abort_transaction(trans, root, ret); + goto out_put; + } WARN_ON(ret); if (i_size_read(inode) > 0) { @@ -3309,6 +3325,32 @@ out: return ret; } +int btrfs_setup_space_cache(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_block_group_cache *cache, *tmp; + struct btrfs_transaction *cur_trans = trans->transaction; + struct btrfs_path *path; + + if (list_empty(&cur_trans->dirty_bgs) || + !btrfs_test_opt(root, SPACE_CACHE)) + return 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + /* Could add new block groups, use _safe just in case */ + list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs, + dirty_list) { + if (cache->disk_cache_state == BTRFS_DC_CLEAR) + cache_save_setup(cache, trans, path); + } + + btrfs_free_path(path); + return 0; +} + int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -5094,7 +5136,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) num_bytes = ALIGN(num_bytes, root->sectorsize); spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents++; + nr_extents = (unsigned)div64_u64(num_bytes + + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + BTRFS_I(inode)->outstanding_extents += nr_extents; + nr_extents = 0; if (BTRFS_I(inode)->outstanding_extents > BTRFS_I(inode)->reserved_extents) @@ -5239,6 +5285,9 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) if (dropped > 0) to_free += btrfs_calc_trans_metadata_size(root, dropped); + if (btrfs_test_is_dummy_root(root)) + return; + trace_btrfs_space_reservation(root->fs_info, "delalloc", btrfs_ino(inode), to_free, 0); if (root->fs_info->quota_enabled) { diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c7233ff1d533..d688cfe5d496 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4968,6 +4968,12 @@ static int release_extent_buffer(struct extent_buffer *eb) /* Should be safe to release our pages at this point */ btrfs_release_extent_buffer_page(eb); +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS + if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) { + __free_extent_buffer(eb); + return 1; + } +#endif call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); return 1; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b78bbbac900d..cdc801c85105 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -24,7 +24,6 @@ #include <linux/string.h> #include <linux/backing-dev.h> #include <linux/mpage.h> -#include <linux/aio.h> #include <linux/falloc.h> #include <linux/swap.h> #include <linux/writeback.h> @@ -32,6 +31,7 @@ #include <linux/compat.h> #include <linux/slab.h> #include <linux/btrfs.h> +#include <linux/uio.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -1811,22 +1811,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, mutex_unlock(&inode->i_mutex); /* - * we want to make sure fsync finds this change - * but we haven't joined a transaction running right now. - * - * Later on, someone is sure to update the inode and get the - * real transid recorded. - * - * We set last_trans now to the fs_info generation + 1, - * this will either be one more than the running transaction - * or the generation used for the next transaction if there isn't - * one running right now. - * * We also have to set last_sub_trans to the current log transid, * otherwise subsequent syncs to a file that's been synced in this * transaction will appear to have already occured. */ - BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; BTRFS_I(inode)->last_sub_trans = root->log_transid; if (num_written > 0) { err = generic_write_sync(file, pos, num_written); @@ -1959,25 +1947,37 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) atomic_inc(&root->log_batch); /* - * check the transaction that last modified this inode - * and see if its already been committed - */ - if (!BTRFS_I(inode)->last_trans) { - mutex_unlock(&inode->i_mutex); - goto out; - } - - /* - * if the last transaction that changed this file was before - * the current transaction, we can bail out now without any - * syncing + * If the last transaction that changed this file was before the current + * transaction and we have the full sync flag set in our inode, we can + * bail out now without any syncing. + * + * Note that we can't bail out if the full sync flag isn't set. This is + * because when the full sync flag is set we start all ordered extents + * and wait for them to fully complete - when they complete they update + * the inode's last_trans field through: + * + * btrfs_finish_ordered_io() -> + * btrfs_update_inode_fallback() -> + * btrfs_update_inode() -> + * btrfs_set_inode_last_trans() + * + * So we are sure that last_trans is up to date and can do this check to + * bail out safely. For the fast path, when the full sync flag is not + * set in our inode, we can not do it because we start only our ordered + * extents and don't wait for them to complete (that is when + * btrfs_finish_ordered_io runs), so here at this point their last_trans + * value might be less than or equals to fs_info->last_trans_committed, + * and setting a speculative last_trans for an inode when a buffered + * write is made (such as fs_info->generation + 1 for example) would not + * be reliable since after setting the value and before fsync is called + * any number of transactions can start and commit (transaction kthread + * commits the current transaction periodically), and a transaction + * commit does not start nor waits for ordered extents to complete. */ smp_mb(); if (btrfs_inode_in_log(inode, root->fs_info->generation) || - BTRFS_I(inode)->last_trans <= - root->fs_info->last_trans_committed) { - BTRFS_I(inode)->last_trans = 0; - + (full_sync && BTRFS_I(inode)->last_trans <= + root->fs_info->last_trans_committed)) { /* * We'v had everything committed since the last time we were * modified so clear this flag in case it was set for whatever @@ -2275,6 +2275,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) bool same_page; bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); u64 ino_size; + bool truncated_page = false; + bool updated_inode = false; ret = btrfs_wait_ordered_range(inode, offset, len); if (ret) @@ -2306,13 +2308,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) * entire page. */ if (same_page && len < PAGE_CACHE_SIZE) { - if (offset < ino_size) + if (offset < ino_size) { + truncated_page = true; ret = btrfs_truncate_page(inode, offset, len, 0); + } else { + ret = 0; + } goto out_only_mutex; } /* zero back part of the first page */ if (offset < ino_size) { + truncated_page = true; ret = btrfs_truncate_page(inode, offset, 0, 0); if (ret) { mutex_unlock(&inode->i_mutex); @@ -2348,6 +2355,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) if (!ret) { /* zero the front end of the last page */ if (tail_start + tail_len < ino_size) { + truncated_page = true; ret = btrfs_truncate_page(inode, tail_start + tail_len, 0, 1); if (ret) @@ -2357,8 +2365,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) } if (lockend < lockstart) { - mutex_unlock(&inode->i_mutex); - return 0; + ret = 0; + goto out_only_mutex; } while (1) { @@ -2506,6 +2514,7 @@ out_trans: trans->block_rsv = &root->fs_info->trans_block_rsv; ret = btrfs_update_inode(trans, root, inode); + updated_inode = true; btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root); out_free: @@ -2515,6 +2524,22 @@ out: unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state, GFP_NOFS); out_only_mutex: + if (!updated_inode && truncated_page && !ret && !err) { + /* + * If we only end up zeroing part of a page, we still need to + * update the inode item, so that all the time fields are + * updated as well as the necessary btrfs inode in memory fields + * for detecting, at fsync time, if the inode isn't yet in the + * log tree or it's there but not up to date. + */ + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + } else { + err = btrfs_update_inode(trans, root, inode); + ret = btrfs_end_transaction(trans, root); + } + } mutex_unlock(&inode->i_mutex); if (ret && !err) err = ret; @@ -2781,8 +2806,6 @@ out: const struct file_operations btrfs_file_operations = { .llseek = btrfs_file_llseek, - .read = new_sync_read, - .write = new_sync_write, .read_iter = generic_file_read_iter, .splice_read = generic_file_splice_read, .write_iter = btrfs_file_write_iter, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a85c23dfcddb..686331f22b15 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -32,7 +32,6 @@ #include <linux/writeback.h> #include <linux/statfs.h> #include <linux/compat.h> -#include <linux/aio.h> #include <linux/bit_spinlock.h> #include <linux/xattr.h> #include <linux/posix_acl.h> @@ -43,6 +42,7 @@ #include <linux/btrfs.h> #include <linux/blkdev.h> #include <linux/posix_acl_xattr.h> +#include <linux/uio.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -108,6 +108,13 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start, static int btrfs_dirty_inode(struct inode *inode); +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +void btrfs_test_inode_set_ops(struct inode *inode) +{ + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; +} +#endif + static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, struct inode *inode, struct inode *dir, const struct qstr *qstr) @@ -1542,30 +1549,17 @@ static void btrfs_split_extent_hook(struct inode *inode, u64 new_size; /* - * We need the largest size of the remaining extent to see if we - * need to add a new outstanding extent. Think of the following - * case - * - * [MEAX_EXTENT_SIZEx2 - 4k][4k] - * - * The new_size would just be 4k and we'd think we had enough - * outstanding extents for this if we only took one side of the - * split, same goes for the other direction. We need to see if - * the larger size still is the same amount of extents as the - * original size, because if it is we need to add a new - * outstanding extent. But if we split up and the larger size - * is less than the original then we are good to go since we've - * already accounted for the extra extent in our original - * accounting. + * See the explanation in btrfs_merge_extent_hook, the same + * applies here, just in reverse. */ new_size = orig->end - split + 1; - if ((split - orig->start) > new_size) - new_size = split - orig->start; - - num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, + num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); - if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, - BTRFS_MAX_EXTENT_SIZE) < num_extents) + new_size = split - orig->start; + num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE) >= num_extents) return; } @@ -1591,8 +1585,10 @@ static void btrfs_merge_extent_hook(struct inode *inode, if (!(other->state & EXTENT_DELALLOC)) return; - old_size = other->end - other->start + 1; - new_size = old_size + (new->end - new->start + 1); + if (new->start > other->start) + new_size = new->end - other->start + 1; + else + new_size = other->end - new->start + 1; /* we're not bigger than the max, unreserve the space and go */ if (new_size <= BTRFS_MAX_EXTENT_SIZE) { @@ -1603,13 +1599,32 @@ static void btrfs_merge_extent_hook(struct inode *inode, } /* - * If we grew by another max_extent, just return, we want to keep that - * reserved amount. + * We have to add up either side to figure out how many extents were + * accounted for before we merged into one big extent. If the number of + * extents we accounted for is <= the amount we need for the new range + * then we can return, otherwise drop. Think of it like this + * + * [ 4k][MAX_SIZE] + * + * So we've grown the extent by a MAX_SIZE extent, this would mean we + * need 2 outstanding extents, on one side we have 1 and the other side + * we have 1 so they are == and we can return. But in this case + * + * [MAX_SIZE+4k][MAX_SIZE+4k] + * + * Each range on their own accounts for 2 extents, but merged together + * they are only 3 extents worth of accounting, so we need to drop in + * this case. */ + old_size = other->end - other->start + 1; num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); + old_size = new->end - new->start + 1; + num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, - BTRFS_MAX_EXTENT_SIZE) > num_extents) + BTRFS_MAX_EXTENT_SIZE) >= num_extents) return; spin_lock(&BTRFS_I(inode)->lock); @@ -1686,6 +1701,10 @@ static void btrfs_set_bit_hook(struct inode *inode, spin_unlock(&BTRFS_I(inode)->lock); } + /* For sanity tests */ + if (btrfs_test_is_dummy_root(root)) + return; + __percpu_counter_add(&root->fs_info->delalloc_bytes, len, root->fs_info->delalloc_batch); spin_lock(&BTRFS_I(inode)->lock); @@ -1741,6 +1760,10 @@ static void btrfs_clear_bit_hook(struct inode *inode, root != root->fs_info->tree_root) btrfs_delalloc_release_metadata(inode, len); + /* For sanity tests. */ + if (btrfs_test_is_dummy_root(root)) + return; + if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID && do_list && !(state->state & EXTENT_NORESERVE)) btrfs_free_reserved_data_space(inode, len); @@ -7213,7 +7236,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, u64 start = iblock << inode->i_blkbits; u64 lockstart, lockend; u64 len = bh_result->b_size; - u64 orig_len = len; + u64 *outstanding_extents = NULL; int unlock_bits = EXTENT_LOCKED; int ret = 0; @@ -7225,6 +7248,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, lockstart = start; lockend = start + len - 1; + if (current->journal_info) { + /* + * Need to pull our outstanding extents and set journal_info to NULL so + * that anything that needs to check if there's a transction doesn't get + * confused. + */ + outstanding_extents = current->journal_info; + current->journal_info = NULL; + } + /* * If this errors out it's because we couldn't invalidate pagecache for * this range and we need to fallback to buffered. @@ -7285,7 +7318,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && em->block_start != EXTENT_MAP_HOLE)) { int type; - int ret; u64 block_start, orig_start, orig_block_len, ram_bytes; if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) @@ -7349,11 +7381,20 @@ unlock: if (start + len > i_size_read(inode)) i_size_write(inode, start + len); - if (len < orig_len) { + /* + * If we have an outstanding_extents count still set then we're + * within our reservation, otherwise we need to adjust our inode + * counter appropriately. + */ + if (*outstanding_extents) { + (*outstanding_extents)--; + } else { spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->lock); } + + current->journal_info = outstanding_extents; btrfs_free_reserved_data_space(inode, len); } @@ -7377,6 +7418,8 @@ unlock: unlock_err: clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, unlock_bits, 1, 0, &cached_state, GFP_NOFS); + if (outstanding_extents) + current->journal_info = outstanding_extents; return ret; } @@ -8076,6 +8119,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + u64 outstanding_extents = 0; size_t count = 0; int flags = 0; bool wakeup = true; @@ -8113,6 +8157,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, ret = btrfs_delalloc_reserve_space(inode, count); if (ret) goto out; + outstanding_extents = div64_u64(count + + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + + /* + * We need to know how many extents we reserved so that we can + * do the accounting properly if we go over the number we + * originally calculated. Abuse current->journal_info for this. + */ + current->journal_info = &outstanding_extents; } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags)) { inode_dio_done(inode); @@ -8125,6 +8179,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, iter, offset, btrfs_get_blocks_direct, NULL, btrfs_submit_direct, flags); if (rw & WRITE) { + current->journal_info = NULL; if (ret < 0 && ret != -EIOCBQUEUED) btrfs_delalloc_release_space(inode, count); else if (ret >= 0 && (size_t)ret < count) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 534544e08f76..157cc54fc634 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -452,9 +452,7 @@ void btrfs_get_logged_extents(struct inode *inode, continue; if (entry_end(ordered) <= start) break; - if (!list_empty(&ordered->log_list)) - continue; - if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) + if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) continue; list_add(&ordered->log_list, logged_list); atomic_inc(&ordered->refs); @@ -511,8 +509,7 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans, wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)); - if (!test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) - list_add_tail(&ordered->trans_list, &trans->ordered); + list_add_tail(&ordered->trans_list, &trans->ordered); spin_lock_irq(&log->log_extents_lock[index]); } spin_unlock_irq(&log->log_extents_lock[index]); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 97159a8e91d4..058c79eecbfb 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1259,7 +1259,7 @@ static int comp_oper(struct btrfs_qgroup_operation *oper1, if (oper1->seq < oper2->seq) return -1; if (oper1->seq > oper2->seq) - return -1; + return 1; if (oper1->ref_root < oper2->ref_root) return -1; if (oper1->ref_root > oper2->ref_root) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index fe5857223515..d6033f540cc7 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -230,6 +230,7 @@ struct pending_dir_move { u64 parent_ino; u64 ino; u64 gen; + bool is_orphan; struct list_head update_refs; }; @@ -2984,7 +2985,8 @@ static int add_pending_dir_move(struct send_ctx *sctx, u64 ino_gen, u64 parent_ino, struct list_head *new_refs, - struct list_head *deleted_refs) + struct list_head *deleted_refs, + const bool is_orphan) { struct rb_node **p = &sctx->pending_dir_moves.rb_node; struct rb_node *parent = NULL; @@ -2999,6 +3001,7 @@ static int add_pending_dir_move(struct send_ctx *sctx, pm->parent_ino = parent_ino; pm->ino = ino; pm->gen = ino_gen; + pm->is_orphan = is_orphan; INIT_LIST_HEAD(&pm->list); INIT_LIST_HEAD(&pm->update_refs); RB_CLEAR_NODE(&pm->node); @@ -3131,16 +3134,20 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) rmdir_ino = dm->rmdir_ino; free_waiting_dir_move(sctx, dm); - ret = get_first_ref(sctx->parent_root, pm->ino, - &parent_ino, &parent_gen, name); - if (ret < 0) - goto out; - - ret = get_cur_path(sctx, parent_ino, parent_gen, - from_path); - if (ret < 0) - goto out; - ret = fs_path_add_path(from_path, name); + if (pm->is_orphan) { + ret = gen_unique_name(sctx, pm->ino, + pm->gen, from_path); + } else { + ret = get_first_ref(sctx->parent_root, pm->ino, + &parent_ino, &parent_gen, name); + if (ret < 0) + goto out; + ret = get_cur_path(sctx, parent_ino, parent_gen, + from_path); + if (ret < 0) + goto out; + ret = fs_path_add_path(from_path, name); + } if (ret < 0) goto out; @@ -3150,7 +3157,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) LIST_HEAD(deleted_refs); ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID); ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor, - &pm->update_refs, &deleted_refs); + &pm->update_refs, &deleted_refs, + pm->is_orphan); if (ret < 0) goto out; if (rmdir_ino) { @@ -3283,6 +3291,127 @@ out: return ret; } +/* + * We might need to delay a directory rename even when no ancestor directory + * (in the send root) with a higher inode number than ours (sctx->cur_ino) was + * renamed. This happens when we rename a directory to the old name (the name + * in the parent root) of some other unrelated directory that got its rename + * delayed due to some ancestor with higher number that got renamed. + * + * Example: + * + * Parent snapshot: + * . (ino 256) + * |---- a/ (ino 257) + * | |---- file (ino 260) + * | + * |---- b/ (ino 258) + * |---- c/ (ino 259) + * + * Send snapshot: + * . (ino 256) + * |---- a/ (ino 258) + * |---- x/ (ino 259) + * |---- y/ (ino 257) + * |----- file (ino 260) + * + * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257 + * from 'a' to 'x/y' happening first, which in turn depends on the rename of + * inode 259 from 'c' to 'x'. So the order of rename commands the send stream + * must issue is: + * + * 1 - rename 259 from 'c' to 'x' + * 2 - rename 257 from 'a' to 'x/y' + * 3 - rename 258 from 'b' to 'a' + * + * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can + * be done right away and < 0 on error. + */ +static int wait_for_dest_dir_move(struct send_ctx *sctx, + struct recorded_ref *parent_ref, + const bool is_orphan) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_key di_key; + struct btrfs_dir_item *di; + u64 left_gen; + u64 right_gen; + int ret = 0; + + if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) + return 0; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + key.objectid = parent_ref->dir; + key.type = BTRFS_DIR_ITEM_KEY; + key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len); + + ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0); + if (ret < 0) { + goto out; + } else if (ret > 0) { + ret = 0; + goto out; + } + + di = btrfs_match_dir_item_name(sctx->parent_root, path, + parent_ref->name, parent_ref->name_len); + if (!di) { + ret = 0; + goto out; + } + /* + * di_key.objectid has the number of the inode that has a dentry in the + * parent directory with the same name that sctx->cur_ino is being + * renamed to. We need to check if that inode is in the send root as + * well and if it is currently marked as an inode with a pending rename, + * if it is, we need to delay the rename of sctx->cur_ino as well, so + * that it happens after that other inode is renamed. + */ + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key); + if (di_key.type != BTRFS_INODE_ITEM_KEY) { + ret = 0; + goto out; + } + + ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL, + &left_gen, NULL, NULL, NULL, NULL); + if (ret < 0) + goto out; + ret = get_inode_info(sctx->send_root, di_key.objectid, NULL, + &right_gen, NULL, NULL, NULL, NULL); + if (ret < 0) { + if (ret == -ENOENT) + ret = 0; + goto out; + } + + /* Different inode, no need to delay the rename of sctx->cur_ino */ + if (right_gen != left_gen) { + ret = 0; + goto out; + } + + if (is_waiting_for_move(sctx, di_key.objectid)) { + ret = add_pending_dir_move(sctx, + sctx->cur_ino, + sctx->cur_inode_gen, + di_key.objectid, + &sctx->new_refs, + &sctx->deleted_refs, + is_orphan); + if (!ret) + ret = 1; + } +out: + btrfs_free_path(path); + return ret; +} + static int wait_for_parent_move(struct send_ctx *sctx, struct recorded_ref *parent_ref) { @@ -3349,7 +3478,8 @@ out: sctx->cur_inode_gen, ino, &sctx->new_refs, - &sctx->deleted_refs); + &sctx->deleted_refs, + false); if (!ret) ret = 1; } @@ -3372,6 +3502,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) int did_overwrite = 0; int is_orphan = 0; u64 last_dir_ino_rm = 0; + bool can_rename = true; verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); @@ -3490,12 +3621,22 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); } } + if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) { + ret = wait_for_dest_dir_move(sctx, cur, is_orphan); + if (ret < 0) + goto out; + if (ret == 1) { + can_rename = false; + *pending_move = 1; + } + } + /* * link/move the ref to the new place. If we have an orphan * inode, move it and update valid_path. If not, link or move * it depending on the inode mode. */ - if (is_orphan) { + if (is_orphan && can_rename) { ret = send_rename(sctx, valid_path, cur->full_path); if (ret < 0) goto out; @@ -3503,7 +3644,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); ret = fs_path_copy(valid_path, cur->full_path); if (ret < 0) goto out; - } else { + } else if (can_rename) { if (S_ISDIR(sctx->cur_inode_mode)) { /* * Dirs can't be linked, so move it. For moved diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index a116b55ce788..054fc0d97131 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -911,6 +911,197 @@ out: return ret; } +static int test_extent_accounting(void) +{ + struct inode *inode = NULL; + struct btrfs_root *root = NULL; + int ret = -ENOMEM; + + inode = btrfs_new_test_inode(); + if (!inode) { + test_msg("Couldn't allocate inode\n"); + return ret; + } + + root = btrfs_alloc_dummy_root(); + if (IS_ERR(root)) { + test_msg("Couldn't allocate root\n"); + goto out; + } + + root->fs_info = btrfs_alloc_dummy_fs_info(); + if (!root->fs_info) { + test_msg("Couldn't allocate dummy fs info\n"); + goto out; + } + + BTRFS_I(inode)->root = root; + btrfs_test_inode_set_ops(inode); + + /* [BTRFS_MAX_EXTENT_SIZE] */ + BTRFS_I(inode)->outstanding_extents++; + ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, + NULL); + if (ret) { + test_msg("btrfs_set_extent_delalloc returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 1) { + ret = -EINVAL; + test_msg("Miscount, wanted 1, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* [BTRFS_MAX_EXTENT_SIZE][4k] */ + BTRFS_I(inode)->outstanding_extents++; + ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE, + BTRFS_MAX_EXTENT_SIZE + 4095, NULL); + if (ret) { + test_msg("btrfs_set_extent_delalloc returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 2) { + ret = -EINVAL; + test_msg("Miscount, wanted 2, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */ + ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, + BTRFS_MAX_EXTENT_SIZE >> 1, + (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, + EXTENT_DELALLOC | EXTENT_DIRTY | + EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0, + NULL, GFP_NOFS); + if (ret) { + test_msg("clear_extent_bit returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 2) { + ret = -EINVAL; + test_msg("Miscount, wanted 2, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* [BTRFS_MAX_EXTENT_SIZE][4K] */ + BTRFS_I(inode)->outstanding_extents++; + ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1, + (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, + NULL); + if (ret) { + test_msg("btrfs_set_extent_delalloc returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 2) { + ret = -EINVAL; + test_msg("Miscount, wanted 2, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* + * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K] + * + * I'm artificially adding 2 to outstanding_extents because in the + * buffered IO case we'd add things up as we go, but I don't feel like + * doing that here, this isn't the interesting case we want to test. + */ + BTRFS_I(inode)->outstanding_extents += 2; + ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192, + (BTRFS_MAX_EXTENT_SIZE << 1) + 12287, + NULL); + if (ret) { + test_msg("btrfs_set_extent_delalloc returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 4) { + ret = -EINVAL; + test_msg("Miscount, wanted 4, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */ + BTRFS_I(inode)->outstanding_extents++; + ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096, + BTRFS_MAX_EXTENT_SIZE+8191, NULL); + if (ret) { + test_msg("btrfs_set_extent_delalloc returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 3) { + ret = -EINVAL; + test_msg("Miscount, wanted 3, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */ + ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, + BTRFS_MAX_EXTENT_SIZE+4096, + BTRFS_MAX_EXTENT_SIZE+8191, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, + NULL, GFP_NOFS); + if (ret) { + test_msg("clear_extent_bit returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 4) { + ret = -EINVAL; + test_msg("Miscount, wanted 4, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* + * Refill the hole again just for good measure, because I thought it + * might fail and I'd rather satisfy my paranoia at this point. + */ + BTRFS_I(inode)->outstanding_extents++; + ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096, + BTRFS_MAX_EXTENT_SIZE+8191, NULL); + if (ret) { + test_msg("btrfs_set_extent_delalloc returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 3) { + ret = -EINVAL; + test_msg("Miscount, wanted 3, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* Empty */ + ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, + NULL, GFP_NOFS); + if (ret) { + test_msg("clear_extent_bit returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents) { + ret = -EINVAL; + test_msg("Miscount, wanted 0, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + ret = 0; +out: + if (ret) + clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, + NULL, GFP_NOFS); + iput(inode); + btrfs_free_dummy_root(root); + return ret; +} + int btrfs_test_inodes(void) { int ret; @@ -924,5 +1115,9 @@ int btrfs_test_inodes(void) if (ret) return ret; test_msg("Running hole first btrfs_get_extent test\n"); - return test_hole_first(); + ret = test_hole_first(); + if (ret) + return ret; + test_msg("Running outstanding_extents tests\n"); + return test_extent_accounting(); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 7e80f32550a6..8be4278e25e8 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1023,17 +1023,13 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, u64 old_root_bytenr; u64 old_root_used; struct btrfs_root *tree_root = root->fs_info->tree_root; - bool extent_root = (root->objectid == BTRFS_EXTENT_TREE_OBJECTID); old_root_used = btrfs_root_used(&root->root_item); - btrfs_write_dirty_block_groups(trans, root); while (1) { old_root_bytenr = btrfs_root_bytenr(&root->root_item); if (old_root_bytenr == root->node->start && - old_root_used == btrfs_root_used(&root->root_item) && - (!extent_root || - list_empty(&trans->transaction->dirty_bgs))) + old_root_used == btrfs_root_used(&root->root_item)) break; btrfs_set_root_node(&root->root_item, root->node); @@ -1044,17 +1040,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, return ret; old_root_used = btrfs_root_used(&root->root_item); - if (extent_root) { - ret = btrfs_write_dirty_block_groups(trans, root); - if (ret) - return ret; - } - ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); - if (ret) - return ret; - ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); - if (ret) - return ret; } return 0; @@ -1071,6 +1056,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; + struct list_head *dirty_bgs = &trans->transaction->dirty_bgs; struct list_head *next; struct extent_buffer *eb; int ret; @@ -1098,11 +1084,15 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, if (ret) return ret; + ret = btrfs_setup_space_cache(trans, root); + if (ret) + return ret; + /* run_qgroups might have added some more refs */ ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); if (ret) return ret; - +again: while (!list_empty(&fs_info->dirty_cowonly_roots)) { next = fs_info->dirty_cowonly_roots.next; list_del_init(next); @@ -1115,8 +1105,23 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, ret = update_cowonly_root(trans, root); if (ret) return ret; + ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); + if (ret) + return ret; } + while (!list_empty(dirty_bgs)) { + ret = btrfs_write_dirty_block_groups(trans, root); + if (ret) + return ret; + ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); + if (ret) + return ret; + } + + if (!list_empty(&fs_info->dirty_cowonly_roots)) + goto again; + list_add_tail(&fs_info->extent_root->dirty_list, &trans->transaction->switch_commits); btrfs_after_dev_replace_commit(fs_info); @@ -1814,6 +1819,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, wait_for_commit(root, cur_trans); + if (unlikely(cur_trans->aborted)) + ret = cur_trans->aborted; + btrfs_put_transaction(cur_trans); return ret; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9a37f8b39bae..c5b8ba37f88e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1012,7 +1012,7 @@ again: base = btrfs_item_ptr_offset(leaf, path->slots[0]); while (cur_offset < item_size) { - extref = (struct btrfs_inode_extref *)base + cur_offset; + extref = (struct btrfs_inode_extref *)(base + cur_offset); victim_name_len = btrfs_inode_extref_name_len(leaf, extref); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 47b19465f0dc..883b93623bc5 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -111,6 +111,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans, name, name_len, -1); if (!di && (flags & XATTR_REPLACE)) ret = -ENODATA; + else if (IS_ERR(di)) + ret = PTR_ERR(di); else if (di) ret = btrfs_delete_one_dir_name(trans, root, path, di); goto out; @@ -127,10 +129,12 @@ static int do_setxattr(struct btrfs_trans_handle *trans, ASSERT(mutex_is_locked(&inode->i_mutex)); di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), name, name_len, 0); - if (!di) { + if (!di) ret = -ENODATA; + else if (IS_ERR(di)) + ret = PTR_ERR(di); + if (ret) goto out; - } btrfs_release_path(path); di = NULL; } diff --git a/fs/buffer.c b/fs/buffer.c index 20805db2c987..c7a5602d01ee 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3243,8 +3243,8 @@ int try_to_free_buffers(struct page *page) * to synchronise against __set_page_dirty_buffers and prevent the * dirty bit from being lost. */ - if (ret) - cancel_dirty_page(page, PAGE_CACHE_SIZE); + if (ret && TestClearPageDirty(page)) + account_page_cleaned(page, mapping); spin_unlock(&mapping->private_lock); out: if (buffers_to_free) { diff --git a/fs/ceph/file.c b/fs/ceph/file.c index d533075a823d..56237ea5fc22 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -7,7 +7,6 @@ #include <linux/mount.h> #include <linux/namei.h> #include <linux/writeback.h> -#include <linux/aio.h> #include <linux/falloc.h> #include "super.h" @@ -808,7 +807,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *filp = iocb->ki_filp; struct ceph_file_info *fi = filp->private_data; - size_t len = iocb->ki_nbytes; + size_t len = iov_iter_count(to); struct inode *inode = file_inode(filp); struct ceph_inode_info *ci = ceph_inode(inode); struct page *pinned_page = NULL; @@ -1332,8 +1331,6 @@ const struct file_operations ceph_file_fops = { .open = ceph_open, .release = ceph_release, .llseek = ceph_llseek, - .read = new_sync_read, - .write = new_sync_write, .read_iter = ceph_read_iter, .write_iter = ceph_write_iter, .mmap = ceph_mmap, diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 4ac7445e6ec7..aa0dc2573374 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -1,6 +1,9 @@ /* * fs/cifs/cifsencrypt.c * + * Encryption and hashing operations relating to NTLM, NTLMv2. See MS-NLMP + * for more detailed information + * * Copyright (C) International Business Machines Corp., 2005,2013 * Author(s): Steve French (sfrench@us.ibm.com) * @@ -515,7 +518,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, __func__); return rc; } - } else if (ses->serverName) { + } else { + /* We use ses->serverName if no domain name available */ len = strlen(ses->serverName); server = kmalloc(2 + (len * 2), GFP_KERNEL); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index d72fe37f5420..eaab4b2a0595 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -906,8 +906,6 @@ const struct inode_operations cifs_symlink_inode_ops = { }; const struct file_operations cifs_file_ops = { - .read = new_sync_read, - .write = new_sync_write, .read_iter = cifs_loose_read_iter, .write_iter = cifs_file_write_iter, .open = cifs_open, @@ -926,8 +924,6 @@ const struct file_operations cifs_file_ops = { }; const struct file_operations cifs_file_strict_ops = { - .read = new_sync_read, - .write = new_sync_write, .read_iter = cifs_strict_readv, .write_iter = cifs_strict_writev, .open = cifs_open, @@ -947,8 +943,6 @@ const struct file_operations cifs_file_strict_ops = { const struct file_operations cifs_file_direct_ops = { /* BB reevaluate whether they can be done with directio, no cache */ - .read = new_sync_read, - .write = new_sync_write, .read_iter = cifs_user_readv, .write_iter = cifs_user_writev, .open = cifs_open, @@ -967,8 +961,6 @@ const struct file_operations cifs_file_direct_ops = { }; const struct file_operations cifs_file_nobrl_ops = { - .read = new_sync_read, - .write = new_sync_write, .read_iter = cifs_loose_read_iter, .write_iter = cifs_file_write_iter, .open = cifs_open, @@ -986,8 +978,6 @@ const struct file_operations cifs_file_nobrl_ops = { }; const struct file_operations cifs_file_strict_nobrl_ops = { - .read = new_sync_read, - .write = new_sync_write, .read_iter = cifs_strict_readv, .write_iter = cifs_strict_writev, .open = cifs_open, @@ -1006,8 +996,6 @@ const struct file_operations cifs_file_strict_nobrl_ops = { const struct file_operations cifs_file_direct_nobrl_ops = { /* BB reevaluate whether they can be done with directio, no cache */ - .read = new_sync_read, - .write = new_sync_write, .read_iter = cifs_user_readv, .write_iter = cifs_user_writev, .open = cifs_open, diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d3aa999ab785..f3bfe08e177b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -773,8 +773,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) length = atomic_dec_return(&tcpSesAllocCount); if (length > 0) - mempool_resize(cifs_req_poolp, length + cifs_min_rcv, - GFP_KERNEL); + mempool_resize(cifs_req_poolp, length + cifs_min_rcv); } static int @@ -848,8 +847,7 @@ cifs_demultiplex_thread(void *p) length = atomic_inc_return(&tcpSesAllocCount); if (length > 1) - mempool_resize(cifs_req_poolp, length + cifs_min_rcv, - GFP_KERNEL); + mempool_resize(cifs_req_poolp, length + cifs_min_rcv); set_freezable(); while (server->tcpStatus != CifsExiting) { @@ -1599,6 +1597,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, pr_warn("CIFS: username too long\n"); goto cifs_parse_mount_err; } + + kfree(vol->username); vol->username = kstrdup(string, GFP_KERNEL); if (!vol->username) goto cifs_parse_mount_err; @@ -1700,6 +1700,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, goto cifs_parse_mount_err; } + kfree(vol->domainname); vol->domainname = kstrdup(string, GFP_KERNEL); if (!vol->domainname) { pr_warn("CIFS: no memory for domainname\n"); @@ -1731,6 +1732,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, } if (strncasecmp(string, "default", 7) != 0) { + kfree(vol->iocharset); vol->iocharset = kstrdup(string, GFP_KERNEL); if (!vol->iocharset) { @@ -2913,8 +2915,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server) * calling name ends in null (byte 16) from old smb * convention. */ - if (server->workstation_RFC1001_name && - server->workstation_RFC1001_name[0] != 0) + if (server->workstation_RFC1001_name[0] != 0) rfc1002mangle(ses_init_buf->trailer. session_req.calling_name, server->workstation_RFC1001_name, @@ -3692,6 +3693,12 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, #endif /* CIFS_WEAK_PW_HASH */ rc = SMBNTencrypt(tcon->password, ses->server->cryptkey, bcc_ptr, nls_codepage); + if (rc) { + cifs_dbg(FYI, "%s Can't generate NTLM rsp. Error: %d\n", + __func__, rc); + cifs_buf_release(smb_buffer); + return rc; + } bcc_ptr += CIFS_AUTH_RESP_SIZE; if (ses->capabilities & CAP_UNICODE) { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index a94b3e673182..ca30c391a894 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1823,6 +1823,7 @@ refind_writable: cifsFileInfo_put(inv_file); spin_lock(&cifs_file_list_lock); ++refind; + inv_file = NULL; goto refind_writable; } } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 2d4f37235ed0..3e126d7bb2ea 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -771,6 +771,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, cifs_buf_release(srchinf->ntwrk_buf_start); } kfree(srchinf); + if (rc) + goto cgii_exit; } else goto cgii_exit; diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 689f035915cf..22dfdf17d065 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -322,7 +322,7 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) /* return pointer to beginning of data area, ie offset from SMB start */ if ((*off != 0) && (*len != 0)) - return hdr->ProtocolId + *off; + return (char *)(&hdr->ProtocolId[0]) + *off; else return NULL; } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 96b5d40a2ece..eab05e1aa587 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -684,7 +684,8 @@ smb2_clone_range(const unsigned int xid, /* No need to change MaxChunks since already set to 1 */ chunk_sizes_updated = true; - } + } else + goto cchunk_out; } cchunk_out: diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 3417340bf89e..65cd7a84c8bc 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1218,7 +1218,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, struct smb2_ioctl_req *req; struct smb2_ioctl_rsp *rsp; struct TCP_Server_Info *server; - struct cifs_ses *ses = tcon->ses; + struct cifs_ses *ses; struct kvec iov[2]; int resp_buftype; int num_iovecs; @@ -1233,6 +1233,11 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, if (plen) *plen = 0; + if (tcon) + ses = tcon->ses; + else + return -EIO; + if (ses && (ses->server)) server = ses->server; else @@ -1296,14 +1301,12 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base; if ((rc != 0) && (rc != -EINVAL)) { - if (tcon) - cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); + cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); goto ioctl_exit; } else if (rc == -EINVAL) { if ((opcode != FSCTL_SRV_COPYCHUNK_WRITE) && (opcode != FSCTL_SRV_COPYCHUNK)) { - if (tcon) - cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); + cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); goto ioctl_exit; } } @@ -1629,7 +1632,7 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0); - if ((rc != 0) && tcon) + if (rc != 0) cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE); free_rsp_buf(resp_buftype, iov[0].iov_base); @@ -2114,7 +2117,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, struct kvec iov[2]; int rc = 0; int len; - int resp_buftype; + int resp_buftype = CIFS_NO_BUFFER; unsigned char *bufptr; struct TCP_Server_Info *server; struct cifs_ses *ses = tcon->ses; diff --git a/fs/coda/file.c b/fs/coda/file.c index d244d743a232..1da3805f3ddc 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -27,19 +27,14 @@ #include "coda_int.h" static ssize_t -coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *ppos) +coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { - struct coda_file_info *cfi; - struct file *host_file; + struct file *coda_file = iocb->ki_filp; + struct coda_file_info *cfi = CODA_FTOC(coda_file); - cfi = CODA_FTOC(coda_file); BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); - host_file = cfi->cfi_container; - if (!host_file->f_op->read) - return -EINVAL; - - return host_file->f_op->read(host_file, buf, count, ppos); + return vfs_iter_read(cfi->cfi_container, to, &iocb->ki_pos); } static ssize_t @@ -64,32 +59,25 @@ coda_file_splice_read(struct file *coda_file, loff_t *ppos, } static ssize_t -coda_file_write(struct file *coda_file, const char __user *buf, size_t count, loff_t *ppos) +coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to) { - struct inode *host_inode, *coda_inode = file_inode(coda_file); - struct coda_file_info *cfi; + struct file *coda_file = iocb->ki_filp; + struct inode *coda_inode = file_inode(coda_file); + struct coda_file_info *cfi = CODA_FTOC(coda_file); struct file *host_file; ssize_t ret; - cfi = CODA_FTOC(coda_file); BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); - host_file = cfi->cfi_container; - - if (!host_file->f_op->write) - return -EINVAL; - host_inode = file_inode(host_file); + host_file = cfi->cfi_container; file_start_write(host_file); mutex_lock(&coda_inode->i_mutex); - - ret = host_file->f_op->write(host_file, buf, count, ppos); - - coda_inode->i_size = host_inode->i_size; + ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos); + coda_inode->i_size = file_inode(host_file)->i_size; coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9; coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC; mutex_unlock(&coda_inode->i_mutex); file_end_write(host_file); - return ret; } @@ -231,8 +219,8 @@ int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync) const struct file_operations coda_file_operations = { .llseek = generic_file_llseek, - .read = coda_file_read, - .write = coda_file_write, + .read_iter = coda_file_read_iter, + .write_iter = coda_file_write_iter, .mmap = coda_file_mmap, .open = coda_open, .release = coda_release, diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index afec6450450f..6b8e2f091f5b 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -570,6 +570,7 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp) #define BNEPCONNDEL _IOW('B', 201, int) #define BNEPGETCONNLIST _IOR('B', 210, int) #define BNEPGETCONNINFO _IOR('B', 211, int) +#define BNEPGETSUPPFEAT _IOR('B', 212, int) #define CMTPCONNADD _IOW('C', 200, int) #define CMTPCONNDEL _IOW('C', 201, int) @@ -1247,6 +1248,7 @@ COMPATIBLE_IOCTL(BNEPCONNADD) COMPATIBLE_IOCTL(BNEPCONNDEL) COMPATIBLE_IOCTL(BNEPGETCONNLIST) COMPATIBLE_IOCTL(BNEPGETCONNINFO) +COMPATIBLE_IOCTL(BNEPGETSUPPFEAT) COMPATIBLE_IOCTL(CMTPCONNADD) COMPATIBLE_IOCTL(CMTPCONNDEL) COMPATIBLE_IOCTL(CMTPGETCONNLIST) diff --git a/fs/coredump.c b/fs/coredump.c index f319926ddf8c..bbbe139ab280 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -657,7 +657,7 @@ void do_coredump(const siginfo_t *siginfo) */ if (!uid_eq(inode->i_uid, current_fsuid())) goto close_fail; - if (!cprm.file->f_op->write) + if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) goto close_fail; if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) goto close_fail; @@ -464,6 +464,23 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, EXPORT_SYMBOL_GPL(dax_fault); /** + * dax_pfn_mkwrite - handle first write to DAX page + * @vma: The virtual memory area where the fault occurred + * @vmf: The description of the fault + * + */ +int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct super_block *sb = file_inode(vma->vm_file)->i_sb; + + sb_start_pagefault(sb); + file_update_time(vma->vm_file); + sb_end_pagefault(sb); + return VM_FAULT_NOPAGE; +} +EXPORT_SYMBOL_GPL(dax_pfn_mkwrite); + +/** * dax_zero_page_range - zero a range within a page of a DAX file * @inode: The file being truncated * @from: The file offset that is being truncated to diff --git a/fs/dcache.c b/fs/dcache.c index c71e3732e53b..d99736a63e3c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2690,7 +2690,7 @@ static int __d_unalias(struct inode *inode, struct dentry *dentry, struct dentry *alias) { struct mutex *m1 = NULL, *m2 = NULL; - int ret = -EBUSY; + int ret = -ESTALE; /* If alias and dentry share a parent, then no extra locks required */ if (alias->d_parent == dentry->d_parent) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 96400ab42d13..61e72d44cf94 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -254,6 +254,9 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) pr_debug("debugfs: creating file '%s'\n",name); + if (IS_ERR(parent)) + return parent; + error = simple_pin_fs(&debug_fs_type, &debugfs_mount, &debugfs_mount_count); if (error) diff --git a/fs/direct-io.c b/fs/direct-io.c index e181b6b2e297..6fb00e3f1059 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -37,7 +37,6 @@ #include <linux/uio.h> #include <linux/atomic.h> #include <linux/prefetch.h> -#include <linux/aio.h> /* * How many user pages to map in one call to get_user_pages(). This determines @@ -265,7 +264,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, ret = err; } - aio_complete(dio->iocb, ret, 0); + dio->iocb->ki_complete(dio->iocb, ret, 0); } kmem_cache_free(dio_cache, dio); @@ -1056,7 +1055,7 @@ static inline int drop_refcount(struct dio *dio) * operation. AIO can if it was a broken operation described above or * in fact if all the bios race to complete before we get here. In * that case dio_complete() translates the EIOCBQUEUED into the proper - * return code that the caller will hand to aio_complete(). + * return code that the caller will hand to ->complete(). * * This is managed by the bio_lock instead of being an atomic_t so that * completion paths can drop their ref and use the remaining count to diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 90d1882b306f..5ba029e627cc 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -124,7 +124,7 @@ ecryptfs_get_key_payload_data(struct key *key) } #define ECRYPTFS_MAX_KEYSET_SIZE 1024 -#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32 +#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 31 #define ECRYPTFS_MAX_NUM_ENC_KEYS 64 #define ECRYPTFS_MAX_IV_BYTES 16 /* 128 bits */ #define ECRYPTFS_SALT_BYTES 2 @@ -237,7 +237,7 @@ struct ecryptfs_crypt_stat { struct crypto_ablkcipher *tfm; struct crypto_hash *hash_tfm; /* Crypto context for generating * the initialization vectors */ - unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; + unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1]; unsigned char key[ECRYPTFS_MAX_KEY_BYTES]; unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES]; struct list_head keysig_list; diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index b07731e68c0b..a65786e26b05 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -31,7 +31,6 @@ #include <linux/security.h> #include <linux/compat.h> #include <linux/fs_stack.h> -#include <linux/aio.h> #include "ecryptfs_kernel.h" /** @@ -52,12 +51,6 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, struct file *file = iocb->ki_filp; rc = generic_file_read_iter(iocb, to); - /* - * Even though this is a async interface, we need to wait - * for IO to finish to update atime - */ - if (-EIOCBQUEUED == rc) - rc = wait_on_sync_kiocb(iocb); if (rc >= 0) { path = ecryptfs_dentry_to_lower_path(file->f_path.dentry); touch_atime(path); @@ -303,9 +296,22 @@ ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct file *lower_file = ecryptfs_file_to_lower(file); long rc = -ENOTTY; - if (lower_file->f_op->unlocked_ioctl) + if (!lower_file->f_op->unlocked_ioctl) + return rc; + + switch (cmd) { + case FITRIM: + case FS_IOC_GETFLAGS: + case FS_IOC_SETFLAGS: + case FS_IOC_GETVERSION: + case FS_IOC_SETVERSION: rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); - return rc; + fsstack_copy_attr_all(file_inode(file), file_inode(lower_file)); + + return rc; + default: + return rc; + } } #ifdef CONFIG_COMPAT @@ -315,9 +321,22 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct file *lower_file = ecryptfs_file_to_lower(file); long rc = -ENOIOCTLCMD; - if (lower_file->f_op->compat_ioctl) + if (!lower_file->f_op->compat_ioctl) + return rc; + + switch (cmd) { + case FITRIM: + case FS_IOC32_GETFLAGS: + case FS_IOC32_SETFLAGS: + case FS_IOC32_GETVERSION: + case FS_IOC32_SETVERSION: rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); - return rc; + fsstack_copy_attr_all(file_inode(file), file_inode(lower_file)); + + return rc; + default: + return rc; + } } #endif @@ -339,9 +358,7 @@ const struct file_operations ecryptfs_dir_fops = { const struct file_operations ecryptfs_main_fops = { .llseek = generic_file_llseek, - .read = new_sync_read, .read_iter = ecryptfs_read_update_atime, - .write = new_sync_write, .write_iter = generic_file_write_iter, .iterate = ecryptfs_readdir, .unlocked_ioctl = ecryptfs_unlocked_ioctl, diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 917bd5c9776a..6bd67e2011f0 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -891,7 +891,7 @@ struct ecryptfs_parse_tag_70_packet_silly_stack { struct blkcipher_desc desc; char fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX + 1]; char iv[ECRYPTFS_MAX_IV_BYTES]; - char cipher_string[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; + char cipher_string[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1]; }; /** diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 1895d60f4122..c095d3264259 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -407,7 +407,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, if (!cipher_name_set) { int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER); - BUG_ON(cipher_name_len >= ECRYPTFS_MAX_CIPHER_NAME_SIZE); + BUG_ON(cipher_name_len > ECRYPTFS_MAX_CIPHER_NAME_SIZE); strcpy(mount_crypt_stat->global_default_cipher_name, ECRYPTFS_DEFAULT_CIPHER); } diff --git a/fs/exofs/file.c b/fs/exofs/file.c index 1a376b42d305..906de66e8e7e 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c @@ -67,8 +67,6 @@ static int exofs_flush(struct file *file, fl_owner_t id) const struct file_operations exofs_file_operations = { .llseek = generic_file_llseek, - .read = new_sync_read, - .write = new_sync_write, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 678f9ab08c48..8d15febd0aa3 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -793,7 +793,6 @@ extern int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync); extern const struct inode_operations ext2_file_inode_operations; extern const struct file_operations ext2_file_operations; -extern const struct file_operations ext2_dax_file_operations; /* inode.c */ extern const struct address_space_operations ext2_aops; diff --git a/fs/ext2/file.c b/fs/ext2/file.c index e31701713516..3a0a6c6406d0 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -39,6 +39,7 @@ static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) static const struct vm_operations_struct ext2_dax_vm_ops = { .fault = ext2_dax_fault, .page_mkwrite = ext2_dax_mkwrite, + .pfn_mkwrite = dax_pfn_mkwrite, }; static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma) @@ -92,8 +93,6 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync) */ const struct file_operations ext2_file_operations = { .llseek = generic_file_llseek, - .read = new_sync_read, - .write = new_sync_write, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .unlocked_ioctl = ext2_ioctl, @@ -108,24 +107,6 @@ const struct file_operations ext2_file_operations = { .splice_write = iter_file_splice_write, }; -#ifdef CONFIG_FS_DAX -const struct file_operations ext2_dax_file_operations = { - .llseek = generic_file_llseek, - .read = new_sync_read, - .write = new_sync_write, - .read_iter = generic_file_read_iter, - .write_iter = generic_file_write_iter, - .unlocked_ioctl = ext2_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = ext2_compat_ioctl, -#endif - .mmap = ext2_file_mmap, - .open = dquot_file_open, - .release = ext2_release_file, - .fsync = ext2_fsync, -}; -#endif - const struct inode_operations ext2_file_inode_operations = { #ifdef CONFIG_EXT2_FS_XATTR .setxattr = generic_setxattr, diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 6434bc000125..b29eb6747116 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -31,7 +31,7 @@ #include <linux/mpage.h> #include <linux/fiemap.h> #include <linux/namei.h> -#include <linux/aio.h> +#include <linux/uio.h> #include "ext2.h" #include "acl.h" #include "xattr.h" @@ -1388,10 +1388,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) if (S_ISREG(inode->i_mode)) { inode->i_op = &ext2_file_inode_operations; - if (test_opt(inode->i_sb, DAX)) { - inode->i_mapping->a_ops = &ext2_aops; - inode->i_fop = &ext2_dax_file_operations; - } else if (test_opt(inode->i_sb, NOBH)) { + if (test_opt(inode->i_sb, NOBH)) { inode->i_mapping->a_ops = &ext2_nobh_aops; inode->i_fop = &ext2_file_operations; } else { diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 148f6e3789ea..ce422931f411 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -104,10 +104,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode return PTR_ERR(inode); inode->i_op = &ext2_file_inode_operations; - if (test_opt(inode->i_sb, DAX)) { - inode->i_mapping->a_ops = &ext2_aops; - inode->i_fop = &ext2_dax_file_operations; - } else if (test_opt(inode->i_sb, NOBH)) { + if (test_opt(inode->i_sb, NOBH)) { inode->i_mapping->a_ops = &ext2_nobh_aops; inode->i_fop = &ext2_file_operations; } else { @@ -125,10 +122,7 @@ static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) return PTR_ERR(inode); inode->i_op = &ext2_file_inode_operations; - if (test_opt(inode->i_sb, DAX)) { - inode->i_mapping->a_ops = &ext2_aops; - inode->i_fop = &ext2_dax_file_operations; - } else if (test_opt(inode->i_sb, NOBH)) { + if (test_opt(inode->i_sb, NOBH)) { inode->i_mapping->a_ops = &ext2_nobh_aops; inode->i_fop = &ext2_file_operations; } else { diff --git a/fs/ext3/file.c b/fs/ext3/file.c index a062fa1e1b11..3b8f650de22c 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -50,8 +50,6 @@ static int ext3_release_file (struct inode * inode, struct file * filp) const struct file_operations ext3_file_operations = { .llseek = generic_file_llseek, - .read = new_sync_read, - .write = new_sync_write, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .unlocked_ioctl = ext3_ioctl, diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 2c6ccc49ba27..db07ffbe7c85 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -27,7 +27,7 @@ #include <linux/writeback.h> #include <linux/mpage.h> #include <linux/namei.h> -#include <linux/aio.h> +#include <linux/uio.h> #include "ext3.h" #include "xattr.h" #include "acl.h" diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f63c3d5805c4..8a3981ea35d8 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2593,7 +2593,6 @@ extern const struct file_operations ext4_dir_operations; /* file.c */ extern const struct inode_operations ext4_file_inode_operations; extern const struct file_operations ext4_file_operations; -extern const struct file_operations ext4_dax_file_operations; extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); /* inline.c */ diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 33a09da16c9c..7a6defcf3352 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -23,9 +23,9 @@ #include <linux/jbd2.h> #include <linux/mount.h> #include <linux/path.h> -#include <linux/aio.h> #include <linux/quotaops.h> #include <linux/pagevec.h> +#include <linux/uio.h> #include "ext4.h" #include "ext4_jbd2.h" #include "xattr.h" @@ -206,6 +206,7 @@ static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) static const struct vm_operations_struct ext4_dax_vm_ops = { .fault = ext4_dax_fault, .page_mkwrite = ext4_dax_mkwrite, + .pfn_mkwrite = dax_pfn_mkwrite, }; #else #define ext4_dax_vm_ops ext4_file_vm_ops @@ -607,8 +608,6 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence) const struct file_operations ext4_file_operations = { .llseek = ext4_llseek, - .read = new_sync_read, - .write = new_sync_write, .read_iter = generic_file_read_iter, .write_iter = ext4_file_write_iter, .unlocked_ioctl = ext4_ioctl, @@ -624,26 +623,6 @@ const struct file_operations ext4_file_operations = { .fallocate = ext4_fallocate, }; -#ifdef CONFIG_FS_DAX -const struct file_operations ext4_dax_file_operations = { - .llseek = ext4_llseek, - .read = new_sync_read, - .write = new_sync_write, - .read_iter = generic_file_read_iter, - .write_iter = ext4_file_write_iter, - .unlocked_ioctl = ext4_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = ext4_compat_ioctl, -#endif - .mmap = ext4_file_mmap, - .open = ext4_file_open, - .release = ext4_release_file, - .fsync = ext4_sync_file, - /* Splice not yet supported with DAX */ - .fallocate = ext4_fallocate, -}; -#endif - const struct inode_operations ext4_file_inode_operations = { .setattr = ext4_setattr, .getattr = ext4_getattr, diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 45fe924f82bc..740c7871c117 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -20,9 +20,9 @@ * (sct@redhat.com), 1993, 1998 */ -#include <linux/aio.h> #include "ext4_jbd2.h" #include "truncate.h" +#include <linux/uio.h> #include <trace/events/ext4.h> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5cb9a212b86f..035b7a06f1c3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -37,7 +37,6 @@ #include <linux/printk.h> #include <linux/slab.h> #include <linux/ratelimit.h> -#include <linux/aio.h> #include <linux/bitops.h> #include "ext4_jbd2.h" @@ -4091,10 +4090,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) if (S_ISREG(inode->i_mode)) { inode->i_op = &ext4_file_inode_operations; - if (test_opt(inode->i_sb, DAX)) - inode->i_fop = &ext4_dax_file_operations; - else - inode->i_fop = &ext4_file_operations; + inode->i_fop = &ext4_file_operations; ext4_set_aops(inode); } else if (S_ISDIR(inode->i_mode)) { inode->i_op = &ext4_dir_inode_operations; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 28fe71a2904c..2291923dae4e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2235,10 +2235,7 @@ retry: err = PTR_ERR(inode); if (!IS_ERR(inode)) { inode->i_op = &ext4_file_inode_operations; - if (test_opt(inode->i_sb, DAX)) - inode->i_fop = &ext4_dax_file_operations; - else - inode->i_fop = &ext4_file_operations; + inode->i_fop = &ext4_file_operations; ext4_set_aops(inode); err = ext4_add_nondir(handle, dentry, inode); if (!err && IS_DIRSYNC(dir)) @@ -2302,10 +2299,7 @@ retry: err = PTR_ERR(inode); if (!IS_ERR(inode)) { inode->i_op = &ext4_file_inode_operations; - if (test_opt(inode->i_sb, DAX)) - inode->i_fop = &ext4_dax_file_operations; - else - inode->i_fop = &ext4_file_operations; + inode->i_fop = &ext4_file_operations; ext4_set_aops(inode); d_tmpfile(dentry, inode); err = ext4_orphan_add(handle, inode); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index b24a2541a9ba..464984261e69 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -18,7 +18,6 @@ #include <linux/pagevec.h> #include <linux/mpage.h> #include <linux/namei.h> -#include <linux/aio.h> #include <linux/uio.h> #include <linux/bio.h> #include <linux/workqueue.h> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 985ed023a750..497f8515d205 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -12,12 +12,12 @@ #include <linux/f2fs_fs.h> #include <linux/buffer_head.h> #include <linux/mpage.h> -#include <linux/aio.h> #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/blkdev.h> #include <linux/bio.h> #include <linux/prefetch.h> +#include <linux/uio.h> #include "f2fs.h" #include "node.h" diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 98dac27bc3f7..df6a0596eccf 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1104,8 +1104,6 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) const struct file_operations f2fs_file_operations = { .llseek = f2fs_llseek, - .read = new_sync_read, - .write = new_sync_write, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .open = generic_file_open, diff --git a/fs/fat/file.c b/fs/fat/file.c index 8429c68e3057..1e98d333879f 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -170,8 +170,6 @@ int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) const struct file_operations fat_file_operations = { .llseek = generic_file_llseek, - .read = new_sync_read, - .write = new_sync_write, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 497c7c5263c7..8521207de229 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -19,7 +19,6 @@ #include <linux/mpage.h> #include <linux/buffer_head.h> #include <linux/mount.h> -#include <linux/aio.h> #include <linux/vfs.h> #include <linux/parser.h> #include <linux/uio.h> diff --git a/fs/file_table.c b/fs/file_table.c index 3f85411b03ce..294174dcc226 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -168,10 +168,10 @@ struct file *alloc_file(struct path *path, fmode_t mode, file->f_inode = path->dentry->d_inode; file->f_mapping = path->dentry->d_inode->i_mapping; if ((mode & FMODE_READ) && - likely(fop->read || fop->aio_read || fop->read_iter)) + likely(fop->read || fop->read_iter)) mode |= FMODE_CAN_READ; if ((mode & FMODE_WRITE) && - likely(fop->write || fop->aio_write || fop->write_iter)) + likely(fop->write || fop->write_iter)) mode |= FMODE_CAN_WRITE; file->f_mode = mode; file->f_op = fop; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e907052eeadb..32a8bbd7a9ad 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -53,6 +53,18 @@ struct wb_writeback_work { struct completion *done; /* set if the caller waits */ }; +/* + * If an inode is constantly having its pages dirtied, but then the + * updates stop dirtytime_expire_interval seconds in the past, it's + * possible for the worst case time between when an inode has its + * timestamps updated and when they finally get written out to be two + * dirtytime_expire_intervals. We set the default to 12 hours (in + * seconds), which means most of the time inodes will have their + * timestamps written to disk after 12 hours, but in the worst case a + * few inodes might not their timestamps updated for 24 hours. + */ +unsigned int dirtytime_expire_interval = 12 * 60 * 60; + /** * writeback_in_progress - determine whether there is writeback in progress * @bdi: the device's backing_dev_info structure. @@ -275,8 +287,8 @@ static int move_expired_inodes(struct list_head *delaying_queue, if ((flags & EXPIRE_DIRTY_ATIME) == 0) older_than_this = work->older_than_this; - else if ((work->reason == WB_REASON_SYNC) == 0) { - expire_time = jiffies - (HZ * 86400); + else if (!work->for_sync) { + expire_time = jiffies - (dirtytime_expire_interval * HZ); older_than_this = &expire_time; } while (!list_empty(delaying_queue)) { @@ -458,6 +470,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, */ redirty_tail(inode, wb); } else if (inode->i_state & I_DIRTY_TIME) { + inode->dirtied_when = jiffies; list_move(&inode->i_wb_list, &wb->b_dirty_time); } else { /* The inode is clean. Remove from writeback lists. */ @@ -505,12 +518,17 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) spin_lock(&inode->i_lock); dirty = inode->i_state & I_DIRTY; - if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) && - (inode->i_state & I_DIRTY_TIME)) || - (inode->i_state & I_DIRTY_TIME_EXPIRED)) { - dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; - trace_writeback_lazytime(inode); - } + if (inode->i_state & I_DIRTY_TIME) { + if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) || + unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) || + unlikely(time_after(jiffies, + (inode->dirtied_time_when + + dirtytime_expire_interval * HZ)))) { + dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; + trace_writeback_lazytime(inode); + } + } else + inode->i_state &= ~I_DIRTY_TIME_EXPIRED; inode->i_state &= ~dirty; /* @@ -1131,6 +1149,56 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason) rcu_read_unlock(); } +/* + * Wake up bdi's periodically to make sure dirtytime inodes gets + * written back periodically. We deliberately do *not* check the + * b_dirtytime list in wb_has_dirty_io(), since this would cause the + * kernel to be constantly waking up once there are any dirtytime + * inodes on the system. So instead we define a separate delayed work + * function which gets called much more rarely. (By default, only + * once every 12 hours.) + * + * If there is any other write activity going on in the file system, + * this function won't be necessary. But if the only thing that has + * happened on the file system is a dirtytime inode caused by an atime + * update, we need this infrastructure below to make sure that inode + * eventually gets pushed out to disk. + */ +static void wakeup_dirtytime_writeback(struct work_struct *w); +static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback); + +static void wakeup_dirtytime_writeback(struct work_struct *w) +{ + struct backing_dev_info *bdi; + + rcu_read_lock(); + list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { + if (list_empty(&bdi->wb.b_dirty_time)) + continue; + bdi_wakeup_thread(bdi); + } + rcu_read_unlock(); + schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ); +} + +static int __init start_dirtytime_writeback(void) +{ + schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ); + return 0; +} +__initcall(start_dirtytime_writeback); + +int dirtytime_interval_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (ret == 0 && write) + mod_delayed_work(system_wq, &dirtytime_work, 0); + return ret; +} + static noinline void block_dump___mark_inode_dirty(struct inode *inode) { if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { @@ -1269,8 +1337,13 @@ void __mark_inode_dirty(struct inode *inode, int flags) } inode->dirtied_when = jiffies; - list_move(&inode->i_wb_list, dirtytime ? - &bdi->wb.b_dirty_time : &bdi->wb.b_dirty); + if (dirtytime) + inode->dirtied_time_when = jiffies; + if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES)) + list_move(&inode->i_wb_list, &bdi->wb.b_dirty); + else + list_move(&inode->i_wb_list, + &bdi->wb.b_dirty_time); spin_unlock(&bdi->wb.list_lock); trace_writeback_dirty_inode_enqueue(inode); diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 28d0c7abba1c..e5bbf748b698 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -38,7 +38,6 @@ #include <linux/device.h> #include <linux/file.h> #include <linux/fs.h> -#include <linux/aio.h> #include <linux/kdev_t.h> #include <linux/kthread.h> #include <linux/list.h> @@ -48,6 +47,7 @@ #include <linux/slab.h> #include <linux/stat.h> #include <linux/module.h> +#include <linux/uio.h> #include "fuse_i.h" @@ -88,32 +88,23 @@ static struct list_head *cuse_conntbl_head(dev_t devt) * FUSE file. */ -static ssize_t cuse_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) +static ssize_t cuse_read_iter(struct kiocb *kiocb, struct iov_iter *to) { + struct fuse_io_priv io = { .async = 0, .file = kiocb->ki_filp }; loff_t pos = 0; - struct iovec iov = { .iov_base = buf, .iov_len = count }; - struct fuse_io_priv io = { .async = 0, .file = file }; - struct iov_iter ii; - iov_iter_init(&ii, READ, &iov, 1, count); - return fuse_direct_io(&io, &ii, &pos, FUSE_DIO_CUSE); + return fuse_direct_io(&io, to, &pos, FUSE_DIO_CUSE); } -static ssize_t cuse_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t cuse_write_iter(struct kiocb *kiocb, struct iov_iter *from) { + struct fuse_io_priv io = { .async = 0, .file = kiocb->ki_filp }; loff_t pos = 0; - struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; - struct fuse_io_priv io = { .async = 0, .file = file }; - struct iov_iter ii; - iov_iter_init(&ii, WRITE, &iov, 1, count); - /* * No locking or generic_write_checks(), the server is * responsible for locking and sanity checks. */ - return fuse_direct_io(&io, &ii, &pos, + return fuse_direct_io(&io, from, &pos, FUSE_DIO_WRITE | FUSE_DIO_CUSE); } @@ -186,8 +177,8 @@ static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd, static const struct file_operations cuse_frontend_fops = { .owner = THIS_MODULE, - .read = cuse_read, - .write = cuse_write, + .read_iter = cuse_read_iter, + .write_iter = cuse_write_iter, .open = cuse_open, .release = cuse_release, .unlocked_ioctl = cuse_file_ioctl, diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ed19a7d622fa..c8b68ab2e574 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -19,7 +19,6 @@ #include <linux/pipe_fs_i.h> #include <linux/swap.h> #include <linux/splice.h> -#include <linux/aio.h> MODULE_ALIAS_MISCDEV(FUSE_MINOR); MODULE_ALIAS("devname:fuse"); @@ -711,28 +710,26 @@ struct fuse_copy_state { struct fuse_conn *fc; int write; struct fuse_req *req; - const struct iovec *iov; + struct iov_iter *iter; struct pipe_buffer *pipebufs; struct pipe_buffer *currbuf; struct pipe_inode_info *pipe; unsigned long nr_segs; - unsigned long seglen; - unsigned long addr; struct page *pg; unsigned len; unsigned offset; unsigned move_pages:1; }; -static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc, +static void fuse_copy_init(struct fuse_copy_state *cs, + struct fuse_conn *fc, int write, - const struct iovec *iov, unsigned long nr_segs) + struct iov_iter *iter) { memset(cs, 0, sizeof(*cs)); cs->fc = fc; cs->write = write; - cs->iov = iov; - cs->nr_segs = nr_segs; + cs->iter = iter; } /* Unmap and put previous page of userspace buffer */ @@ -800,22 +797,16 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) cs->nr_segs++; } } else { - if (!cs->seglen) { - BUG_ON(!cs->nr_segs); - cs->seglen = cs->iov[0].iov_len; - cs->addr = (unsigned long) cs->iov[0].iov_base; - cs->iov++; - cs->nr_segs--; - } - err = get_user_pages_fast(cs->addr, 1, cs->write, &page); + size_t off; + err = iov_iter_get_pages(cs->iter, &page, PAGE_SIZE, 1, &off); if (err < 0) return err; - BUG_ON(err != 1); + BUG_ON(!err); + cs->len = err; + cs->offset = off; cs->pg = page; - cs->offset = cs->addr % PAGE_SIZE; - cs->len = min(PAGE_SIZE - cs->offset, cs->seglen); - cs->seglen -= cs->len; - cs->addr += cs->len; + cs->offset = off; + iov_iter_advance(cs->iter, err); } return lock_request(cs->fc, cs->req); @@ -890,8 +881,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) newpage = buf->page; - if (WARN_ON(!PageUptodate(newpage))) - return -EIO; + if (!PageUptodate(newpage)) + SetPageUptodate(newpage); ClearPageMappedToDisk(newpage); @@ -1353,8 +1344,18 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, return err; } -static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static int fuse_dev_open(struct inode *inode, struct file *file) +{ + /* + * The fuse device's file's private_data is used to hold + * the fuse_conn(ection) when it is mounted, and is used to + * keep track of whether the file has been mounted already. + */ + file->private_data = NULL; + return 0; +} + +static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to) { struct fuse_copy_state cs; struct file *file = iocb->ki_filp; @@ -1362,9 +1363,12 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, if (!fc) return -EPERM; - fuse_copy_init(&cs, fc, 1, iov, nr_segs); + if (!iter_is_iovec(to)) + return -EINVAL; + + fuse_copy_init(&cs, fc, 1, to); - return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs)); + return fuse_dev_do_read(fc, file, &cs, iov_iter_count(to)); } static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, @@ -1384,7 +1388,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, if (!bufs) return -ENOMEM; - fuse_copy_init(&cs, fc, 1, NULL, 0); + fuse_copy_init(&cs, fc, 1, NULL); cs.pipebufs = bufs; cs.pipe = pipe; ret = fuse_dev_do_read(fc, in, &cs, len); @@ -1797,6 +1801,9 @@ copy_finish: static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, unsigned int size, struct fuse_copy_state *cs) { + /* Don't try to move pages (yet) */ + cs->move_pages = 0; + switch (code) { case FUSE_NOTIFY_POLL: return fuse_notify_poll(fc, size, cs); @@ -1957,17 +1964,19 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, return err; } -static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from) { struct fuse_copy_state cs; struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp); if (!fc) return -EPERM; - fuse_copy_init(&cs, fc, 0, iov, nr_segs); + if (!iter_is_iovec(from)) + return -EINVAL; + + fuse_copy_init(&cs, fc, 0, from); - return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs)); + return fuse_dev_do_write(fc, &cs, iov_iter_count(from)); } static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, @@ -2030,8 +2039,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, } pipe_unlock(pipe); - fuse_copy_init(&cs, fc, 0, NULL, nbuf); + fuse_copy_init(&cs, fc, 0, NULL); cs.pipebufs = bufs; + cs.nr_segs = nbuf; cs.pipe = pipe; if (flags & SPLICE_F_MOVE) @@ -2217,12 +2227,11 @@ static int fuse_dev_fasync(int fd, struct file *file, int on) const struct file_operations fuse_dev_operations = { .owner = THIS_MODULE, + .open = fuse_dev_open, .llseek = no_llseek, - .read = do_sync_read, - .aio_read = fuse_dev_read, + .read_iter = fuse_dev_read, .splice_read = fuse_dev_splice_read, - .write = do_sync_write, - .aio_write = fuse_dev_write, + .write_iter = fuse_dev_write, .splice_write = fuse_dev_splice_write, .poll = fuse_dev_poll, .release = fuse_dev_release, diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c01ec3bdcfd8..e1afdd7abf90 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -15,8 +15,8 @@ #include <linux/module.h> #include <linux/compat.h> #include <linux/swap.h> -#include <linux/aio.h> #include <linux/falloc.h> +#include <linux/uio.h> static const struct file_operations fuse_direct_io_file_operations; @@ -528,6 +528,17 @@ static void fuse_release_user_pages(struct fuse_req *req, int write) } } +static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io) +{ + if (io->err) + return io->err; + + if (io->bytes >= 0 && io->write) + return -EIO; + + return io->bytes < 0 ? io->size : io->bytes; +} + /** * In case of short read, the caller sets 'pos' to the position of * actual end of fuse request in IO request. Otherwise, if bytes_requested @@ -546,6 +557,7 @@ static void fuse_release_user_pages(struct fuse_req *req, int write) */ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) { + bool is_sync = is_sync_kiocb(io->iocb); int left; spin_lock(&io->lock); @@ -555,30 +567,24 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) io->bytes = pos; left = --io->reqs; + if (!left && is_sync) + complete(io->done); spin_unlock(&io->lock); - if (!left) { - long res; + if (!left && !is_sync) { + ssize_t res = fuse_get_res_by_io(io); - if (io->err) - res = io->err; - else if (io->bytes >= 0 && io->write) - res = -EIO; - else { - res = io->bytes < 0 ? io->size : io->bytes; - - if (!is_sync_kiocb(io->iocb)) { - struct inode *inode = file_inode(io->iocb->ki_filp); - struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_inode *fi = get_fuse_inode(inode); + if (res >= 0) { + struct inode *inode = file_inode(io->iocb->ki_filp); + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); - spin_lock(&fc->lock); - fi->attr_version = ++fc->attr_version; - spin_unlock(&fc->lock); - } + spin_lock(&fc->lock); + fi->attr_version = ++fc->attr_version; + spin_unlock(&fc->lock); } - aio_complete(io->iocb, res, 0); + io->iocb->ki_complete(io->iocb, res, 0); kfree(io); } } @@ -1395,55 +1401,33 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io, return res; } -static ssize_t fuse_direct_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to) { - struct fuse_io_priv io = { .async = 0, .file = file }; - struct iovec iov = { .iov_base = buf, .iov_len = count }; - struct iov_iter ii; - iov_iter_init(&ii, READ, &iov, 1, count); - return __fuse_direct_read(&io, &ii, ppos); -} - -static ssize_t __fuse_direct_write(struct fuse_io_priv *io, - struct iov_iter *iter, - loff_t *ppos) -{ - struct file *file = io->file; - struct inode *inode = file_inode(file); - size_t count = iov_iter_count(iter); - ssize_t res; - - - res = generic_write_checks(file, ppos, &count, 0); - if (!res) { - iov_iter_truncate(iter, count); - res = fuse_direct_io(io, iter, ppos, FUSE_DIO_WRITE); - } - - fuse_invalidate_attr(inode); - - return res; + struct fuse_io_priv io = { .async = 0, .file = iocb->ki_filp }; + return __fuse_direct_read(&io, to, &iocb->ki_pos); } -static ssize_t fuse_direct_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) { - struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; + struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); - ssize_t res; struct fuse_io_priv io = { .async = 0, .file = file }; - struct iov_iter ii; - iov_iter_init(&ii, WRITE, &iov, 1, count); + size_t count = iov_iter_count(from); + ssize_t res; if (is_bad_inode(inode)) return -EIO; /* Don't allow parallel writes to the same file */ mutex_lock(&inode->i_mutex); - res = __fuse_direct_write(&io, &ii, ppos); + res = generic_write_checks(file, &iocb->ki_pos, &count, 0); + if (!res) { + iov_iter_truncate(from, count); + res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); + } + fuse_invalidate_attr(inode); if (res > 0) - fuse_write_update_size(inode, *ppos); + fuse_write_update_size(inode, iocb->ki_pos); mutex_unlock(&inode->i_mutex); return res; @@ -2801,6 +2785,7 @@ static ssize_t fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { + DECLARE_COMPLETION_ONSTACK(wait); ssize_t ret = 0; struct file *file = iocb->ki_filp; struct fuse_file *ff = file->private_data; @@ -2852,10 +2837,20 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE) io->async = false; - if (rw == WRITE) - ret = __fuse_direct_write(io, iter, &pos); - else + if (io->async && is_sync_kiocb(iocb)) + io->done = &wait; + + if (rw == WRITE) { + ret = generic_write_checks(file, &pos, &count, 0); + if (!ret) { + iov_iter_truncate(iter, count); + ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE); + } + + fuse_invalidate_attr(inode); + } else { ret = __fuse_direct_read(io, iter, &pos); + } if (io->async) { fuse_aio_complete(io, ret < 0 ? ret : 0, -1); @@ -2864,11 +2859,12 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, if (!is_sync_kiocb(iocb)) return -EIOCBQUEUED; - ret = wait_on_sync_kiocb(iocb); - } else { - kfree(io); + wait_for_completion(&wait); + ret = fuse_get_res_by_io(io); } + kfree(io); + if (rw == WRITE) { if (ret > 0) fuse_write_update_size(inode, pos); @@ -2957,9 +2953,7 @@ out: static const struct file_operations fuse_file_operations = { .llseek = fuse_file_llseek, - .read = new_sync_read, .read_iter = fuse_file_read_iter, - .write = new_sync_write, .write_iter = fuse_file_write_iter, .mmap = fuse_file_mmap, .open = fuse_open, @@ -2977,8 +2971,8 @@ static const struct file_operations fuse_file_operations = { static const struct file_operations fuse_direct_io_file_operations = { .llseek = fuse_file_llseek, - .read = fuse_direct_read, - .write = fuse_direct_write, + .read_iter = fuse_direct_read_iter, + .write_iter = fuse_direct_write_iter, .mmap = fuse_direct_mmap, .open = fuse_open, .flush = fuse_flush, diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 1cdfb07c1376..7354dc142a50 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -263,6 +263,7 @@ struct fuse_io_priv { int err; struct kiocb *iocb; struct file *file; + struct completion *done; }; /** diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 7b3143064af1..1be3b061c05c 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -110,11 +110,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS); if (error) goto out; - - if (acl) - set_cached_acl(inode, type, acl); - else - forget_cached_acl(inode, type); + set_cached_acl(inode, type, acl); out: kfree(data); return error; diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 4ad4f94edebe..a6e6990aea39 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -20,7 +20,7 @@ #include <linux/swap.h> #include <linux/gfs2_ondisk.h> #include <linux/backing-dev.h> -#include <linux/aio.h> +#include <linux/uio.h> #include <trace/events/writeback.h> #include "gfs2.h" @@ -671,12 +671,12 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, if (alloc_required) { struct gfs2_alloc_parms ap = { .aflags = 0, }; - error = gfs2_quota_lock_check(ip); + requested = data_blocks + ind_blocks; + ap.target = requested; + error = gfs2_quota_lock_check(ip, &ap); if (error) goto out_unlock; - requested = data_blocks + ind_blocks; - ap.target = requested; error = gfs2_inplace_reserve(ip, &ap); if (error) goto out_qunlock; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index f0b945ab853e..61296ecbd0e2 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1224,7 +1224,7 @@ static int do_grow(struct inode *inode, u64 size) if (gfs2_is_stuffed(ip) && (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { - error = gfs2_quota_lock_check(ip); + error = gfs2_quota_lock_check(ip, &ap); if (error) return error; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 3e32bb8e2d7e..207eb4a8135e 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -25,7 +25,6 @@ #include <asm/uaccess.h> #include <linux/dlm.h> #include <linux/dlm_plock.h> -#include <linux/aio.h> #include <linux/delay.h> #include "gfs2.h" @@ -429,11 +428,11 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret) goto out_unlock; - ret = gfs2_quota_lock_check(ip); - if (ret) - goto out_unlock; gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); ap.target = data_blocks + ind_blocks; + ret = gfs2_quota_lock_check(ip, &ap); + if (ret) + goto out_unlock; ret = gfs2_inplace_reserve(ip, &ap); if (ret) goto out_quota_unlock; @@ -765,22 +764,30 @@ out: brelse(dibh); return error; } - -static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, - unsigned int *data_blocks, unsigned int *ind_blocks) +/** + * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of + * blocks, determine how many bytes can be written. + * @ip: The inode in question. + * @len: Max cap of bytes. What we return in *len must be <= this. + * @data_blocks: Compute and return the number of data blocks needed + * @ind_blocks: Compute and return the number of indirect blocks needed + * @max_blocks: The total blocks available to work with. + * + * Returns: void, but @len, @data_blocks and @ind_blocks are filled in. + */ +static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len, + unsigned int *data_blocks, unsigned int *ind_blocks, + unsigned int max_blocks) { + loff_t max = *len; const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - unsigned int max_blocks = ip->i_rgd->rd_free_clone; unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); for (tmp = max_data; tmp > sdp->sd_diptrs;) { tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); max_data -= tmp; } - /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, - so it might end up with fewer data blocks */ - if (max_data <= *data_blocks) - return; + *data_blocks = max_data; *ind_blocks = max_blocks - max_data; *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; @@ -797,7 +804,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned int data_blocks = 0, ind_blocks = 0, rblocks; - loff_t bytes, max_bytes; + loff_t bytes, max_bytes, max_blks = UINT_MAX; int error; const loff_t pos = offset; const loff_t count = len; @@ -819,6 +826,9 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t gfs2_size_hint(file, offset, len); + gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks); + ap.min_target = data_blocks + ind_blocks; + while (len > 0) { if (len < bytes) bytes = len; @@ -827,27 +837,41 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t offset += bytes; continue; } - error = gfs2_quota_lock_check(ip); + + /* We need to determine how many bytes we can actually + * fallocate without exceeding quota or going over the + * end of the fs. We start off optimistically by assuming + * we can write max_bytes */ + max_bytes = (len > max_chunk_size) ? max_chunk_size : len; + + /* Since max_bytes is most likely a theoretical max, we + * calculate a more realistic 'bytes' to serve as a good + * starting point for the number of bytes we may be able + * to write */ + gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); + ap.target = data_blocks + ind_blocks; + + error = gfs2_quota_lock_check(ip, &ap); if (error) return error; -retry: - gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); + /* ap.allowed tells us how many blocks quota will allow + * us to write. Check if this reduces max_blks */ + if (ap.allowed && ap.allowed < max_blks) + max_blks = ap.allowed; - ap.target = data_blocks + ind_blocks; error = gfs2_inplace_reserve(ip, &ap); - if (error) { - if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { - bytes >>= 1; - bytes &= bsize_mask; - if (bytes == 0) - bytes = sdp->sd_sb.sb_bsize; - goto retry; - } + if (error) goto out_qunlock; - } - max_bytes = bytes; - calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len, - &max_bytes, &data_blocks, &ind_blocks); + + /* check if the selected rgrp limits our max_blks further */ + if (ap.allowed && ap.allowed < max_blks) + max_blks = ap.allowed; + + /* Almost done. Calculate bytes that can be written using + * max_blks. We also recompute max_bytes, data_blocks and + * ind_blocks */ + calc_max_reserv(ip, &max_bytes, &data_blocks, + &ind_blocks, max_blks); rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks); @@ -931,6 +955,22 @@ out_uninit: return ret; } +static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, + size_t len, unsigned int flags) +{ + int error; + struct gfs2_inode *ip = GFS2_I(out->f_mapping->host); + + error = gfs2_rs_alloc(ip); + if (error) + return (ssize_t)error; + + gfs2_size_hint(out, *ppos, len); + + return iter_file_splice_write(pipe, out, ppos, len, flags); +} + #ifdef CONFIG_GFS2_FS_LOCKING_DLM /** @@ -1065,9 +1105,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) const struct file_operations gfs2_file_fops = { .llseek = gfs2_llseek, - .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = new_sync_write, .write_iter = gfs2_file_write_iter, .unlocked_ioctl = gfs2_ioctl, .mmap = gfs2_mmap, @@ -1077,7 +1115,7 @@ const struct file_operations gfs2_file_fops = { .lock = gfs2_lock, .flock = gfs2_flock, .splice_read = generic_file_splice_read, - .splice_write = iter_file_splice_write, + .splice_write = gfs2_file_splice_write, .setlease = simple_nosetlease, .fallocate = gfs2_fallocate, }; @@ -1097,9 +1135,7 @@ const struct file_operations gfs2_dir_fops = { const struct file_operations gfs2_file_fops_nolock = { .llseek = gfs2_llseek, - .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = new_sync_write, .write_iter = gfs2_file_write_iter, .unlocked_ioctl = gfs2_ioctl, .mmap = gfs2_mmap, @@ -1107,7 +1143,7 @@ const struct file_operations gfs2_file_fops_nolock = { .release = gfs2_release, .fsync = gfs2_fsync, .splice_read = generic_file_splice_read, - .splice_write = iter_file_splice_write, + .splice_write = gfs2_file_splice_write, .setlease = generic_setlease, .fallocate = gfs2_fallocate, }; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f42dffba056a..0fa8062f85a7 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2047,34 +2047,41 @@ static const struct file_operations gfs2_sbstats_fops = { int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) { - sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root); - if (!sdp->debugfs_dir) - return -ENOMEM; - sdp->debugfs_dentry_glocks = debugfs_create_file("glocks", - S_IFREG | S_IRUGO, - sdp->debugfs_dir, sdp, - &gfs2_glocks_fops); - if (!sdp->debugfs_dentry_glocks) + struct dentry *dent; + + dent = debugfs_create_dir(sdp->sd_table_name, gfs2_root); + if (IS_ERR_OR_NULL(dent)) + goto fail; + sdp->debugfs_dir = dent; + + dent = debugfs_create_file("glocks", + S_IFREG | S_IRUGO, + sdp->debugfs_dir, sdp, + &gfs2_glocks_fops); + if (IS_ERR_OR_NULL(dent)) goto fail; + sdp->debugfs_dentry_glocks = dent; - sdp->debugfs_dentry_glstats = debugfs_create_file("glstats", - S_IFREG | S_IRUGO, - sdp->debugfs_dir, sdp, - &gfs2_glstats_fops); - if (!sdp->debugfs_dentry_glstats) + dent = debugfs_create_file("glstats", + S_IFREG | S_IRUGO, + sdp->debugfs_dir, sdp, + &gfs2_glstats_fops); + if (IS_ERR_OR_NULL(dent)) goto fail; + sdp->debugfs_dentry_glstats = dent; - sdp->debugfs_dentry_sbstats = debugfs_create_file("sbstats", - S_IFREG | S_IRUGO, - sdp->debugfs_dir, sdp, - &gfs2_sbstats_fops); - if (!sdp->debugfs_dentry_sbstats) + dent = debugfs_create_file("sbstats", + S_IFREG | S_IRUGO, + sdp->debugfs_dir, sdp, + &gfs2_sbstats_fops); + if (IS_ERR_OR_NULL(dent)) goto fail; + sdp->debugfs_dentry_sbstats = dent; return 0; fail: gfs2_delete_debugfs_file(sdp); - return -ENOMEM; + return dent ? PTR_ERR(dent) : -ENOMEM; } void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) @@ -2100,6 +2107,8 @@ void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) int gfs2_register_debugfs(void) { gfs2_root = debugfs_create_dir("gfs2", NULL); + if (IS_ERR(gfs2_root)) + return PTR_ERR(gfs2_root); return gfs2_root ? 0 : -ENOMEM; } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 7a2dbbc0d634..58b75abf6ab2 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -301,8 +301,10 @@ struct gfs2_blkreserv { * to the allocation code. */ struct gfs2_alloc_parms { - u32 target; + u64 target; + u32 min_target; u32 aflags; + u64 allowed; }; enum { diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 73c72253faac..08bc84d7e768 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -382,7 +382,7 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks) struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, }; int error; - error = gfs2_quota_lock_check(ip); + error = gfs2_quota_lock_check(ip, &ap); if (error) goto out; @@ -525,7 +525,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, int error; if (da->nr_blocks) { - error = gfs2_quota_lock_check(dip); + error = gfs2_quota_lock_check(dip, &ap); if (error) goto fail_quota_locks; @@ -953,7 +953,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (da.nr_blocks) { struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; - error = gfs2_quota_lock_check(dip); + error = gfs2_quota_lock_check(dip, &ap); if (error) goto out_gunlock; @@ -1470,7 +1470,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (da.nr_blocks) { struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; - error = gfs2_quota_lock_check(ndip); + error = gfs2_quota_lock_check(ndip, &ap); if (error) goto out_gunlock; @@ -1669,6 +1669,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) kuid_t ouid, nuid; kgid_t ogid, ngid; int error; + struct gfs2_alloc_parms ap; ouid = inode->i_uid; ogid = inode->i_gid; @@ -1696,9 +1697,11 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (error) goto out; + ap.target = gfs2_get_inode_blocks(&ip->i_inode); + if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { - error = gfs2_quota_check(ip, nuid, ngid); + error = gfs2_quota_check(ip, nuid, ngid, &ap); if (error) goto out_gunlock_q; } @@ -1713,9 +1716,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { - u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); - gfs2_quota_change(ip, -blocks, ouid, ogid); - gfs2_quota_change(ip, blocks, nuid, ngid); + gfs2_quota_change(ip, -ap.target, ouid, ogid); + gfs2_quota_change(ip, ap.target, nuid, ngid); } out_end_trans: diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index fa54cbf4c866..e3065cb9ab08 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -923,6 +923,9 @@ restart: if (error) return error; + if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) + force_refresh = FORCE; + qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { @@ -974,11 +977,8 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) sizeof(struct gfs2_quota_data *), sort_qd, NULL); for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { - int force = NO_FORCE; qd = ip->i_res->rs_qa_qd[x]; - if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) - force = FORCE; - error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]); + error = do_glock(qd, NO_FORCE, &ip->i_res->rs_qa_qd_ghs[x]); if (error) break; } @@ -1094,14 +1094,33 @@ static int print_message(struct gfs2_quota_data *qd, char *type) return 0; } -int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) +/** + * gfs2_quota_check - check if allocating new blocks will exceed quota + * @ip: The inode for which this check is being performed + * @uid: The uid to check against + * @gid: The gid to check against + * @ap: The allocation parameters. ap->target contains the requested + * blocks. ap->min_target, if set, contains the minimum blks + * requested. + * + * Returns: 0 on success. + * min_req = ap->min_target ? ap->min_target : ap->target; + * quota must allow atleast min_req blks for success and + * ap->allowed is set to the number of blocks allowed + * + * -EDQUOT otherwise, quota violation. ap->allowed is set to number + * of blocks available. + */ +int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, + struct gfs2_alloc_parms *ap) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_quota_data *qd; - s64 value; + s64 value, warn, limit; unsigned int x; int error = 0; + ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */ if (!test_bit(GIF_QD_LOCKED, &ip->i_flags)) return 0; @@ -1115,30 +1134,37 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) qid_eq(qd->qd_id, make_kqid_gid(gid)))) continue; + warn = (s64)be64_to_cpu(qd->qd_qb.qb_warn); + limit = (s64)be64_to_cpu(qd->qd_qb.qb_limit); value = (s64)be64_to_cpu(qd->qd_qb.qb_value); spin_lock(&qd_lock); value += qd->qd_change; spin_unlock(&qd_lock); - if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { - print_message(qd, "exceeded"); - quota_send_warning(qd->qd_id, - sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN); - - error = -EDQUOT; - break; - } else if (be64_to_cpu(qd->qd_qb.qb_warn) && - (s64)be64_to_cpu(qd->qd_qb.qb_warn) < value && + if (limit > 0 && (limit - value) < ap->allowed) + ap->allowed = limit - value; + /* If we can't meet the target */ + if (limit && limit < (value + (s64)ap->target)) { + /* If no min_target specified or we don't meet + * min_target, return -EDQUOT */ + if (!ap->min_target || ap->min_target > ap->allowed) { + print_message(qd, "exceeded"); + quota_send_warning(qd->qd_id, + sdp->sd_vfs->s_dev, + QUOTA_NL_BHARDWARN); + error = -EDQUOT; + break; + } + } else if (warn && warn < value && time_after_eq(jiffies, qd->qd_last_warn + - gfs2_tune_get(sdp, - gt_quota_warn_period) * HZ)) { + gfs2_tune_get(sdp, gt_quota_warn_period) + * HZ)) { quota_send_warning(qd->qd_id, sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); error = print_message(qd, "warning"); qd->qd_last_warn = jiffies; } } - return error; } diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 55d506eb3c4a..ad04b3acae2b 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -24,7 +24,8 @@ extern void gfs2_quota_unhold(struct gfs2_inode *ip); extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); extern void gfs2_quota_unlock(struct gfs2_inode *ip); -extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); +extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, + struct gfs2_alloc_parms *ap); extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, kuid_t uid, kgid_t gid); @@ -37,7 +38,8 @@ extern int gfs2_quotad(void *data); extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp); -static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) +static inline int gfs2_quota_lock_check(struct gfs2_inode *ip, + struct gfs2_alloc_parms *ap) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); int ret; @@ -48,7 +50,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) return ret; if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) return 0; - ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); + ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid, ap); if (ret) gfs2_quota_unlock(ip); return ret; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 9150207f365c..6af2396a317c 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1946,10 +1946,18 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd) * @ip: the inode to reserve space for * @ap: the allocation parameters * - * Returns: errno + * We try our best to find an rgrp that has at least ap->target blocks + * available. After a couple of passes (loops == 2), the prospects of finding + * such an rgrp diminish. At this stage, we return the first rgrp that has + * atleast ap->min_target blocks available. Either way, we set ap->allowed to + * the number of blocks available in the chosen rgrp. + * + * Returns: 0 on success, + * -ENOMEM if a suitable rgrp can't be found + * errno otherwise */ -int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap) +int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *begin = NULL; @@ -2012,7 +2020,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a /* Skip unuseable resource groups */ if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) || - (ap->target > rs->rs_rbm.rgd->rd_extfail_pt)) + (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) @@ -2027,11 +2035,13 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a goto check_rgrp; /* If rgrp has enough free space, use it */ - if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) { + if (rs->rs_rbm.rgd->rd_free_clone >= ap->target || + (loops == 2 && ap->min_target && + rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) { ip->i_rgd = rs->rs_rbm.rgd; + ap->allowed = ip->i_rgd->rd_free_clone; return 0; } - check_rgrp: /* Check for unlinked inodes which can be reclaimed */ if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index b104f4af3afd..68972ecfbb01 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -41,7 +41,8 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); #define GFS2_AF_ORLOV 1 -extern int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap); +extern int gfs2_inplace_reserve(struct gfs2_inode *ip, + struct gfs2_alloc_parms *ap); extern void gfs2_inplace_release(struct gfs2_inode *ip); extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 0b81f783f787..fd260ce8869a 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -732,7 +732,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, if (error) return error; - error = gfs2_quota_lock_check(ip); + error = gfs2_quota_lock_check(ip, &ap); if (error) return error; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index d0929bc81782..9337065bcc67 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -14,7 +14,7 @@ #include <linux/pagemap.h> #include <linux/mpage.h> #include <linux/sched.h> -#include <linux/aio.h> +#include <linux/uio.h> #include "hfs_fs.h" #include "btree.h" @@ -674,9 +674,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end, static const struct file_operations hfs_file_operations = { .llseek = generic_file_llseek, - .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = new_sync_write, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .splice_read = generic_file_splice_read, diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index 6e560d56094b..754fdf8c6356 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c @@ -131,13 +131,16 @@ skip: hfs_bnode_write(node, entry, data_off + key_len, entry_len); hfs_bnode_dump(node); - if (new_node) { - /* update parent key if we inserted a key - * at the start of the first node - */ - if (!rec && new_node != node) - hfs_brec_update_parent(fd); + /* + * update parent key if we inserted a key + * at the start of the node and it is not the new node + */ + if (!rec && new_node != node) { + hfs_bnode_read_key(node, fd->search_key, data_off + size); + hfs_brec_update_parent(fd); + } + if (new_node) { hfs_bnode_put(fd->bnode); if (!new_node->parent) { hfs_btree_inc_height(tree); @@ -168,9 +171,6 @@ skip: goto again; } - if (!rec) - hfs_brec_update_parent(fd); - return 0; } @@ -370,6 +370,8 @@ again: if (IS_ERR(parent)) return PTR_ERR(parent); __hfs_brec_find(parent, fd, hfs_find_rec_by_key); + if (fd->record < 0) + return -ENOENT; hfs_bnode_dump(parent); rec = fd->record; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 0cf786f2d046..5f86cadb0542 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -14,7 +14,7 @@ #include <linux/pagemap.h> #include <linux/mpage.h> #include <linux/sched.h> -#include <linux/aio.h> +#include <linux/uio.h> #include "hfsplus_fs.h" #include "hfsplus_raw.h" @@ -341,9 +341,7 @@ static const struct inode_operations hfsplus_file_inode_operations = { static const struct file_operations hfsplus_file_operations = { .llseek = generic_file_llseek, - .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = new_sync_write, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .splice_read = generic_file_splice_read, diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index 4fcd40d6f308..91e19f9dffe5 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h @@ -66,7 +66,8 @@ extern int stat_file(const char *path, struct hostfs_stat *p, int fd); extern int access_file(char *path, int r, int w, int x); extern int open_file(char *path, int r, int w, int append); extern void *open_dir(char *path, int *err_out); -extern char *read_dir(void *stream, unsigned long long *pos, +extern void seek_dir(void *stream, unsigned long long pos); +extern char *read_dir(void *stream, unsigned long long *pos_out, unsigned long long *ino_out, int *len_out, unsigned int *type_out); extern void close_file(void *stream); @@ -77,8 +78,7 @@ extern int write_file(int fd, unsigned long long *offset, const char *buf, int len); extern int lseek_file(int fd, long long offset, int whence); extern int fsync_file(int fd, int datasync); -extern int file_create(char *name, int ur, int uw, int ux, int gr, - int gw, int gx, int or, int ow, int ox); +extern int file_create(char *name, int mode); extern int set_attr(const char *file, struct hostfs_iattr *attrs, int fd); extern int make_symlink(const char *from, const char *to); extern int unlink_file(const char *file); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index fd62cae0fdcb..b83a0343378b 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -24,6 +24,7 @@ struct hostfs_inode_info { int fd; fmode_t mode; struct inode vfs_inode; + struct mutex open_mutex; }; static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode) @@ -92,16 +93,22 @@ static char *__dentry_name(struct dentry *dentry, char *name) __putname(name); return NULL; } + + /* + * This function relies on the fact that dentry_path_raw() will place + * the path name at the end of the provided buffer. + */ + BUG_ON(p + strlen(p) + 1 != name + PATH_MAX); + strlcpy(name, root, PATH_MAX); if (len > p - name) { __putname(name); return NULL; } - if (p > name + len) { - char *s = name + len; - while ((*s++ = *p++) != '\0') - ; - } + + if (p > name + len) + strcpy(name + len, p); + return name; } @@ -135,21 +142,19 @@ static char *follow_link(char *link) int len, n; char *name, *resolved, *end; - len = 64; - while (1) { + name = __getname(); + if (!name) { n = -ENOMEM; - name = kmalloc(len, GFP_KERNEL); - if (name == NULL) - goto out; - - n = hostfs_do_readlink(link, name, len); - if (n < len) - break; - len *= 2; - kfree(name); + goto out_free; } + + n = hostfs_do_readlink(link, name, PATH_MAX); if (n < 0) goto out_free; + else if (n == PATH_MAX) { + n = -E2BIG; + goto out_free; + } if (*name == '/') return name; @@ -168,13 +173,12 @@ static char *follow_link(char *link) } sprintf(resolved, "%s%s", link, name); - kfree(name); + __putname(name); kfree(link); return resolved; out_free: - kfree(name); - out: + __putname(name); return ERR_PTR(n); } @@ -225,6 +229,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb) hi->fd = -1; hi->mode = 0; inode_init_once(&hi->vfs_inode); + mutex_init(&hi->open_mutex); return &hi->vfs_inode; } @@ -257,6 +262,9 @@ static int hostfs_show_options(struct seq_file *seq, struct dentry *root) if (strlen(root_path) > offset) seq_printf(seq, ",%s", root_path + offset); + if (append) + seq_puts(seq, ",append"); + return 0; } @@ -284,6 +292,7 @@ static int hostfs_readdir(struct file *file, struct dir_context *ctx) if (dir == NULL) return -error; next = ctx->pos; + seek_dir(dir, next); while ((name = read_dir(dir, &next, &ino, &len, &type)) != NULL) { if (!dir_emit(ctx, name, len, ino, type)) break; @@ -293,13 +302,12 @@ static int hostfs_readdir(struct file *file, struct dir_context *ctx) return 0; } -static int hostfs_file_open(struct inode *ino, struct file *file) +static int hostfs_open(struct inode *ino, struct file *file) { - static DEFINE_MUTEX(open_mutex); char *name; - fmode_t mode = 0; + fmode_t mode; int err; - int r = 0, w = 0, fd; + int r, w, fd; mode = file->f_mode & (FMODE_READ | FMODE_WRITE); if ((mode & HOSTFS_I(ino)->mode) == mode) @@ -308,12 +316,12 @@ static int hostfs_file_open(struct inode *ino, struct file *file) mode |= HOSTFS_I(ino)->mode; retry: + r = w = 0; + if (mode & FMODE_READ) r = 1; if (mode & FMODE_WRITE) - w = 1; - if (w) - r = 1; + r = w = 1; name = dentry_name(file->f_path.dentry); if (name == NULL) @@ -324,15 +332,16 @@ retry: if (fd < 0) return fd; - mutex_lock(&open_mutex); + mutex_lock(&HOSTFS_I(ino)->open_mutex); /* somebody else had handled it first? */ if ((mode & HOSTFS_I(ino)->mode) == mode) { - mutex_unlock(&open_mutex); + mutex_unlock(&HOSTFS_I(ino)->open_mutex); + close_file(&fd); return 0; } if ((mode | HOSTFS_I(ino)->mode) != mode) { mode |= HOSTFS_I(ino)->mode; - mutex_unlock(&open_mutex); + mutex_unlock(&HOSTFS_I(ino)->open_mutex); close_file(&fd); goto retry; } @@ -342,12 +351,12 @@ retry: err = replace_file(fd, HOSTFS_I(ino)->fd); close_file(&fd); if (err < 0) { - mutex_unlock(&open_mutex); + mutex_unlock(&HOSTFS_I(ino)->open_mutex); return err; } } HOSTFS_I(ino)->mode = mode; - mutex_unlock(&open_mutex); + mutex_unlock(&HOSTFS_I(ino)->open_mutex); return 0; } @@ -378,13 +387,11 @@ static int hostfs_fsync(struct file *file, loff_t start, loff_t end, static const struct file_operations hostfs_file_fops = { .llseek = generic_file_llseek, - .read = new_sync_read, .splice_read = generic_file_splice_read, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, - .write = new_sync_write, .mmap = generic_file_mmap, - .open = hostfs_file_open, + .open = hostfs_open, .release = hostfs_file_release, .fsync = hostfs_fsync, }; @@ -393,6 +400,8 @@ static const struct file_operations hostfs_dir_fops = { .llseek = generic_file_llseek, .iterate = hostfs_readdir, .read = generic_read_dir, + .open = hostfs_open, + .fsync = hostfs_fsync, }; static int hostfs_writepage(struct page *page, struct writeback_control *wbc) @@ -400,7 +409,7 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc) struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; char *buffer; - unsigned long long base; + loff_t base = page_offset(page); int count = PAGE_CACHE_SIZE; int end_index = inode->i_size >> PAGE_CACHE_SHIFT; int err; @@ -409,7 +418,6 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc) count = inode->i_size & (PAGE_CACHE_SIZE-1); buffer = kmap(page); - base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT; err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count); if (err != count) { @@ -434,26 +442,29 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc) static int hostfs_readpage(struct file *file, struct page *page) { char *buffer; - long long start; - int err = 0; + loff_t start = page_offset(page); + int bytes_read, ret = 0; - start = (long long) page->index << PAGE_CACHE_SHIFT; buffer = kmap(page); - err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer, + bytes_read = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer, PAGE_CACHE_SIZE); - if (err < 0) + if (bytes_read < 0) { + ClearPageUptodate(page); + SetPageError(page); + ret = bytes_read; goto out; + } - memset(&buffer[err], 0, PAGE_CACHE_SIZE - err); + memset(buffer + bytes_read, 0, PAGE_CACHE_SIZE - bytes_read); - flush_dcache_page(page); + ClearPageError(page); SetPageUptodate(page); - if (PageError(page)) ClearPageError(page); - err = 0; + out: + flush_dcache_page(page); kunmap(page); unlock_page(page); - return err; + return ret; } static int hostfs_write_begin(struct file *file, struct address_space *mapping, @@ -530,11 +541,13 @@ static int read_name(struct inode *ino, char *name) init_special_inode(ino, st.mode & S_IFMT, rdev); ino->i_op = &hostfs_iops; break; - - default: + case S_IFREG: ino->i_op = &hostfs_iops; ino->i_fop = &hostfs_file_fops; ino->i_mapping->a_ops = &hostfs_aops; + break; + default: + return -EIO; } ino->i_ino = st.ino; @@ -568,10 +581,7 @@ static int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (name == NULL) goto out_put; - fd = file_create(name, - mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR, - mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP, - mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH); + fd = file_create(name, mode & S_IFMT); if (fd < 0) error = fd; else diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index 9765dab95cbd..9c1e0f019880 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c @@ -97,21 +97,27 @@ void *open_dir(char *path, int *err_out) return dir; } -char *read_dir(void *stream, unsigned long long *pos, +void seek_dir(void *stream, unsigned long long pos) +{ + DIR *dir = stream; + + seekdir(dir, pos); +} + +char *read_dir(void *stream, unsigned long long *pos_out, unsigned long long *ino_out, int *len_out, unsigned int *type_out) { DIR *dir = stream; struct dirent *ent; - seekdir(dir, *pos); ent = readdir(dir); if (ent == NULL) return NULL; *len_out = strlen(ent->d_name); *ino_out = ent->d_ino; *type_out = ent->d_type; - *pos = telldir(dir); + *pos_out = ent->d_off; return ent->d_name; } @@ -175,21 +181,10 @@ void close_dir(void *stream) closedir(stream); } -int file_create(char *name, int ur, int uw, int ux, int gr, - int gw, int gx, int or, int ow, int ox) +int file_create(char *name, int mode) { - int mode, fd; - - mode = 0; - mode |= ur ? S_IRUSR : 0; - mode |= uw ? S_IWUSR : 0; - mode |= ux ? S_IXUSR : 0; - mode |= gr ? S_IRGRP : 0; - mode |= gw ? S_IWGRP : 0; - mode |= gx ? S_IXGRP : 0; - mode |= or ? S_IROTH : 0; - mode |= ow ? S_IWOTH : 0; - mode |= ox ? S_IXOTH : 0; + int fd; + fd = open64(name, O_CREAT | O_RDWR, mode); if (fd < 0) return -errno; diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 7f54e5f76cec..6d8cfe9b52d6 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -197,9 +197,7 @@ const struct address_space_operations hpfs_aops = { const struct file_operations hpfs_file_ops = { .llseek = generic_file_llseek, - .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = new_sync_write, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .release = hpfs_file_release, diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c274aca8e8dc..2640d88b0e63 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -34,6 +34,7 @@ #include <linux/security.h> #include <linux/magic.h> #include <linux/migrate.h> +#include <linux/uio.h> #include <asm/uaccess.h> @@ -47,9 +48,10 @@ struct hugetlbfs_config { kuid_t uid; kgid_t gid; umode_t mode; - long nr_blocks; + long max_hpages; long nr_inodes; struct hstate *hstate; + long min_hpages; }; struct hugetlbfs_inode_info { @@ -67,7 +69,7 @@ int sysctl_hugetlb_shm_group; enum { Opt_size, Opt_nr_inodes, Opt_mode, Opt_uid, Opt_gid, - Opt_pagesize, + Opt_pagesize, Opt_min_size, Opt_err, }; @@ -78,6 +80,7 @@ static const match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_pagesize, "pagesize=%s"}, + {Opt_min_size, "min_size=%s"}, {Opt_err, NULL}, }; @@ -179,42 +182,33 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, } #endif -static int +static size_t hugetlbfs_read_actor(struct page *page, unsigned long offset, - char __user *buf, unsigned long count, - unsigned long size) + struct iov_iter *to, unsigned long size) { - char *kaddr; - unsigned long left, copied = 0; + size_t copied = 0; int i, chunksize; - if (size > count) - size = count; - /* Find which 4k chunk and offset with in that chunk */ i = offset >> PAGE_CACHE_SHIFT; offset = offset & ~PAGE_CACHE_MASK; while (size) { + size_t n; chunksize = PAGE_CACHE_SIZE; if (offset) chunksize -= offset; if (chunksize > size) chunksize = size; - kaddr = kmap(&page[i]); - left = __copy_to_user(buf, kaddr + offset, chunksize); - kunmap(&page[i]); - if (left) { - copied += (chunksize - left); - break; - } + n = copy_page_to_iter(&page[i], offset, chunksize, to); + copied += n; + if (n != chunksize) + return copied; offset = 0; size -= chunksize; - buf += chunksize; - copied += chunksize; i++; } - return copied ? copied : -EFAULT; + return copied; } /* @@ -222,39 +216,34 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset, * data. Its *very* similar to do_generic_mapping_read(), we can't use that * since it has PAGE_CACHE_SIZE assumptions. */ -static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, - size_t len, loff_t *ppos) +static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) { - struct hstate *h = hstate_file(filp); - struct address_space *mapping = filp->f_mapping; + struct file *file = iocb->ki_filp; + struct hstate *h = hstate_file(file); + struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - unsigned long index = *ppos >> huge_page_shift(h); - unsigned long offset = *ppos & ~huge_page_mask(h); + unsigned long index = iocb->ki_pos >> huge_page_shift(h); + unsigned long offset = iocb->ki_pos & ~huge_page_mask(h); unsigned long end_index; loff_t isize; ssize_t retval = 0; - /* validate length */ - if (len == 0) - goto out; - - for (;;) { + while (iov_iter_count(to)) { struct page *page; - unsigned long nr, ret; - int ra; + size_t nr, copied; /* nr is the maximum number of bytes to copy from this page */ nr = huge_page_size(h); isize = i_size_read(inode); if (!isize) - goto out; + break; end_index = (isize - 1) >> huge_page_shift(h); - if (index >= end_index) { - if (index > end_index) - goto out; + if (index > end_index) + break; + if (index == end_index) { nr = ((isize - 1) & ~huge_page_mask(h)) + 1; if (nr <= offset) - goto out; + break; } nr = nr - offset; @@ -265,39 +254,27 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, * We have a HOLE, zero out the user-buffer for the * length of the hole or request. */ - ret = len < nr ? len : nr; - if (clear_user(buf, ret)) - ra = -EFAULT; - else - ra = 0; + copied = iov_iter_zero(nr, to); } else { unlock_page(page); /* * We have the page, copy it to user space buffer. */ - ra = hugetlbfs_read_actor(page, offset, buf, len, nr); - ret = ra; + copied = hugetlbfs_read_actor(page, offset, to, nr); page_cache_release(page); } - if (ra < 0) { - if (retval == 0) - retval = ra; - goto out; + offset += copied; + retval += copied; + if (copied != nr && iov_iter_count(to)) { + if (!retval) + retval = -EFAULT; + break; } - - offset += ret; - retval += ret; - len -= ret; index += offset >> huge_page_shift(h); offset &= ~huge_page_mask(h); - - /* short read or no more work */ - if ((ret != nr) || (len == 0)) - break; } -out: - *ppos = ((loff_t)index << huge_page_shift(h)) + offset; + iocb->ki_pos = ((loff_t)index << huge_page_shift(h)) + offset; return retval; } @@ -319,7 +296,7 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping, static void truncate_huge_page(struct page *page) { - cancel_dirty_page(page, /* No IO accounting for huge pages? */0); + ClearPageDirty(page); ClearPageUptodate(page); delete_from_page_cache(page); } @@ -721,7 +698,7 @@ static void init_once(void *foo) } const struct file_operations hugetlbfs_file_operations = { - .read = hugetlbfs_read, + .read_iter = hugetlbfs_read_iter, .mmap = hugetlbfs_file_mmap, .fsync = noop_fsync, .get_unmapped_area = hugetlb_get_unmapped_area, @@ -754,14 +731,38 @@ static const struct super_operations hugetlbfs_ops = { .show_options = generic_show_options, }; +enum { NO_SIZE, SIZE_STD, SIZE_PERCENT }; + +/* + * Convert size option passed from command line to number of huge pages + * in the pool specified by hstate. Size option could be in bytes + * (val_type == SIZE_STD) or percentage of the pool (val_type == SIZE_PERCENT). + */ +static long long +hugetlbfs_size_to_hpages(struct hstate *h, unsigned long long size_opt, + int val_type) +{ + if (val_type == NO_SIZE) + return -1; + + if (val_type == SIZE_PERCENT) { + size_opt <<= huge_page_shift(h); + size_opt *= h->max_huge_pages; + do_div(size_opt, 100); + } + + size_opt >>= huge_page_shift(h); + return size_opt; +} + static int hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) { char *p, *rest; substring_t args[MAX_OPT_ARGS]; int option; - unsigned long long size = 0; - enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE; + unsigned long long max_size_opt = 0, min_size_opt = 0; + int max_val_type = NO_SIZE, min_val_type = NO_SIZE; if (!options) return 0; @@ -799,10 +800,10 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) /* memparse() will accept a K/M/G without a digit */ if (!isdigit(*args[0].from)) goto bad_val; - size = memparse(args[0].from, &rest); - setsize = SIZE_STD; + max_size_opt = memparse(args[0].from, &rest); + max_val_type = SIZE_STD; if (*rest == '%') - setsize = SIZE_PERCENT; + max_val_type = SIZE_PERCENT; break; } @@ -825,6 +826,17 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) break; } + case Opt_min_size: { + /* memparse() will accept a K/M/G without a digit */ + if (!isdigit(*args[0].from)) + goto bad_val; + min_size_opt = memparse(args[0].from, &rest); + min_val_type = SIZE_STD; + if (*rest == '%') + min_val_type = SIZE_PERCENT; + break; + } + default: pr_err("Bad mount option: \"%s\"\n", p); return -EINVAL; @@ -832,15 +844,22 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) } } - /* Do size after hstate is set up */ - if (setsize > NO_SIZE) { - struct hstate *h = pconfig->hstate; - if (setsize == SIZE_PERCENT) { - size <<= huge_page_shift(h); - size *= h->max_huge_pages; - do_div(size, 100); - } - pconfig->nr_blocks = (size >> huge_page_shift(h)); + /* + * Use huge page pool size (in hstate) to convert the size + * options to number of huge pages. If NO_SIZE, -1 is returned. + */ + pconfig->max_hpages = hugetlbfs_size_to_hpages(pconfig->hstate, + max_size_opt, max_val_type); + pconfig->min_hpages = hugetlbfs_size_to_hpages(pconfig->hstate, + min_size_opt, min_val_type); + + /* + * If max_size was specified, then min_size must be smaller + */ + if (max_val_type > NO_SIZE && + pconfig->min_hpages > pconfig->max_hpages) { + pr_err("minimum size can not be greater than maximum size\n"); + return -EINVAL; } return 0; @@ -859,12 +878,13 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) save_mount_options(sb, data); - config.nr_blocks = -1; /* No limit on size by default */ + config.max_hpages = -1; /* No limit on size by default */ config.nr_inodes = -1; /* No limit on number of inodes by default */ config.uid = current_fsuid(); config.gid = current_fsgid(); config.mode = 0755; config.hstate = &default_hstate; + config.min_hpages = -1; /* No default minimum size */ ret = hugetlbfs_parse_options(data, &config); if (ret) return ret; @@ -878,8 +898,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) sbinfo->max_inodes = config.nr_inodes; sbinfo->free_inodes = config.nr_inodes; sbinfo->spool = NULL; - if (config.nr_blocks != -1) { - sbinfo->spool = hugepage_new_subpool(config.nr_blocks); + /* + * Allocate and initialize subpool if maximum or minimum size is + * specified. Any needed reservations (for minimim size) are taken + * taken when the subpool is created. + */ + if (config.max_hpages != -1 || config.min_hpages != -1) { + sbinfo->spool = hugepage_new_subpool(config.hstate, + config.max_hpages, + config.min_hpages); if (!sbinfo->spool) goto out_free; } diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 64989ca9ba90..f509f62e12f6 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -51,9 +51,7 @@ const struct file_operations jffs2_file_operations = { .llseek = generic_file_llseek, .open = generic_file_open, - .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = new_sync_write, .write_iter = generic_file_write_iter, .unlocked_ioctl=jffs2_ioctl, .mmap = generic_file_readonly_mmap, diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index d72817ac51f6..762c7a3cf43d 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -195,7 +195,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat /* unchecked xdatum is chained with c->xattr_unchecked */ list_del_init(&xd->xindex); - dbg_xattr("success on verfying xdatum (xid=%u, version=%u)\n", + dbg_xattr("success on verifying xdatum (xid=%u, version=%u)\n", xd->xid, xd->version); return 0; diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 10815f8dfd8b..ae46788b9723 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -151,8 +151,6 @@ const struct inode_operations jfs_file_inode_operations = { const struct file_operations jfs_file_operations = { .open = jfs_open, .llseek = generic_file_llseek, - .write = new_sync_write, - .read = new_sync_read, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index bd3df1ca3c9b..3197aed10614 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -22,8 +22,8 @@ #include <linux/buffer_head.h> #include <linux/pagemap.h> #include <linux/quotaops.h> +#include <linux/uio.h> #include <linux/writeback.h> -#include <linux/aio.h> #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 49ba7ff1bbb9..16a0922beb59 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -183,30 +183,23 @@ static inline void remove_metapage(struct page *page, struct metapage *mp) #endif -static void init_once(void *foo) -{ - struct metapage *mp = (struct metapage *)foo; - - mp->lid = 0; - mp->lsn = 0; - mp->flag = 0; - mp->data = NULL; - mp->clsn = 0; - mp->log = NULL; - set_bit(META_free, &mp->flag); - init_waitqueue_head(&mp->wait); -} - static inline struct metapage *alloc_metapage(gfp_t gfp_mask) { - return mempool_alloc(metapage_mempool, gfp_mask); + struct metapage *mp = mempool_alloc(metapage_mempool, gfp_mask); + + if (mp) { + mp->lid = 0; + mp->lsn = 0; + mp->data = NULL; + mp->clsn = 0; + mp->log = NULL; + init_waitqueue_head(&mp->wait); + } + return mp; } static inline void free_metapage(struct metapage *mp) { - mp->flag = 0; - set_bit(META_free, &mp->flag); - mempool_free(mp, metapage_mempool); } @@ -216,7 +209,7 @@ int __init metapage_init(void) * Allocate the metapage structures */ metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage), - 0, 0, init_once); + 0, 0, NULL); if (metapage_cache == NULL) return -ENOMEM; diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h index a78beda85f68..337e9e51ac06 100644 --- a/fs/jfs/jfs_metapage.h +++ b/fs/jfs/jfs_metapage.h @@ -48,7 +48,6 @@ struct metapage { /* metapage flag */ #define META_locked 0 -#define META_free 1 #define META_dirty 2 #define META_sync 3 #define META_discard 4 diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 5d30c56ae075..4cd9798f4948 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -102,7 +102,7 @@ void jfs_error(struct super_block *sb, const char *fmt, ...) vaf.fmt = fmt; vaf.va = &args; - pr_err("ERROR: (device %s): %pf: %pV\n", + pr_err("ERROR: (device %s): %ps: %pV\n", sb->s_id, __builtin_return_address(0), &vaf); va_end(args); diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index b684e8a132e6..2bacb9988566 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -207,6 +207,7 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of, goto out_free; } + of->event = atomic_read(&of->kn->attr.open->event); ops = kernfs_ops(of->kn); if (ops->read) len = ops->read(of, buf, len, *ppos); diff --git a/fs/locks.c b/fs/locks.c index 365c82e1b3a9..52b780fb5258 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -203,11 +203,11 @@ static struct kmem_cache *flctx_cache __read_mostly; static struct kmem_cache *filelock_cache __read_mostly; static struct file_lock_context * -locks_get_lock_context(struct inode *inode) +locks_get_lock_context(struct inode *inode, int type) { struct file_lock_context *new; - if (likely(inode->i_flctx)) + if (likely(inode->i_flctx) || type == F_UNLCK) goto out; new = kmem_cache_alloc(flctx_cache, GFP_KERNEL); @@ -223,14 +223,7 @@ locks_get_lock_context(struct inode *inode) * Assign the pointer if it's not already assigned. If it is, then * free the context we just allocated. */ - spin_lock(&inode->i_lock); - if (likely(!inode->i_flctx)) { - inode->i_flctx = new; - new = NULL; - } - spin_unlock(&inode->i_lock); - - if (new) + if (cmpxchg(&inode->i_flctx, NULL, new)) kmem_cache_free(flctx_cache, new); out: return inode->i_flctx; @@ -276,8 +269,10 @@ void locks_release_private(struct file_lock *fl) } if (fl->fl_lmops) { - if (fl->fl_lmops->lm_put_owner) - fl->fl_lmops->lm_put_owner(fl); + if (fl->fl_lmops->lm_put_owner) { + fl->fl_lmops->lm_put_owner(fl->fl_owner); + fl->fl_owner = NULL; + } fl->fl_lmops = NULL; } } @@ -333,7 +328,7 @@ void locks_copy_conflock(struct file_lock *new, struct file_lock *fl) if (fl->fl_lmops) { if (fl->fl_lmops->lm_get_owner) - fl->fl_lmops->lm_get_owner(new, fl); + fl->fl_lmops->lm_get_owner(fl->fl_owner); } } EXPORT_SYMBOL(locks_copy_conflock); @@ -592,11 +587,15 @@ posix_owner_key(struct file_lock *fl) static void locks_insert_global_blocked(struct file_lock *waiter) { + lockdep_assert_held(&blocked_lock_lock); + hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter)); } static void locks_delete_global_blocked(struct file_lock *waiter) { + lockdep_assert_held(&blocked_lock_lock); + hash_del(&waiter->fl_link); } @@ -730,7 +729,7 @@ static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *s /* POSIX locks owned by the same process do not conflict with * each other. */ - if (!IS_POSIX(sys_fl) || posix_same_owner(caller_fl, sys_fl)) + if (posix_same_owner(caller_fl, sys_fl)) return (0); /* Check whether they overlap */ @@ -748,7 +747,7 @@ static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *s /* FLOCK locks referring to the same filp do not conflict with * each other. */ - if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file)) + if (caller_fl->fl_file == sys_fl->fl_file) return (0); if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) return 0; @@ -838,6 +837,8 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, { int i = 0; + lockdep_assert_held(&blocked_lock_lock); + /* * This deadlock detector can't reasonably detect deadlocks with * FL_OFDLCK locks, since they aren't owned by a process, per-se. @@ -871,9 +872,12 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) bool found = false; LIST_HEAD(dispose); - ctx = locks_get_lock_context(inode); - if (!ctx) - return -ENOMEM; + ctx = locks_get_lock_context(inode, request->fl_type); + if (!ctx) { + if (request->fl_type != F_UNLCK) + return -ENOMEM; + return (request->fl_flags & FL_EXISTS) ? -ENOENT : 0; + } if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { new_fl = locks_alloc_lock(); @@ -939,9 +943,9 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str bool added = false; LIST_HEAD(dispose); - ctx = locks_get_lock_context(inode); + ctx = locks_get_lock_context(inode, request->fl_type); if (!ctx) - return -ENOMEM; + return (request->fl_type == F_UNLCK) ? 0 : -ENOMEM; /* * We may need two file_lock structures for this operation, @@ -964,8 +968,6 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str */ if (request->fl_type != F_UNLCK) { list_for_each_entry(fl, &ctx->flc_posix, fl_list) { - if (!IS_POSIX(fl)) - continue; if (!posix_locks_conflict(request, fl)) continue; if (conflock) @@ -1388,9 +1390,8 @@ any_leases_conflict(struct inode *inode, struct file_lock *breaker) int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { int error = 0; - struct file_lock *new_fl; struct file_lock_context *ctx = inode->i_flctx; - struct file_lock *fl; + struct file_lock *new_fl, *fl, *tmp; unsigned long break_time; int want_write = (mode & O_ACCMODE) != O_RDONLY; LIST_HEAD(dispose); @@ -1420,7 +1421,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) break_time++; /* so that 0 means no break time */ } - list_for_each_entry(fl, &ctx->flc_lease, fl_list) { + list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) { if (!leases_conflict(fl, new_fl)) continue; if (want_write) { @@ -1606,7 +1607,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr lease = *flp; trace_generic_add_lease(inode, lease); - ctx = locks_get_lock_context(inode); + /* Note that arg is never F_UNLCK here */ + ctx = locks_get_lock_context(inode, arg); if (!ctx) return -ENOMEM; @@ -1665,7 +1667,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr } if (my_fl != NULL) { - error = lease->fl_lmops->lm_change(my_fl, arg, &dispose); + lease = my_fl; + error = lease->fl_lmops->lm_change(lease, arg, &dispose); if (error) goto out; goto out_setup; @@ -1727,7 +1730,7 @@ static int generic_delete_lease(struct file *filp, void *owner) break; } } - trace_generic_delete_lease(inode, fl); + trace_generic_delete_lease(inode, victim); if (victim) error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); spin_unlock(&ctx->flc_lock); @@ -2555,15 +2558,10 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, : (fl->fl_type == F_WRLCK) ? "WRITE" : "READ "); } if (inode) { -#ifdef WE_CAN_BREAK_LSLK_NOW - seq_printf(f, "%d %s:%ld ", fl_pid, - inode->i_sb->s_id, inode->i_ino); -#else - /* userspace relies on this representation of dev_t ;-( */ + /* userspace relies on this representation of dev_t */ seq_printf(f, "%d %02x:%02x:%ld ", fl_pid, MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino); -#endif } else { seq_printf(f, "%d <none>:0 ", fl_pid); } diff --git a/fs/logfs/file.c b/fs/logfs/file.c index 8538752df2f6..b2c13f739ffa 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c @@ -271,8 +271,6 @@ const struct file_operations logfs_reg_fops = { .llseek = generic_file_llseek, .mmap = generic_file_readonly_mmap, .open = generic_file_open, - .read = new_sync_read, - .write = new_sync_write, }; const struct address_space_operations logfs_reg_aops = { diff --git a/fs/minix/file.c b/fs/minix/file.c index a967de085ac0..6d63e27ec961 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -14,9 +14,7 @@ */ const struct file_operations minix_file_operations = { .llseek = generic_file_llseek, - .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = new_sync_write, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .fsync = generic_file_fsync, diff --git a/fs/namei.c b/fs/namei.c index c83145af4bfc..76fb76a0818b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -119,15 +119,14 @@ * PATH_MAX includes the nul terminator --RR. */ -#define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename)) +#define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname)) struct filename * getname_flags(const char __user *filename, int flags, int *empty) { - struct filename *result, *err; - int len; - long max; + struct filename *result; char *kname; + int len; result = audit_reusename(filename); if (result) @@ -136,22 +135,18 @@ getname_flags(const char __user *filename, int flags, int *empty) result = __getname(); if (unlikely(!result)) return ERR_PTR(-ENOMEM); - result->refcnt = 1; /* * First, try to embed the struct filename inside the names_cache * allocation */ - kname = (char *)result + sizeof(*result); + kname = (char *)result->iname; result->name = kname; - result->separate = false; - max = EMBEDDED_NAME_MAX; -recopy: - len = strncpy_from_user(kname, filename, max); + len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX); if (unlikely(len < 0)) { - err = ERR_PTR(len); - goto error; + __putname(result); + return ERR_PTR(len); } /* @@ -160,43 +155,49 @@ recopy: * names_cache allocation for the pathname, and re-do the copy from * userland. */ - if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) { + if (unlikely(len == EMBEDDED_NAME_MAX)) { + const size_t size = offsetof(struct filename, iname[1]); kname = (char *)result; - result = kzalloc(sizeof(*result), GFP_KERNEL); - if (!result) { - err = ERR_PTR(-ENOMEM); - result = (struct filename *)kname; - goto error; + /* + * size is chosen that way we to guarantee that + * result->iname[0] is within the same object and that + * kname can't be equal to result->iname, no matter what. + */ + result = kzalloc(size, GFP_KERNEL); + if (unlikely(!result)) { + __putname(kname); + return ERR_PTR(-ENOMEM); } result->name = kname; - result->separate = true; - result->refcnt = 1; - max = PATH_MAX; - goto recopy; + len = strncpy_from_user(kname, filename, PATH_MAX); + if (unlikely(len < 0)) { + __putname(kname); + kfree(result); + return ERR_PTR(len); + } + if (unlikely(len == PATH_MAX)) { + __putname(kname); + kfree(result); + return ERR_PTR(-ENAMETOOLONG); + } } + result->refcnt = 1; /* The empty path is special. */ if (unlikely(!len)) { if (empty) *empty = 1; - err = ERR_PTR(-ENOENT); - if (!(flags & LOOKUP_EMPTY)) - goto error; + if (!(flags & LOOKUP_EMPTY)) { + putname(result); + return ERR_PTR(-ENOENT); + } } - err = ERR_PTR(-ENAMETOOLONG); - if (unlikely(len >= PATH_MAX)) - goto error; - result->uptr = filename; result->aname = NULL; audit_getname(result); return result; - -error: - putname(result); - return err; } struct filename * @@ -216,8 +217,7 @@ getname_kernel(const char * filename) return ERR_PTR(-ENOMEM); if (len <= EMBEDDED_NAME_MAX) { - result->name = (char *)(result) + sizeof(*result); - result->separate = false; + result->name = (char *)result->iname; } else if (len <= PATH_MAX) { struct filename *tmp; @@ -227,7 +227,6 @@ getname_kernel(const char * filename) return ERR_PTR(-ENOMEM); } tmp->name = (char *)result; - tmp->separate = true; result = tmp; } else { __putname(result); @@ -249,7 +248,7 @@ void putname(struct filename *name) if (--name->refcnt > 0) return; - if (name->separate) { + if (name->name != name->iname) { __putname(name->name); kfree(name); } else @@ -1851,10 +1850,11 @@ static int link_path_walk(const char *name, struct nameidata *nd) return err; } -static int path_init(int dfd, const char *name, unsigned int flags, +static int path_init(int dfd, const struct filename *name, unsigned int flags, struct nameidata *nd) { int retval = 0; + const char *s = name->name; nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT; @@ -1863,7 +1863,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (flags & LOOKUP_ROOT) { struct dentry *root = nd->root.dentry; struct inode *inode = root->d_inode; - if (*name) { + if (*s) { if (!d_can_lookup(root)) return -ENOTDIR; retval = inode_permission(inode, MAY_EXEC); @@ -1885,7 +1885,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->root.mnt = NULL; nd->m_seq = read_seqbegin(&mount_lock); - if (*name=='/') { + if (*s == '/') { if (flags & LOOKUP_RCU) { rcu_read_lock(); nd->seq = set_root_rcu(nd); @@ -1919,7 +1919,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, dentry = f.file->f_path.dentry; - if (*name) { + if (*s) { if (!d_can_lookup(dentry)) { fdput(f); return -ENOTDIR; @@ -1949,7 +1949,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, return -ECHILD; done: current->total_link_count = 0; - return link_path_walk(name, nd); + return link_path_walk(s, nd); } static void path_cleanup(struct nameidata *nd) @@ -1972,7 +1972,7 @@ static inline int lookup_last(struct nameidata *nd, struct path *path) } /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ -static int path_lookupat(int dfd, const char *name, +static int path_lookupat(int dfd, const struct filename *name, unsigned int flags, struct nameidata *nd) { struct path path; @@ -2027,31 +2027,17 @@ static int path_lookupat(int dfd, const char *name, static int filename_lookup(int dfd, struct filename *name, unsigned int flags, struct nameidata *nd) { - int retval = path_lookupat(dfd, name->name, flags | LOOKUP_RCU, nd); + int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd); if (unlikely(retval == -ECHILD)) - retval = path_lookupat(dfd, name->name, flags, nd); + retval = path_lookupat(dfd, name, flags, nd); if (unlikely(retval == -ESTALE)) - retval = path_lookupat(dfd, name->name, - flags | LOOKUP_REVAL, nd); + retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd); if (likely(!retval)) audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT); return retval; } -static int do_path_lookup(int dfd, const char *name, - unsigned int flags, struct nameidata *nd) -{ - struct filename *filename = getname_kernel(name); - int retval = PTR_ERR(filename); - - if (!IS_ERR(filename)) { - retval = filename_lookup(dfd, filename, flags, nd); - putname(filename); - } - return retval; -} - /* does lookup, returns the object with parent locked */ struct dentry *kern_path_locked(const char *name, struct path *path) { @@ -2089,9 +2075,15 @@ out: int kern_path(const char *name, unsigned int flags, struct path *path) { struct nameidata nd; - int res = do_path_lookup(AT_FDCWD, name, flags, &nd); - if (!res) - *path = nd.path; + struct filename *filename = getname_kernel(name); + int res = PTR_ERR(filename); + + if (!IS_ERR(filename)) { + res = filename_lookup(AT_FDCWD, filename, flags, &nd); + putname(filename); + if (!res) + *path = nd.path; + } return res; } EXPORT_SYMBOL(kern_path); @@ -2108,15 +2100,22 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, const char *name, unsigned int flags, struct path *path) { - struct nameidata nd; - int err; - nd.root.dentry = dentry; - nd.root.mnt = mnt; + struct filename *filename = getname_kernel(name); + int err = PTR_ERR(filename); + BUG_ON(flags & LOOKUP_PARENT); - /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */ - err = do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, &nd); - if (!err) - *path = nd.path; + + /* the first argument of filename_lookup() is ignored with LOOKUP_ROOT */ + if (!IS_ERR(filename)) { + struct nameidata nd; + nd.root.dentry = dentry; + nd.root.mnt = mnt; + err = filename_lookup(AT_FDCWD, filename, + flags | LOOKUP_ROOT, &nd); + if (!err) + *path = nd.path; + putname(filename); + } return err; } EXPORT_SYMBOL(vfs_path_lookup); @@ -2138,9 +2137,7 @@ static struct dentry *lookup_hash(struct nameidata *nd) * @len: maximum length @len should be interpreted to * * Note that this routine is purely a helper for filesystem usage and should - * not be called by generic code. Also note that by using this function the - * nameidata argument is passed to the filesystem methods and a filesystem - * using this helper needs to be prepared for that. + * not be called by generic code. */ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) { @@ -2341,7 +2338,8 @@ out: * Returns 0 and "path" will be valid on success; Returns error otherwise. */ static int -path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags) +path_mountpoint(int dfd, const struct filename *name, struct path *path, + unsigned int flags) { struct nameidata nd; int err; @@ -2370,20 +2368,20 @@ out: } static int -filename_mountpoint(int dfd, struct filename *s, struct path *path, +filename_mountpoint(int dfd, struct filename *name, struct path *path, unsigned int flags) { int error; - if (IS_ERR(s)) - return PTR_ERR(s); - error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU); + if (IS_ERR(name)) + return PTR_ERR(name); + error = path_mountpoint(dfd, name, path, flags | LOOKUP_RCU); if (unlikely(error == -ECHILD)) - error = path_mountpoint(dfd, s->name, path, flags); + error = path_mountpoint(dfd, name, path, flags); if (unlikely(error == -ESTALE)) - error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL); + error = path_mountpoint(dfd, name, path, flags | LOOKUP_REVAL); if (likely(!error)) - audit_inode(s, path->dentry, 0); - putname(s); + audit_inode(name, path->dentry, 0); + putname(name); return error; } @@ -3156,7 +3154,7 @@ static int do_tmpfile(int dfd, struct filename *pathname, static const struct qstr name = QSTR_INIT("/", 1); struct dentry *dentry, *child; struct inode *dir; - int error = path_lookupat(dfd, pathname->name, + int error = path_lookupat(dfd, pathname, flags | LOOKUP_DIRECTORY, nd); if (unlikely(error)) return error; @@ -3229,7 +3227,7 @@ static struct file *path_openat(int dfd, struct filename *pathname, goto out; } - error = path_init(dfd, pathname->name, flags, nd); + error = path_init(dfd, pathname, flags, nd); if (unlikely(error)) goto out; diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 1dd7007f974d..479bf8db264e 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -98,30 +98,24 @@ out: } static ssize_t -ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +ncp_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { + struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); size_t already_read = 0; - off_t pos; + off_t pos = iocb->ki_pos; size_t bufsize; int error; - void* freepage; + void *freepage; size_t freelen; ncp_dbg(1, "enter %pD2\n", file); - pos = *ppos; - - if ((ssize_t) count < 0) { - return -EINVAL; - } - if (!count) + if (!iov_iter_count(to)) return 0; if (pos > inode->i_sb->s_maxbytes) return 0; - if (pos + count > inode->i_sb->s_maxbytes) { - count = inode->i_sb->s_maxbytes - pos; - } + iov_iter_truncate(to, inode->i_sb->s_maxbytes - pos); error = ncp_make_open(inode, O_RDONLY); if (error) { @@ -138,31 +132,29 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) goto outrel; error = 0; /* First read in as much as possible for each bufsize. */ - while (already_read < count) { + while (iov_iter_count(to)) { int read_this_time; - size_t to_read = min_t(unsigned int, + size_t to_read = min_t(size_t, bufsize - (pos % bufsize), - count - already_read); + iov_iter_count(to)); error = ncp_read_bounce(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle, - pos, to_read, buf, &read_this_time, + pos, to_read, to, &read_this_time, freepage, freelen); if (error) { error = -EIO; /* NW errno -> Linux errno */ break; } pos += read_this_time; - buf += read_this_time; already_read += read_this_time; - if (read_this_time != to_read) { + if (read_this_time != to_read) break; - } } vfree(freepage); - *ppos = pos; + iocb->ki_pos = pos; file_accessed(file); @@ -173,42 +165,26 @@ outrel: } static ssize_t -ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) +ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { + struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); size_t already_written = 0; - off_t pos; + loff_t pos = iocb->ki_pos; + size_t count = iov_iter_count(from); size_t bufsize; int errno; - void* bouncebuffer; + void *bouncebuffer; ncp_dbg(1, "enter %pD2\n", file); - if ((ssize_t) count < 0) - return -EINVAL; - pos = *ppos; - if (file->f_flags & O_APPEND) { - pos = i_size_read(inode); - } - - if (pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) { - if (pos >= MAX_NON_LFS) { - return -EFBIG; - } - if (count > MAX_NON_LFS - (u32)pos) { - count = MAX_NON_LFS - (u32)pos; - } - } - if (pos >= inode->i_sb->s_maxbytes) { - if (count || pos > inode->i_sb->s_maxbytes) { - return -EFBIG; - } - } - if (pos + count > inode->i_sb->s_maxbytes) { - count = inode->i_sb->s_maxbytes - pos; - } + errno = generic_write_checks(file, &pos, &count, 0); + if (errno) + return errno; + iov_iter_truncate(from, count); if (!count) return 0; + errno = ncp_make_open(inode, O_WRONLY); if (errno) { ncp_dbg(1, "open failed, error=%d\n", errno); @@ -216,8 +192,6 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * } bufsize = NCP_SERVER(inode)->buffer_size; - already_written = 0; - errno = file_update_time(file); if (errno) goto outrel; @@ -227,13 +201,13 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * errno = -EIO; /* -ENOMEM */ goto outrel; } - while (already_written < count) { + while (iov_iter_count(from)) { int written_this_time; - size_t to_write = min_t(unsigned int, - bufsize - (pos % bufsize), - count - already_written); + size_t to_write = min_t(size_t, + bufsize - ((off_t)pos % bufsize), + iov_iter_count(from)); - if (copy_from_user(bouncebuffer, buf, to_write)) { + if (copy_from_iter(bouncebuffer, to_write, from) != to_write) { errno = -EFAULT; break; } @@ -244,16 +218,14 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * break; } pos += written_this_time; - buf += written_this_time; already_written += written_this_time; - if (written_this_time != to_write) { + if (written_this_time != to_write) break; - } } vfree(bouncebuffer); - *ppos = pos; + iocb->ki_pos = pos; if (pos > i_size_read(inode)) { mutex_lock(&inode->i_mutex); @@ -277,8 +249,8 @@ static int ncp_release(struct inode *inode, struct file *file) { const struct file_operations ncp_file_operations = { .llseek = generic_file_llseek, - .read = ncp_file_read, - .write = ncp_file_write, + .read_iter = ncp_file_read_iter, + .write_iter = ncp_file_write_iter, .unlocked_ioctl = ncp_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ncp_compat_ioctl, diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c index 482387532f54..2b502a0d7941 100644 --- a/fs/ncpfs/ncplib_kernel.c +++ b/fs/ncpfs/ncplib_kernel.c @@ -1001,8 +1001,8 @@ out: */ int ncp_read_bounce(struct ncp_server *server, const char *file_id, - __u32 offset, __u16 to_read, char __user *target, int *bytes_read, - void* bounce, __u32 bufsize) + __u32 offset, __u16 to_read, struct iov_iter *to, + int *bytes_read, void *bounce, __u32 bufsize) { int result; @@ -1025,7 +1025,7 @@ ncp_read_bounce(struct ncp_server *server, const char *file_id, (offset & 1); *bytes_read = len; result = 0; - if (copy_to_user(target, source, len)) + if (copy_to_iter(source, len, to) != len) result = -EFAULT; } } diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 250e443a07f3..5233fbc1747a 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -53,7 +53,7 @@ static inline int ncp_read_bounce_size(__u32 size) { return sizeof(struct ncp_reply_header) + 2 + 2 + size + 8; }; int ncp_read_bounce(struct ncp_server *, const char *, __u32, __u16, - char __user *, int *, void* bounce, __u32 bouncelen); + struct iov_iter *, int *, void *bounce, __u32 bouncelen); int ncp_read_kernel(struct ncp_server *, const char *, __u32, __u16, char *, int *); int ncp_write_kernel(struct ncp_server *, const char *, __u32, __u16, diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index c7abc10279af..f31fd0dd92c6 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -1,6 +1,6 @@ config NFS_FS tristate "NFS client support" - depends on INET && FILE_LOCKING + depends on INET && FILE_LOCKING && MULTIUSER select LOCKD select SUNRPC select NFS_ACL_SUPPORT if NFS_V3_ACL diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f9f4845db989..19874151e95c 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -433,7 +433,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat static bool nfs_client_init_is_complete(const struct nfs_client *clp) { - return clp->cl_cons_state != NFS_CS_INITING; + return clp->cl_cons_state <= NFS_CS_READY; } int nfs_wait_client_init_complete(const struct nfs_client *clp) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index a1f0685b42ff..a6ad68865880 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -181,8 +181,8 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); spin_unlock(&delegation->lock); - put_rpccred(oldcred); rcu_read_unlock(); + put_rpccred(oldcred); trace_nfs4_reclaim_delegation(inode, res->delegation_type); } else { /* We appear to have raced with a delegation return. */ @@ -370,7 +370,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct delegation = NULL; goto out; } - freeme = nfs_detach_delegation_locked(nfsi, + if (test_and_set_bit(NFS_DELEGATION_RETURNING, + &old_delegation->flags)) + goto out; + freeme = nfs_detach_delegation_locked(nfsi, old_delegation, clp); if (freeme == NULL) goto out; @@ -433,6 +436,8 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation) { bool ret = false; + if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) + goto out; if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) ret = true; if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) { @@ -444,6 +449,7 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation) ret = true; spin_unlock(&delegation->lock); } +out: return ret; } @@ -471,14 +477,20 @@ restart: super_list) { if (!nfs_delegation_need_return(delegation)) continue; - inode = nfs_delegation_grab_inode(delegation); - if (inode == NULL) + if (!nfs_sb_active(server->super)) continue; + inode = nfs_delegation_grab_inode(delegation); + if (inode == NULL) { + rcu_read_unlock(); + nfs_sb_deactive(server->super); + goto restart; + } delegation = nfs_start_delegation_return_locked(NFS_I(inode)); rcu_read_unlock(); err = nfs_end_delegation_return(inode, delegation, 0); iput(inode); + nfs_sb_deactive(server->super); if (!err) goto restart; set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); @@ -809,19 +821,30 @@ restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry_rcu(delegation, &server->delegations, super_list) { + if (test_bit(NFS_DELEGATION_RETURNING, + &delegation->flags)) + continue; if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0) continue; - inode = nfs_delegation_grab_inode(delegation); - if (inode == NULL) + if (!nfs_sb_active(server->super)) continue; - delegation = nfs_detach_delegation(NFS_I(inode), - delegation, server); + inode = nfs_delegation_grab_inode(delegation); + if (inode == NULL) { + rcu_read_unlock(); + nfs_sb_deactive(server->super); + goto restart; + } + delegation = nfs_start_delegation_return_locked(NFS_I(inode)); rcu_read_unlock(); - - if (delegation != NULL) - nfs_free_delegation(delegation); + if (delegation != NULL) { + delegation = nfs_detach_delegation(NFS_I(inode), + delegation, server); + if (delegation != NULL) + nfs_free_delegation(delegation); + } iput(inode); + nfs_sb_deactive(server->super); goto restart; } } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9b0c55cb2a2e..c19e16f0b2d0 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -408,14 +408,22 @@ static int xdr_decode(nfs_readdir_descriptor_t *desc, return 0; } +/* Match file and dirent using either filehandle or fileid + * Note: caller is responsible for checking the fsid + */ static int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry) { + struct nfs_inode *nfsi; + if (dentry->d_inode == NULL) goto different; - if (nfs_compare_fh(entry->fh, NFS_FH(dentry->d_inode)) != 0) - goto different; - return 1; + + nfsi = NFS_I(dentry->d_inode); + if (entry->fattr->fileid == nfsi->fileid) + return 1; + if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0) + return 1; different: return 0; } @@ -469,6 +477,10 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) struct inode *inode; int status; + if (!(entry->fattr->valid & NFS_ATTR_FATTR_FILEID)) + return; + if (!(entry->fattr->valid & NFS_ATTR_FATTR_FSID)) + return; if (filename.name[0] == '.') { if (filename.len == 1) return; @@ -479,6 +491,10 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) dentry = d_lookup(parent, &filename); if (dentry != NULL) { + /* Is there a mountpoint here? If so, just exit */ + if (!nfs_fsid_equal(&NFS_SB(dentry->d_sb)->fsid, + &entry->fattr->fsid)) + goto out; if (nfs_same_file(dentry, entry)) { nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); status = nfs_refresh_inode(dentry->d_inode, entry->fattr); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e907c8cf732e..c3929fb2ab26 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -265,7 +265,7 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t return -EINVAL; #else - VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); + VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); if (rw == READ) return nfs_file_direct_read(iocb, iter, pos); @@ -393,7 +393,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write) long res = (long) dreq->error; if (!res) res = (long) dreq->count; - aio_complete(dreq->iocb, res, 0); + dreq->iocb->ki_complete(dreq->iocb, res, 0); } complete_all(&dreq->completion); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 94712fc781fa..f6a3adedf027 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -26,7 +26,6 @@ #include <linux/nfs_mount.h> #include <linux/mm.h> #include <linux/pagemap.h> -#include <linux/aio.h> #include <linux/gfp.h> #include <linux/swap.h> @@ -178,7 +177,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) iocb->ki_filp, iov_iter_count(to), (unsigned long) iocb->ki_pos); - result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); + result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping); if (!result) { result = generic_file_read_iter(iocb, to); if (result > 0) @@ -199,7 +198,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n", filp, (unsigned long) count, (unsigned long long) *ppos); - res = nfs_revalidate_mapping(inode, filp->f_mapping); + res = nfs_revalidate_mapping_protected(inode, filp->f_mapping); if (!res) { res = generic_file_splice_read(filp, ppos, pipe, count, flags); if (res > 0) @@ -372,6 +371,10 @@ start: nfs_wait_bit_killable, TASK_KILLABLE); if (ret) return ret; + /* + * Wait for O_DIRECT to complete + */ + nfs_inode_dio_wait(mapping->host); page = grab_cache_page_write_begin(mapping, index, flags); if (!page) @@ -619,6 +622,9 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) /* make sure the cache has finished storing the page */ nfs_fscache_wait_on_page_write(NFS_I(inode), page); + wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING, + nfs_wait_bit_killable, TASK_KILLABLE); + lock_page(page); mapping = page_file_mapping(page); if (mapping != inode->i_mapping) @@ -920,8 +926,6 @@ EXPORT_SYMBOL_GPL(nfs_flock); const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, - .read = new_sync_read, - .write = new_sync_write, .read_iter = nfs_file_read, .write_iter = nfs_file_write, .mmap = nfs_file_mmap, diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 83107be3dd01..d42dff6d5e98 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -556,6 +556,7 @@ EXPORT_SYMBOL_GPL(nfs_setattr); * This is a copy of the common vmtruncate, but with the locking * corrected to take into account the fact that NFS requires * inode->i_size to be updated under the inode->i_lock. + * Note: must be called with inode->i_lock held! */ static int nfs_vmtruncate(struct inode * inode, loff_t offset) { @@ -565,14 +566,14 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset) if (err) goto out; - spin_lock(&inode->i_lock); i_size_write(inode, offset); /* Optimisation */ if (offset == 0) NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA; - spin_unlock(&inode->i_lock); + spin_unlock(&inode->i_lock); truncate_pagecache(inode, offset); + spin_lock(&inode->i_lock); out: return err; } @@ -585,10 +586,15 @@ out: * Note: we do this in the *proc.c in order to ensure that * it works for things like exclusive creates too. */ -void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) +void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, + struct nfs_fattr *fattr) { + /* Barrier: bump the attribute generation count. */ + nfs_fattr_set_barrier(fattr); + + spin_lock(&inode->i_lock); + NFS_I(inode)->attr_gencount = fattr->gencount; if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { - spin_lock(&inode->i_lock); if ((attr->ia_valid & ATTR_MODE) != 0) { int mode = attr->ia_mode & S_IALLUGO; mode |= inode->i_mode & ~S_IALLUGO; @@ -600,12 +606,13 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) inode->i_gid = attr->ia_gid; nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL); - spin_unlock(&inode->i_lock); } if ((attr->ia_valid & ATTR_SIZE) != 0) { nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); nfs_vmtruncate(inode, attr->ia_size); } + nfs_update_inode(inode, fattr); + spin_unlock(&inode->i_lock); } EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); @@ -1028,6 +1035,7 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map if (mapping->nrpages != 0) { if (S_ISREG(inode->i_mode)) { + unmap_mapping_range(mapping, 0, 0, 0); ret = nfs_sync_mapping(mapping); if (ret < 0) return ret; @@ -1060,11 +1068,14 @@ static bool nfs_mapping_need_revalidate_inode(struct inode *inode) } /** - * nfs_revalidate_mapping - Revalidate the pagecache + * __nfs_revalidate_mapping - Revalidate the pagecache * @inode - pointer to host inode * @mapping - pointer to mapping + * @may_lock - take inode->i_mutex? */ -int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +static int __nfs_revalidate_mapping(struct inode *inode, + struct address_space *mapping, + bool may_lock) { struct nfs_inode *nfsi = NFS_I(inode); unsigned long *bitlock = &nfsi->flags; @@ -1113,7 +1124,12 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; spin_unlock(&inode->i_lock); trace_nfs_invalidate_mapping_enter(inode); - ret = nfs_invalidate_mapping(inode, mapping); + if (may_lock) { + mutex_lock(&inode->i_mutex); + ret = nfs_invalidate_mapping(inode, mapping); + mutex_unlock(&inode->i_mutex); + } else + ret = nfs_invalidate_mapping(inode, mapping); trace_nfs_invalidate_mapping_exit(inode, ret); clear_bit_unlock(NFS_INO_INVALIDATING, bitlock); @@ -1123,6 +1139,29 @@ out: return ret; } +/** + * nfs_revalidate_mapping - Revalidate the pagecache + * @inode - pointer to host inode + * @mapping - pointer to mapping + */ +int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +{ + return __nfs_revalidate_mapping(inode, mapping, false); +} + +/** + * nfs_revalidate_mapping_protected - Revalidate the pagecache + * @inode - pointer to host inode + * @mapping - pointer to mapping + * + * Differs from nfs_revalidate_mapping() in that it grabs the inode->i_mutex + * while invalidating the mapping. + */ +int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping) +{ + return __nfs_revalidate_mapping(inode, mapping, true); +} + static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); @@ -1231,13 +1270,6 @@ static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fat return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0; } -static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr) -{ - if (!(fattr->valid & NFS_ATTR_FATTR_SIZE)) - return 0; - return nfs_size_to_loff_t(fattr->size) > i_size_read(inode); -} - static atomic_long_t nfs_attr_generation_counter; static unsigned long nfs_read_attr_generation_counter(void) @@ -1249,6 +1281,7 @@ unsigned long nfs_inc_attr_generation_counter(void) { return atomic_long_inc_return(&nfs_attr_generation_counter); } +EXPORT_SYMBOL_GPL(nfs_inc_attr_generation_counter); void nfs_fattr_init(struct nfs_fattr *fattr) { @@ -1260,6 +1293,22 @@ void nfs_fattr_init(struct nfs_fattr *fattr) } EXPORT_SYMBOL_GPL(nfs_fattr_init); +/** + * nfs_fattr_set_barrier + * @fattr: attributes + * + * Used to set a barrier after an attribute was updated. This + * barrier ensures that older attributes from RPC calls that may + * have raced with our update cannot clobber these new values. + * Note that you are still responsible for ensuring that other + * operations which change the attribute on the server do not + * collide. + */ +void nfs_fattr_set_barrier(struct nfs_fattr *fattr) +{ + fattr->gencount = nfs_inc_attr_generation_counter(); +} + struct nfs_fattr *nfs_alloc_fattr(void) { struct nfs_fattr *fattr; @@ -1370,7 +1419,6 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 || nfs_ctime_need_update(inode, fattr) || - nfs_size_need_update(inode, fattr) || ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0); } @@ -1460,6 +1508,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) int status; spin_lock(&inode->i_lock); + nfs_fattr_set_barrier(fattr); status = nfs_post_op_update_inode_locked(inode, fattr); spin_unlock(&inode->i_lock); @@ -1468,7 +1517,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); /** - * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache + * nfs_post_op_update_inode_force_wcc_locked - update the inode attribute cache * @inode - pointer to inode * @fattr - updated attributes * @@ -1478,11 +1527,10 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); * * This function is mainly designed to be used by the ->write_done() functions. */ -int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) +int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr) { int status; - spin_lock(&inode->i_lock); /* Don't do a WCC update if these attributes are already stale */ if ((fattr->valid & NFS_ATTR_FATTR) == 0 || !nfs_inode_attrs_need_update(inode, fattr)) { @@ -1514,6 +1562,27 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa } out_noforce: status = nfs_post_op_update_inode_locked(inode, fattr); + return status; +} + +/** + * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache + * @inode - pointer to inode + * @fattr - updated attributes + * + * After an operation that has changed the inode metadata, mark the + * attribute cache as being invalid, then try to update it. Fake up + * weak cache consistency data, if none exist. + * + * This function is mainly designed to be used by the ->write_done() functions. + */ +int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) +{ + int status; + + spin_lock(&inode->i_lock); + nfs_fattr_set_barrier(fattr); + status = nfs_post_op_update_inode_force_wcc_locked(inode, fattr); spin_unlock(&inode->i_lock); return status; } @@ -1715,6 +1784,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; + /* Set barrier to be more recent than all outstanding updates */ nfsi->attr_gencount = nfs_inc_attr_generation_counter(); } else { if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { @@ -1722,6 +1792,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; } + /* Set the barrier to be more recent than this fattr */ + if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0) + nfsi->attr_gencount = fattr->gencount; } invalid &= ~NFS_INO_INVALID_ATTR; /* Don't invalidate the data if we were to blame */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b802fb3a2d99..9e6475bc5ba2 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -459,6 +459,7 @@ void nfs_mark_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo, u32 ds_commit_idx); int nfs_write_need_commit(struct nfs_pgio_header *); +void nfs_writeback_update_inode(struct nfs_pgio_header *hdr); int nfs_generic_commit_list(struct inode *inode, struct list_head *head, int how, struct nfs_commit_info *cinfo); void nfs_retry_commit(struct list_head *page_list, diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 78e557c3ab87..1f11d2533ee4 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -138,7 +138,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) - nfs_setattr_update_inode(inode, sattr); + nfs_setattr_update_inode(inode, sattr, fattr); dprintk("NFS reply setattr: %d\n", status); return status; } @@ -834,7 +834,7 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; if (task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr); + nfs_writeback_update_inode(hdr); return 0; } diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 2a932fdc57cb..53852a4bd88b 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1987,6 +1987,11 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, if (entry->fattr->valid & NFS_ATTR_FATTR_V3) entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); + if (entry->fattr->fileid != entry->ino) { + entry->fattr->mounted_on_fileid = entry->ino; + entry->fattr->valid |= NFS_ATTR_FATTR_MOUNTED_ON_FILEID; + } + /* In fact, a post_op_fh3: */ p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 8646af9b11d2..86d6214ea022 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -621,6 +621,9 @@ int nfs41_walk_client_list(struct nfs_client *new, spin_lock(&nn->nfs_client_lock); list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { + if (pos == new) + goto found; + if (pos->rpc_ops != new->rpc_ops) continue; @@ -639,10 +642,6 @@ int nfs41_walk_client_list(struct nfs_client *new, prev = pos; status = nfs_wait_client_init_complete(pos); - if (pos->cl_cons_state == NFS_CS_SESSION_INITING) { - nfs4_schedule_lease_recovery(pos); - status = nfs4_wait_clnt_recover(pos); - } spin_lock(&nn->nfs_client_lock); if (status < 0) break; @@ -668,7 +667,7 @@ int nfs41_walk_client_list(struct nfs_client *new, */ if (!nfs4_match_client_owner_id(pos, new)) continue; - +found: atomic_inc(&pos->cl_count); *result = pos; status = 0; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 8b46389c4c5b..0181cde1d102 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -170,8 +170,6 @@ const struct file_operations nfs4_file_operations = { #else .llseek = nfs_file_llseek, #endif - .read = new_sync_read, - .write = new_sync_write, .read_iter = nfs_file_read, .write_iter = nfs_file_write, .mmap = nfs_file_mmap, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 88180ac5ea0e..627f37c44456 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -901,6 +901,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) if (!cinfo->atomic || cinfo->before != dir->i_version) nfs_force_lookup_revalidate(dir); dir->i_version = cinfo->after; + nfsi->attr_gencount = nfs_inc_attr_generation_counter(); nfs_fscache_invalidate(dir); spin_unlock(&dir->i_lock); } @@ -1552,6 +1553,9 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod opendata->o_arg.open_flags = 0; opendata->o_arg.fmode = fmode; + opendata->o_arg.share_access = nfs4_map_atomic_open_share( + NFS_SB(opendata->dentry->d_sb), + fmode, 0); memset(&opendata->o_res, 0, sizeof(opendata->o_res)); memset(&opendata->c_res, 0, sizeof(opendata->c_res)); nfs4_init_opendata_res(opendata); @@ -2413,8 +2417,8 @@ static int _nfs4_do_open(struct inode *dir, opendata->o_res.f_attr, sattr, state, label, olabel); if (status == 0) { - nfs_setattr_update_inode(state->inode, sattr); - nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr); + nfs_setattr_update_inode(state->inode, sattr, + opendata->o_res.f_attr); nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel); } } @@ -2651,7 +2655,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) case -NFS4ERR_BAD_STATEID: case -NFS4ERR_EXPIRED: if (!nfs4_stateid_match(&calldata->arg.stateid, - &state->stateid)) { + &state->open_stateid)) { rpc_restart_call_prepare(task); goto out_release; } @@ -2687,7 +2691,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags); is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags); is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags); - nfs4_stateid_copy(&calldata->arg.stateid, &state->stateid); + nfs4_stateid_copy(&calldata->arg.stateid, &state->open_stateid); /* Calculate the change in open mode */ calldata->arg.fmode = 0; if (state->n_rdwr == 0) { @@ -3288,7 +3292,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, status = nfs4_do_setattr(inode, cred, fattr, sattr, state, NULL, label); if (status == 0) { - nfs_setattr_update_inode(inode, sattr); + nfs_setattr_update_inode(inode, sattr, fattr); nfs_setsecurity(inode, fattr, label); } nfs4_label_free(label); @@ -4234,7 +4238,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, } if (task->tk_status >= 0) { renew_lease(NFS_SERVER(inode), hdr->timestamp); - nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr); + nfs_writeback_update_inode(hdr); } return 0; } @@ -6893,9 +6897,13 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, if (status == 0) { clp->cl_clientid = res.clientid; - clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R); - if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R)) + clp->cl_exchange_flags = res.flags; + /* Client ID is not confirmed */ + if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R)) { + clear_bit(NFS4_SESSION_ESTABLISHED, + &clp->cl_session->session_state); clp->cl_seqid = res.seqid; + } kfree(clp->cl_serverowner); clp->cl_serverowner = res.server_owner; @@ -7227,6 +7235,9 @@ static void nfs4_update_session(struct nfs4_session *session, struct nfs41_create_session_res *res) { nfs4_copy_sessionid(&session->sess_id, &res->sessionid); + /* Mark client id and session as being confirmed */ + session->clp->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R; + set_bit(NFS4_SESSION_ESTABLISHED, &session->session_state); session->flags = res->flags; memcpy(&session->fc_attrs, &res->fc_attrs, sizeof(session->fc_attrs)); if (res->flags & SESSION4_BACK_CHAN) @@ -7322,8 +7333,8 @@ int nfs4_proc_destroy_session(struct nfs4_session *session, dprintk("--> nfs4_proc_destroy_session\n"); /* session is still being setup */ - if (session->clp->cl_cons_state != NFS_CS_READY) - return status; + if (!test_and_clear_bit(NFS4_SESSION_ESTABLISHED, &session->session_state)) + return 0; status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); trace_nfs4_destroy_session(session->clp, status); diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index fc46c7455898..e3ea2c5324d6 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -70,6 +70,7 @@ struct nfs4_session { enum nfs4_session_state { NFS4_SESSION_INITING, + NFS4_SESSION_ESTABLISHED, }; extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5ad908e9ce9c..f95e3b58bbc3 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -346,9 +346,23 @@ int nfs41_discover_server_trunking(struct nfs_client *clp, status = nfs4_proc_exchange_id(clp, cred); if (status != NFS4_OK) return status; - set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); - return nfs41_walk_client_list(clp, result, cred); + status = nfs41_walk_client_list(clp, result, cred); + if (status < 0) + return status; + if (clp != *result) + return 0; + + /* Purge state if the client id was established in a prior instance */ + if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R) + set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state); + else + set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); + nfs4_schedule_state_manager(clp); + status = nfs_wait_client_init_complete(clp); + if (status < 0) + nfs_put_client(clp); + return status; } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index b09cc23d6f43..c63189acd052 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -139,7 +139,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) - nfs_setattr_update_inode(inode, sattr); + nfs_setattr_update_inode(inode, sattr, fattr); dprintk("NFS reply setattr: %d\n", status); return status; } @@ -609,10 +609,8 @@ static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct inode *inode = hdr->inode; - if (task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr); + nfs_writeback_update_inode(hdr); return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 595d81e354d1..759931088094 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1377,6 +1377,36 @@ static int nfs_should_remove_suid(const struct inode *inode) return 0; } +static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr, + struct nfs_fattr *fattr) +{ + struct nfs_pgio_args *argp = &hdr->args; + struct nfs_pgio_res *resp = &hdr->res; + + if (!(fattr->valid & NFS_ATTR_FATTR_SIZE)) + return; + if (argp->offset + resp->count != fattr->size) + return; + if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode)) + return; + /* Set attribute barrier */ + nfs_fattr_set_barrier(fattr); +} + +void nfs_writeback_update_inode(struct nfs_pgio_header *hdr) +{ + struct nfs_fattr *fattr = hdr->res.fattr; + struct inode *inode = hdr->inode; + + if (fattr == NULL) + return; + spin_lock(&inode->i_lock); + nfs_writeback_check_extend(hdr, fattr); + nfs_post_op_update_inode_force_wcc_locked(inode, fattr); + spin_unlock(&inode->i_lock); +} +EXPORT_SYMBOL_GPL(nfs_writeback_update_inode); + /* * This function is called when the WRITE call is complete. */ @@ -1846,11 +1876,6 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) * request from the inode / page_private pointer and * release it */ nfs_inode_remove_request(req); - /* - * In case nfs_inode_remove_request has marked the - * page as being dirty - */ - cancel_dirty_page(page, PAGE_CACHE_SIZE); nfs_unlock_and_release_request(req); } diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 683bf718aead..fc2d108f5272 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -6,6 +6,7 @@ config NFSD select SUNRPC select EXPORTFS select NFS_ACL_SUPPORT if NFSD_V2_ACL + depends on MULTIUSER help Choose Y here if you want to allow other computers to access files residing on this system using Sun's Network File System diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index cdbc78c72542..03d647bf195d 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -137,7 +137,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, seg->offset = iomap.offset; seg->length = iomap.length; - dprintk("GET: %lld:%lld %d\n", bex->foff, bex->len, bex->es); + dprintk("GET: 0x%llx:0x%llx %d\n", bex->foff, bex->len, bex->es); return 0; out_error: diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c index 9da89fddab33..9aa2796da90d 100644 --- a/fs/nfsd/blocklayoutxdr.c +++ b/fs/nfsd/blocklayoutxdr.c @@ -122,19 +122,19 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, p = xdr_decode_hyper(p, &bex.foff); if (bex.foff & (block_size - 1)) { - dprintk("%s: unaligned offset %lld\n", + dprintk("%s: unaligned offset 0x%llx\n", __func__, bex.foff); goto fail; } p = xdr_decode_hyper(p, &bex.len); if (bex.len & (block_size - 1)) { - dprintk("%s: unaligned length %lld\n", + dprintk("%s: unaligned length 0x%llx\n", __func__, bex.foff); goto fail; } p = xdr_decode_hyper(p, &bex.soff); if (bex.soff & (block_size - 1)) { - dprintk("%s: unaligned disk offset %lld\n", + dprintk("%s: unaligned disk offset 0x%llx\n", __func__, bex.soff); goto fail; } diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 3c1bfa155571..6904213a4363 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -118,7 +118,7 @@ void nfsd4_setup_layout_type(struct svc_export *exp) { struct super_block *sb = exp->ex_path.mnt->mnt_sb; - if (exp->ex_flags & NFSEXP_NOPNFS) + if (!(exp->ex_flags & NFSEXP_PNFS)) return; if (sb->s_export_op->get_uuid && @@ -440,15 +440,14 @@ nfsd4_return_file_layout(struct nfs4_layout *lp, struct nfsd4_layout_seg *seg, list_move_tail(&lp->lo_perstate, reaplist); return; } - end = seg->offset; + lo->offset = layout_end(seg); } else { /* retain the whole layout segment on a split. */ if (layout_end(seg) < end) { dprintk("%s: split not supported\n", __func__); return; } - - lo->offset = layout_end(seg); + end = seg->offset; } layout_update_len(lo, end); @@ -513,6 +512,9 @@ nfsd4_return_client_layouts(struct svc_rqst *rqstp, spin_lock(&clp->cl_lock); list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) { + if (ls->ls_layout_type != lrp->lr_layout_type) + continue; + if (lrp->lr_return_type == RETURN_FSID && !fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle, &cstate->current_fh.fh_handle)) @@ -587,7 +589,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls) rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str)); - nfsd4_cb_layout_fail(ls); + trace_layout_recall_fail(&ls->ls_stid.sc_stateid); printk(KERN_WARNING "nfsd: client %s failed to respond to layout recall. " diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d30bea8d0277..92b9d97aff4f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1237,8 +1237,8 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp, nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp); gdp->gd_notify_types &= ops->notify_types; - exp_put(exp); out: + exp_put(exp); return nfserr; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f6b2a09f793f..326a545ea7b2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1638,7 +1638,7 @@ __destroy_client(struct nfs4_client *clp) nfs4_put_stid(&dp->dl_stid); } while (!list_empty(&clp->cl_revoked)) { - dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); + dp = list_entry(clp->cl_revoked.next, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); nfs4_put_stid(&dp->dl_stid); } @@ -3221,7 +3221,7 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, } else nfs4_free_openowner(&oo->oo_owner); spin_unlock(&clp->cl_lock); - return oo; + return ret; } static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { @@ -4932,20 +4932,22 @@ nfs4_transform_lock_offset(struct file_lock *lock) lock->fl_end = OFFSET_MAX; } -static void nfsd4_fl_get_owner(struct file_lock *dst, struct file_lock *src) +static fl_owner_t +nfsd4_fl_get_owner(fl_owner_t owner) { - struct nfs4_lockowner *lo = (struct nfs4_lockowner *)src->fl_owner; - dst->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lo->lo_owner)); + struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner; + + nfs4_get_stateowner(&lo->lo_owner); + return owner; } -static void nfsd4_fl_put_owner(struct file_lock *fl) +static void +nfsd4_fl_put_owner(fl_owner_t owner) { - struct nfs4_lockowner *lo = (struct nfs4_lockowner *)fl->fl_owner; + struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner; - if (lo) { + if (lo) nfs4_put_stateowner(&lo->lo_owner); - fl->fl_owner = NULL; - } } static const struct lock_manager_operations nfsd_posix_mng_ops = { @@ -5062,7 +5064,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, } else nfs4_free_lockowner(&lo->lo_owner); spin_unlock(&clp->cl_lock); - return lo; + return ret; } static void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index df5e66caf100..5fb7e78169a6 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1562,7 +1562,11 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, p = xdr_decode_hyper(p, &lgp->lg_seg.offset); p = xdr_decode_hyper(p, &lgp->lg_seg.length); p = xdr_decode_hyper(p, &lgp->lg_minlength); - nfsd4_decode_stateid(argp, &lgp->lg_sid); + + status = nfsd4_decode_stateid(argp, &lgp->lg_sid); + if (status) + return status; + READ_BUF(4); lgp->lg_maxcount = be32_to_cpup(p++); @@ -1580,7 +1584,11 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, p = xdr_decode_hyper(p, &lcp->lc_seg.offset); p = xdr_decode_hyper(p, &lcp->lc_seg.length); lcp->lc_reclaim = be32_to_cpup(p++); - nfsd4_decode_stateid(argp, &lcp->lc_sid); + + status = nfsd4_decode_stateid(argp, &lcp->lc_sid); + if (status) + return status; + READ_BUF(4); lcp->lc_newoffset = be32_to_cpup(p++); if (lcp->lc_newoffset) { @@ -1628,7 +1636,11 @@ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, READ_BUF(16); p = xdr_decode_hyper(p, &lrp->lr_seg.offset); p = xdr_decode_hyper(p, &lrp->lr_seg.length); - nfsd4_decode_stateid(argp, &lrp->lr_sid); + + status = nfsd4_decode_stateid(argp, &lrp->lr_sid); + if (status) + return status; + READ_BUF(4); lrp->lrf_body_len = be32_to_cpup(p++); if (lrp->lrf_body_len > 0) { @@ -4123,7 +4135,7 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, return nfserr_resource; *p++ = cpu_to_be32(lrp->lrs_present); if (lrp->lrs_present) - nfsd4_encode_stateid(xdr, &lrp->lr_sid); + return nfsd4_encode_stateid(xdr, &lrp->lr_sid); return nfs_ok; } #endif /* CONFIG_NFSD_PNFS */ diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 83a9694ec485..46ec934f5dee 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -165,13 +165,17 @@ int nfsd_reply_cache_init(void) { unsigned int hashsize; unsigned int i; + int status = 0; max_drc_entries = nfsd_cache_size_limit(); atomic_set(&num_drc_entries, 0); hashsize = nfsd_hashsize(max_drc_entries); maskbits = ilog2(hashsize); - register_shrinker(&nfsd_reply_cache_shrinker); + status = register_shrinker(&nfsd_reply_cache_shrinker); + if (status) + return status; + drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), 0, 0, NULL); if (!drc_slab) diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index a8c728acb7a8..54575e3cc1a2 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -143,8 +143,6 @@ static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) */ const struct file_operations nilfs_file_operations = { .llseek = generic_file_llseek, - .read = new_sync_read, - .write = new_sync_write, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .unlocked_ioctl = nilfs_ioctl, diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 8b5969538f39..ab4987bc637f 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -26,7 +26,7 @@ #include <linux/mpage.h> #include <linux/pagemap.h> #include <linux/writeback.h> -#include <linux/aio.h> +#include <linux/uio.h> #include "nilfs.h" #include "btnode.h" #include "segment.h" diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 469086b9f99b..0c3f303baf32 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1907,6 +1907,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { struct nilfs_inode_info *ii, *n; + int during_mount = !(sci->sc_super->s_flags & MS_ACTIVE); int defer_iput = false; spin_lock(&nilfs->ns_inode_lock); @@ -1919,10 +1920,10 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, brelse(ii->i_bh); ii->i_bh = NULL; list_del_init(&ii->i_dirty); - if (!ii->vfs_inode.i_nlink) { + if (!ii->vfs_inode.i_nlink || during_mount) { /* - * Defer calling iput() to avoid a deadlock - * over I_SYNC flag for inodes with i_nlink == 0 + * Defer calling iput() to avoid deadlocks if + * i_nlink == 0 or mount is not yet finished. */ list_add_tail(&ii->i_dirty, &sci->sc_iput_queue); defer_iput = true; diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 9a66ff79ff27..d2f97ecca6a5 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -143,7 +143,8 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, !(marks_mask & FS_ISDIR & ~marks_ignored_mask)) return false; - if (event_mask & marks_mask & ~marks_ignored_mask) + if (event_mask & FAN_ALL_OUTGOING_EVENTS & marks_mask & + ~marks_ignored_mask) return true; return false; diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile index 36ae529511c4..2ff263e6d363 100644 --- a/fs/ntfs/Makefile +++ b/fs/ntfs/Makefile @@ -8,7 +8,7 @@ ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o -ccflags-y := -DNTFS_VERSION=\"2.1.31\" +ccflags-y := -DNTFS_VERSION=\"2.1.32\" ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 1da9b2d184dc..840e95e3f1d2 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -1,7 +1,7 @@ /* * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. + * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc. * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -28,7 +28,6 @@ #include <linux/swap.h> #include <linux/uio.h> #include <linux/writeback.h> -#include <linux/aio.h> #include <asm/page.h> #include <asm/uaccess.h> @@ -329,62 +328,168 @@ err_out: return err; } -/** - * ntfs_fault_in_pages_readable - - * - * Fault a number of userspace pages into pagetables. - * - * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes - * with more than two userspace pages as well as handling the single page case - * elegantly. - * - * If you find this difficult to understand, then think of the while loop being - * the following code, except that we do without the integer variable ret: - * - * do { - * ret = __get_user(c, uaddr); - * uaddr += PAGE_SIZE; - * } while (!ret && uaddr < end); - * - * Note, the final __get_user() may well run out-of-bounds of the user buffer, - * but _not_ out-of-bounds of the page the user buffer belongs to, and since - * this is only a read and not a write, and since it is still in the same page, - * it should not matter and this makes the code much simpler. - */ -static inline void ntfs_fault_in_pages_readable(const char __user *uaddr, - int bytes) +static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos, + size_t *count) { - const char __user *end; - volatile char c; - - /* Set @end to the first byte outside the last page we care about. */ - end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes); - - while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end)) - ; -} - -/** - * ntfs_fault_in_pages_readable_iovec - - * - * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs. - */ -static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov, - size_t iov_ofs, int bytes) -{ - do { - const char __user *buf; - unsigned len; + loff_t pos; + s64 end, ll; + ssize_t err; + unsigned long flags; + struct inode *vi = file_inode(file); + ntfs_inode *base_ni, *ni = NTFS_I(vi); + ntfs_volume *vol = ni->vol; - buf = iov->iov_base + iov_ofs; - len = iov->iov_len - iov_ofs; - if (len > bytes) - len = bytes; - ntfs_fault_in_pages_readable(buf, len); - bytes -= len; - iov++; - iov_ofs = 0; - } while (bytes); + ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos " + "0x%llx, count 0x%lx.", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), + (unsigned long long)*ppos, (unsigned long)*count); + /* We can write back this queue in page reclaim. */ + current->backing_dev_info = inode_to_bdi(vi); + err = generic_write_checks(file, ppos, count, S_ISBLK(vi->i_mode)); + if (unlikely(err)) + goto out; + /* + * All checks have passed. Before we start doing any writing we want + * to abort any totally illegal writes. + */ + BUG_ON(NInoMstProtected(ni)); + BUG_ON(ni->type != AT_DATA); + /* If file is encrypted, deny access, just like NT4. */ + if (NInoEncrypted(ni)) { + /* Only $DATA attributes can be encrypted. */ + /* + * Reminder for later: Encrypted files are _always_ + * non-resident so that the content can always be encrypted. + */ + ntfs_debug("Denying write access to encrypted file."); + err = -EACCES; + goto out; + } + if (NInoCompressed(ni)) { + /* Only unnamed $DATA attribute can be compressed. */ + BUG_ON(ni->name_len); + /* + * Reminder for later: If resident, the data is not actually + * compressed. Only on the switch to non-resident does + * compression kick in. This is in contrast to encrypted files + * (see above). + */ + ntfs_error(vi->i_sb, "Writing to compressed files is not " + "implemented yet. Sorry."); + err = -EOPNOTSUPP; + goto out; + } + if (*count == 0) + goto out; + base_ni = ni; + if (NInoAttr(ni)) + base_ni = ni->ext.base_ntfs_ino; + err = file_remove_suid(file); + if (unlikely(err)) + goto out; + /* + * Our ->update_time method always succeeds thus file_update_time() + * cannot fail either so there is no need to check the return code. + */ + file_update_time(file); + pos = *ppos; + /* The first byte after the last cluster being written to. */ + end = (pos + *count + vol->cluster_size_mask) & + ~(u64)vol->cluster_size_mask; + /* + * If the write goes beyond the allocated size, extend the allocation + * to cover the whole of the write, rounded up to the nearest cluster. + */ + read_lock_irqsave(&ni->size_lock, flags); + ll = ni->allocated_size; + read_unlock_irqrestore(&ni->size_lock, flags); + if (end > ll) { + /* + * Extend the allocation without changing the data size. + * + * Note we ensure the allocation is big enough to at least + * write some data but we do not require the allocation to be + * complete, i.e. it may be partial. + */ + ll = ntfs_attr_extend_allocation(ni, end, -1, pos); + if (likely(ll >= 0)) { + BUG_ON(pos >= ll); + /* If the extension was partial truncate the write. */ + if (end > ll) { + ntfs_debug("Truncating write to inode 0x%lx, " + "attribute type 0x%x, because " + "the allocation was only " + "partially extended.", + vi->i_ino, (unsigned) + le32_to_cpu(ni->type)); + *count = ll - pos; + } + } else { + err = ll; + read_lock_irqsave(&ni->size_lock, flags); + ll = ni->allocated_size; + read_unlock_irqrestore(&ni->size_lock, flags); + /* Perform a partial write if possible or fail. */ + if (pos < ll) { + ntfs_debug("Truncating write to inode 0x%lx " + "attribute type 0x%x, because " + "extending the allocation " + "failed (error %d).", + vi->i_ino, (unsigned) + le32_to_cpu(ni->type), + (int)-err); + *count = ll - pos; + } else { + if (err != -ENOSPC) + ntfs_error(vi->i_sb, "Cannot perform " + "write to inode " + "0x%lx, attribute " + "type 0x%x, because " + "extending the " + "allocation failed " + "(error %ld).", + vi->i_ino, (unsigned) + le32_to_cpu(ni->type), + (long)-err); + else + ntfs_debug("Cannot perform write to " + "inode 0x%lx, " + "attribute type 0x%x, " + "because there is not " + "space left.", + vi->i_ino, (unsigned) + le32_to_cpu(ni->type)); + goto out; + } + } + } + /* + * If the write starts beyond the initialized size, extend it up to the + * beginning of the write and initialize all non-sparse space between + * the old initialized size and the new one. This automatically also + * increments the vfs inode->i_size to keep it above or equal to the + * initialized_size. + */ + read_lock_irqsave(&ni->size_lock, flags); + ll = ni->initialized_size; + read_unlock_irqrestore(&ni->size_lock, flags); + if (pos > ll) { + /* + * Wait for ongoing direct i/o to complete before proceeding. + * New direct i/o cannot start as we hold i_mutex. + */ + inode_dio_wait(vi); + err = ntfs_attr_extend_initialized(ni, pos); + if (unlikely(err < 0)) + ntfs_error(vi->i_sb, "Cannot perform write to inode " + "0x%lx, attribute type 0x%x, because " + "extending the initialized size " + "failed (error %d).", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), + (int)-err); + } +out: + return err; } /** @@ -421,8 +526,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping, goto err_out; } } - err = add_to_page_cache_lru(*cached_page, mapping, index, - GFP_KERNEL); + err = add_to_page_cache_lru(*cached_page, mapping, + index, GFP_KERNEL); if (unlikely(err)) { if (err == -EEXIST) continue; @@ -1268,180 +1373,6 @@ rl_not_mapped_enoent: return err; } -/* - * Copy as much as we can into the pages and return the number of bytes which - * were successfully copied. If a fault is encountered then clear the pages - * out to (ofs + bytes) and return the number of bytes which were copied. - */ -static inline size_t ntfs_copy_from_user(struct page **pages, - unsigned nr_pages, unsigned ofs, const char __user *buf, - size_t bytes) -{ - struct page **last_page = pages + nr_pages; - char *addr; - size_t total = 0; - unsigned len; - int left; - - do { - len = PAGE_CACHE_SIZE - ofs; - if (len > bytes) - len = bytes; - addr = kmap_atomic(*pages); - left = __copy_from_user_inatomic(addr + ofs, buf, len); - kunmap_atomic(addr); - if (unlikely(left)) { - /* Do it the slow way. */ - addr = kmap(*pages); - left = __copy_from_user(addr + ofs, buf, len); - kunmap(*pages); - if (unlikely(left)) - goto err_out; - } - total += len; - bytes -= len; - if (!bytes) - break; - buf += len; - ofs = 0; - } while (++pages < last_page); -out: - return total; -err_out: - total += len - left; - /* Zero the rest of the target like __copy_from_user(). */ - while (++pages < last_page) { - bytes -= len; - if (!bytes) - break; - len = PAGE_CACHE_SIZE; - if (len > bytes) - len = bytes; - zero_user(*pages, 0, len); - } - goto out; -} - -static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr, - const struct iovec *iov, size_t iov_ofs, size_t bytes) -{ - size_t total = 0; - - while (1) { - const char __user *buf = iov->iov_base + iov_ofs; - unsigned len; - size_t left; - - len = iov->iov_len - iov_ofs; - if (len > bytes) - len = bytes; - left = __copy_from_user_inatomic(vaddr, buf, len); - total += len; - bytes -= len; - vaddr += len; - if (unlikely(left)) { - total -= left; - break; - } - if (!bytes) - break; - iov++; - iov_ofs = 0; - } - return total; -} - -static inline void ntfs_set_next_iovec(const struct iovec **iovp, - size_t *iov_ofsp, size_t bytes) -{ - const struct iovec *iov = *iovp; - size_t iov_ofs = *iov_ofsp; - - while (bytes) { - unsigned len; - - len = iov->iov_len - iov_ofs; - if (len > bytes) - len = bytes; - bytes -= len; - iov_ofs += len; - if (iov->iov_len == iov_ofs) { - iov++; - iov_ofs = 0; - } - } - *iovp = iov; - *iov_ofsp = iov_ofs; -} - -/* - * This has the same side-effects and return value as ntfs_copy_from_user(). - * The difference is that on a fault we need to memset the remainder of the - * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s - * single-segment behaviour. - * - * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when - * atomic and when not atomic. This is ok because it calls - * __copy_from_user_inatomic() and it is ok to call this when non-atomic. In - * fact, the only difference between __copy_from_user_inatomic() and - * __copy_from_user() is that the latter calls might_sleep() and the former - * should not zero the tail of the buffer on error. And on many architectures - * __copy_from_user_inatomic() is just defined to __copy_from_user() so it - * makes no difference at all on those architectures. - */ -static inline size_t ntfs_copy_from_user_iovec(struct page **pages, - unsigned nr_pages, unsigned ofs, const struct iovec **iov, - size_t *iov_ofs, size_t bytes) -{ - struct page **last_page = pages + nr_pages; - char *addr; - size_t copied, len, total = 0; - - do { - len = PAGE_CACHE_SIZE - ofs; - if (len > bytes) - len = bytes; - addr = kmap_atomic(*pages); - copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs, - *iov, *iov_ofs, len); - kunmap_atomic(addr); - if (unlikely(copied != len)) { - /* Do it the slow way. */ - addr = kmap(*pages); - copied = __ntfs_copy_from_user_iovec_inatomic(addr + - ofs, *iov, *iov_ofs, len); - if (unlikely(copied != len)) - goto err_out; - kunmap(*pages); - } - total += len; - ntfs_set_next_iovec(iov, iov_ofs, len); - bytes -= len; - if (!bytes) - break; - ofs = 0; - } while (++pages < last_page); -out: - return total; -err_out: - BUG_ON(copied > len); - /* Zero the rest of the target like __copy_from_user(). */ - memset(addr + ofs + copied, 0, len - copied); - kunmap(*pages); - total += copied; - ntfs_set_next_iovec(iov, iov_ofs, copied); - while (++pages < last_page) { - bytes -= len; - if (!bytes) - break; - len = PAGE_CACHE_SIZE; - if (len > bytes) - len = bytes; - zero_user(*pages, 0, len); - } - goto out; -} - static inline void ntfs_flush_dcache_pages(struct page **pages, unsigned nr_pages) { @@ -1762,86 +1693,83 @@ err_out: return err; } -static void ntfs_write_failed(struct address_space *mapping, loff_t to) +/* + * Copy as much as we can into the pages and return the number of bytes which + * were successfully copied. If a fault is encountered then clear the pages + * out to (ofs + bytes) and return the number of bytes which were copied. + */ +static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages, + unsigned ofs, struct iov_iter *i, size_t bytes) { - struct inode *inode = mapping->host; + struct page **last_page = pages + nr_pages; + size_t total = 0; + struct iov_iter data = *i; + unsigned len, copied; - if (to > inode->i_size) { - truncate_pagecache(inode, inode->i_size); - ntfs_truncate_vfs(inode); - } + do { + len = PAGE_CACHE_SIZE - ofs; + if (len > bytes) + len = bytes; + copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs, + len); + total += copied; + bytes -= copied; + if (!bytes) + break; + iov_iter_advance(&data, copied); + if (copied < len) + goto err; + ofs = 0; + } while (++pages < last_page); +out: + return total; +err: + /* Zero the rest of the target like __copy_from_user(). */ + len = PAGE_CACHE_SIZE - copied; + do { + if (len > bytes) + len = bytes; + zero_user(*pages, copied, len); + bytes -= len; + copied = 0; + len = PAGE_CACHE_SIZE; + } while (++pages < last_page); + goto out; } /** - * ntfs_file_buffered_write - - * - * Locking: The vfs is holding ->i_mutex on the inode. + * ntfs_perform_write - perform buffered write to a file + * @file: file to write to + * @i: iov_iter with data to write + * @pos: byte offset in file at which to begin writing to */ -static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, - const struct iovec *iov, unsigned long nr_segs, - loff_t pos, loff_t *ppos, size_t count) +static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i, + loff_t pos) { - struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *vi = mapping->host; ntfs_inode *ni = NTFS_I(vi); ntfs_volume *vol = ni->vol; struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER]; struct page *cached_page = NULL; - char __user *buf = NULL; - s64 end, ll; VCN last_vcn; LCN lcn; - unsigned long flags; - size_t bytes, iov_ofs = 0; /* Offset in the current iovec. */ - ssize_t status, written; + size_t bytes; + ssize_t status, written = 0; unsigned nr_pages; - int err; - ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, " - "pos 0x%llx, count 0x%lx.", - vi->i_ino, (unsigned)le32_to_cpu(ni->type), - (unsigned long long)pos, (unsigned long)count); - if (unlikely(!count)) - return 0; - BUG_ON(NInoMstProtected(ni)); - /* - * If the attribute is not an index root and it is encrypted or - * compressed, we cannot write to it yet. Note we need to check for - * AT_INDEX_ALLOCATION since this is the type of both directory and - * index inodes. - */ - if (ni->type != AT_INDEX_ALLOCATION) { - /* If file is encrypted, deny access, just like NT4. */ - if (NInoEncrypted(ni)) { - /* - * Reminder for later: Encrypted files are _always_ - * non-resident so that the content can always be - * encrypted. - */ - ntfs_debug("Denying write access to encrypted file."); - return -EACCES; - } - if (NInoCompressed(ni)) { - /* Only unnamed $DATA attribute can be compressed. */ - BUG_ON(ni->type != AT_DATA); - BUG_ON(ni->name_len); - /* - * Reminder for later: If resident, the data is not - * actually compressed. Only on the switch to non- - * resident does compression kick in. This is in - * contrast to encrypted files (see above). - */ - ntfs_error(vi->i_sb, "Writing to compressed files is " - "not implemented yet. Sorry."); - return -EOPNOTSUPP; - } - } + ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos " + "0x%llx, count 0x%lx.", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), + (unsigned long long)pos, + (unsigned long)iov_iter_count(i)); /* * If a previous ntfs_truncate() failed, repeat it and abort if it * fails again. */ if (unlikely(NInoTruncateFailed(ni))) { + int err; + inode_dio_wait(vi); err = ntfs_truncate(vi); if (err || NInoTruncateFailed(ni)) { @@ -1855,81 +1783,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, return err; } } - /* The first byte after the write. */ - end = pos + count; - /* - * If the write goes beyond the allocated size, extend the allocation - * to cover the whole of the write, rounded up to the nearest cluster. - */ - read_lock_irqsave(&ni->size_lock, flags); - ll = ni->allocated_size; - read_unlock_irqrestore(&ni->size_lock, flags); - if (end > ll) { - /* Extend the allocation without changing the data size. */ - ll = ntfs_attr_extend_allocation(ni, end, -1, pos); - if (likely(ll >= 0)) { - BUG_ON(pos >= ll); - /* If the extension was partial truncate the write. */ - if (end > ll) { - ntfs_debug("Truncating write to inode 0x%lx, " - "attribute type 0x%x, because " - "the allocation was only " - "partially extended.", - vi->i_ino, (unsigned) - le32_to_cpu(ni->type)); - end = ll; - count = ll - pos; - } - } else { - err = ll; - read_lock_irqsave(&ni->size_lock, flags); - ll = ni->allocated_size; - read_unlock_irqrestore(&ni->size_lock, flags); - /* Perform a partial write if possible or fail. */ - if (pos < ll) { - ntfs_debug("Truncating write to inode 0x%lx, " - "attribute type 0x%x, because " - "extending the allocation " - "failed (error code %i).", - vi->i_ino, (unsigned) - le32_to_cpu(ni->type), err); - end = ll; - count = ll - pos; - } else { - ntfs_error(vol->sb, "Cannot perform write to " - "inode 0x%lx, attribute type " - "0x%x, because extending the " - "allocation failed (error " - "code %i).", vi->i_ino, - (unsigned) - le32_to_cpu(ni->type), err); - return err; - } - } - } - written = 0; - /* - * If the write starts beyond the initialized size, extend it up to the - * beginning of the write and initialize all non-sparse space between - * the old initialized size and the new one. This automatically also - * increments the vfs inode->i_size to keep it above or equal to the - * initialized_size. - */ - read_lock_irqsave(&ni->size_lock, flags); - ll = ni->initialized_size; - read_unlock_irqrestore(&ni->size_lock, flags); - if (pos > ll) { - err = ntfs_attr_extend_initialized(ni, pos); - if (err < 0) { - ntfs_error(vol->sb, "Cannot perform write to inode " - "0x%lx, attribute type 0x%x, because " - "extending the initialized size " - "failed (error code %i).", vi->i_ino, - (unsigned)le32_to_cpu(ni->type), err); - status = err; - goto err_out; - } - } /* * Determine the number of pages per cluster for non-resident * attributes. @@ -1937,10 +1790,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, nr_pages = 1; if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni)) nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT; - /* Finally, perform the actual write. */ last_vcn = -1; - if (likely(nr_segs == 1)) - buf = iov->iov_base; do { VCN vcn; pgoff_t idx, start_idx; @@ -1965,10 +1815,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, vol->cluster_size_bits, false); up_read(&ni->runlist.lock); if (unlikely(lcn < LCN_HOLE)) { - status = -EIO; if (lcn == LCN_ENOMEM) status = -ENOMEM; - else + else { + status = -EIO; ntfs_error(vol->sb, "Cannot " "perform write to " "inode 0x%lx, " @@ -1977,6 +1827,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, "is corrupt.", vi->i_ino, (unsigned) le32_to_cpu(ni->type)); + } break; } if (lcn == LCN_HOLE) { @@ -1989,8 +1840,9 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, } } } - if (bytes > count) - bytes = count; + if (bytes > iov_iter_count(i)) + bytes = iov_iter_count(i); +again: /* * Bring in the user page(s) that we will copy from _first_. * Otherwise there is a nasty deadlock on copying from the same @@ -1999,10 +1851,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, * pages being swapped out between us bringing them into memory * and doing the actual copying. */ - if (likely(nr_segs == 1)) - ntfs_fault_in_pages_readable(buf, bytes); - else - ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes); + if (unlikely(iov_iter_fault_in_multipages_readable(i, bytes))) { + status = -EFAULT; + break; + } /* Get and lock @do_pages starting at index @start_idx. */ status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages, pages, &cached_page); @@ -2018,56 +1870,57 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, status = ntfs_prepare_pages_for_non_resident_write( pages, do_pages, pos, bytes); if (unlikely(status)) { - loff_t i_size; - do { unlock_page(pages[--do_pages]); page_cache_release(pages[do_pages]); } while (do_pages); - /* - * The write preparation may have instantiated - * allocated space outside i_size. Trim this - * off again. We can ignore any errors in this - * case as we will just be waisting a bit of - * allocated space, which is not a disaster. - */ - i_size = i_size_read(vi); - if (pos + bytes > i_size) { - ntfs_write_failed(mapping, pos + bytes); - } break; } } u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index; - if (likely(nr_segs == 1)) { - copied = ntfs_copy_from_user(pages + u, do_pages - u, - ofs, buf, bytes); - buf += copied; - } else - copied = ntfs_copy_from_user_iovec(pages + u, - do_pages - u, ofs, &iov, &iov_ofs, - bytes); + copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs, + i, bytes); ntfs_flush_dcache_pages(pages + u, do_pages - u); - status = ntfs_commit_pages_after_write(pages, do_pages, pos, - bytes); - if (likely(!status)) { - written += copied; - count -= copied; - pos += copied; - if (unlikely(copied != bytes)) - status = -EFAULT; + status = 0; + if (likely(copied == bytes)) { + status = ntfs_commit_pages_after_write(pages, do_pages, + pos, bytes); + if (!status) + status = bytes; } do { unlock_page(pages[--do_pages]); page_cache_release(pages[do_pages]); } while (do_pages); - if (unlikely(status)) + if (unlikely(status < 0)) break; - balance_dirty_pages_ratelimited(mapping); + copied = status; cond_resched(); - } while (count); -err_out: - *ppos = pos; + if (unlikely(!copied)) { + size_t sc; + + /* + * We failed to copy anything. Fall back to single + * segment length write. + * + * This is needed to avoid possible livelock in the + * case that all segments in the iov cannot be copied + * at once without a pagefault. + */ + sc = iov_iter_single_seg_count(i); + if (bytes > sc) + bytes = sc; + goto again; + } + iov_iter_advance(i, copied); + pos += copied; + written += copied; + balance_dirty_pages_ratelimited(mapping); + if (fatal_signal_pending(current)) { + status = -EINTR; + break; + } + } while (iov_iter_count(i)); if (cached_page) page_cache_release(cached_page); ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", @@ -2077,59 +1930,56 @@ err_out: } /** - * ntfs_file_aio_write_nolock - + * ntfs_file_write_iter_nolock - write data to a file + * @iocb: IO state structure (file, offset, etc.) + * @from: iov_iter with data to write + * + * Basically the same as __generic_file_write_iter() except that it ends + * up calling ntfs_perform_write() instead of generic_perform_write() and that + * O_DIRECT is not implemented. */ -static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, - const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) +static ssize_t ntfs_file_write_iter_nolock(struct kiocb *iocb, + struct iov_iter *from) { struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - loff_t pos; - size_t count; /* after file limit checks */ - ssize_t written, err; + loff_t pos = iocb->ki_pos; + ssize_t written = 0; + ssize_t err; + size_t count = iov_iter_count(from); - count = iov_length(iov, nr_segs); - pos = *ppos; - /* We can write back this queue in page reclaim. */ - current->backing_dev_info = inode_to_bdi(inode); - written = 0; - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); - if (err) - goto out; - if (!count) - goto out; - err = file_remove_suid(file); - if (err) - goto out; - err = file_update_time(file); - if (err) - goto out; - written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos, - count); -out: + err = ntfs_prepare_file_for_write(file, &pos, &count); + if (count && !err) { + iov_iter_truncate(from, count); + written = ntfs_perform_write(file, from, pos); + if (likely(written >= 0)) + iocb->ki_pos = pos + written; + } current->backing_dev_info = NULL; return written ? written : err; } /** - * ntfs_file_aio_write - + * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock() + * @iocb: IO state structure + * @from: iov_iter with data to write + * + * Basically the same as generic_file_write_iter() except that it ends up + * calling ntfs_file_write_iter_nolock() instead of + * __generic_file_write_iter(). */ -static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; + struct inode *vi = file_inode(file); ssize_t ret; - BUG_ON(iocb->ki_pos != pos); - - mutex_lock(&inode->i_mutex); - ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); - mutex_unlock(&inode->i_mutex); + mutex_lock(&vi->i_mutex); + ret = ntfs_file_write_iter_nolock(iocb, from); + mutex_unlock(&vi->i_mutex); if (ret > 0) { - int err = generic_write_sync(file, iocb->ki_pos - ret, ret); + ssize_t err; + + err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0) ret = err; } @@ -2197,37 +2047,15 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end, #endif /* NTFS_RW */ const struct file_operations ntfs_file_ops = { - .llseek = generic_file_llseek, /* Seek inside file. */ - .read = new_sync_read, /* Read from file. */ - .read_iter = generic_file_read_iter, /* Async read from file. */ + .llseek = generic_file_llseek, + .read_iter = generic_file_read_iter, #ifdef NTFS_RW - .write = do_sync_write, /* Write to file. */ - .aio_write = ntfs_file_aio_write, /* Async write to file. */ - /*.release = ,*/ /* Last file is closed. See - fs/ext2/file.c:: - ext2_release_file() for - how to use this to discard - preallocated space for - write opened files. */ - .fsync = ntfs_file_fsync, /* Sync a file to disk. */ - /*.aio_fsync = ,*/ /* Sync all outstanding async - i/o operations on a - kiocb. */ + .write_iter = ntfs_file_write_iter, + .fsync = ntfs_file_fsync, #endif /* NTFS_RW */ - /*.ioctl = ,*/ /* Perform function on the - mounted filesystem. */ - .mmap = generic_file_mmap, /* Mmap file. */ - .open = ntfs_file_open, /* Open file. */ - .splice_read = generic_file_splice_read /* Zero-copy data send with - the data source being on - the ntfs partition. We do - not need to care about the - data destination. */ - /*.sendpage = ,*/ /* Zero-copy data send with - the data destination being - on the ntfs partition. We - do not need to care about - the data source. */ + .mmap = generic_file_mmap, + .open = ntfs_file_open, + .splice_read = generic_file_splice_read, }; const struct inode_operations ntfs_file_inode_ops = { diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 898b9949d363..1d0c21df0d80 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -28,7 +28,6 @@ #include <linux/quotaops.h> #include <linux/slab.h> #include <linux/log2.h> -#include <linux/aio.h> #include "aops.h" #include "attrib.h" diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 044158bd22be..2d7f76e52c37 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -3370,7 +3370,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path, ret = ocfs2_get_right_path(et, left_path, &right_path); if (ret) { mlog_errno(ret); - goto out; + return ret; } right_el = path_leaf_el(right_path); @@ -3453,8 +3453,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path, subtree_index); } out: - if (right_path) - ocfs2_free_path(right_path); + ocfs2_free_path(right_path); return ret; } @@ -3536,7 +3535,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path, ret = ocfs2_get_left_path(et, right_path, &left_path); if (ret) { mlog_errno(ret); - goto out; + return ret; } left_el = path_leaf_el(left_path); @@ -3647,8 +3646,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path, right_path, subtree_index); } out: - if (left_path) - ocfs2_free_path(left_path); + ocfs2_free_path(left_path); return ret; } @@ -4334,17 +4332,17 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, } else if (path->p_tree_depth > 0) { status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos); if (status) - goto out; + goto exit; if (left_cpos != 0) { left_path = ocfs2_new_path_from_path(path); if (!left_path) - goto out; + goto exit; status = ocfs2_find_path(et->et_ci, left_path, left_cpos); if (status) - goto out; + goto free_left_path; new_el = path_leaf_el(left_path); @@ -4361,7 +4359,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, le16_to_cpu(new_el->l_next_free_rec), le16_to_cpu(new_el->l_count)); status = -EINVAL; - goto out; + goto free_left_path; } rec = &new_el->l_recs[ le16_to_cpu(new_el->l_next_free_rec) - 1]; @@ -4388,18 +4386,18 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, path->p_tree_depth > 0) { status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos); if (status) - goto out; + goto free_left_path; if (right_cpos == 0) - goto out; + goto free_left_path; right_path = ocfs2_new_path_from_path(path); if (!right_path) - goto out; + goto free_left_path; status = ocfs2_find_path(et->et_ci, right_path, right_cpos); if (status) - goto out; + goto free_right_path; new_el = path_leaf_el(right_path); rec = &new_el->l_recs[0]; @@ -4413,7 +4411,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, (unsigned long long)le64_to_cpu(eb->h_blkno), le16_to_cpu(new_el->l_next_free_rec)); status = -EINVAL; - goto out; + goto free_right_path; } rec = &new_el->l_recs[1]; } @@ -4430,12 +4428,11 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, ret = contig_type; } -out: - if (left_path) - ocfs2_free_path(left_path); - if (right_path) - ocfs2_free_path(right_path); - +free_right_path: + ocfs2_free_path(right_path); +free_left_path: + ocfs2_free_path(left_path); +exit: return ret; } @@ -6858,13 +6855,13 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, if (pages == NULL) { ret = -ENOMEM; mlog_errno(ret); - goto out; + return ret; } ret = ocfs2_reserve_clusters(osb, 1, &data_ac); if (ret) { mlog_errno(ret); - goto out; + goto free_pages; } } @@ -6996,9 +6993,8 @@ out_commit: out: if (data_ac) ocfs2_free_alloc_context(data_ac); - if (pages) - kfree(pages); - +free_pages: + kfree(pages); return ret; } diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 44db1808cdb5..8d2bc840c288 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -29,6 +29,7 @@ #include <linux/mpage.h> #include <linux/quotaops.h> #include <linux/blkdev.h> +#include <linux/uio.h> #include <cluster/masklog.h> @@ -663,6 +664,117 @@ static int ocfs2_is_overwrite(struct ocfs2_super *osb, return 0; } +static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb, + struct inode *inode, loff_t offset, + u64 zero_len, int cluster_align) +{ + u32 p_cpos = 0; + u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode)); + unsigned int num_clusters = 0; + unsigned int ext_flags = 0; + int ret = 0; + + if (offset <= i_size_read(inode) || cluster_align) + return 0; + + ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters, + &ext_flags); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { + u64 s = i_size_read(inode); + sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) + + (do_div(s, osb->s_clustersize) >> 9); + + ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector, + zero_len >> 9, GFP_NOFS, false); + if (ret < 0) + mlog_errno(ret); + } + + return ret; +} + +static int ocfs2_direct_IO_extend_no_holes(struct ocfs2_super *osb, + struct inode *inode, loff_t offset) +{ + u64 zero_start, zero_len, total_zero_len; + u32 p_cpos = 0, clusters_to_add; + u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode)); + unsigned int num_clusters = 0; + unsigned int ext_flags = 0; + u32 size_div, offset_div; + int ret = 0; + + { + u64 o = offset; + u64 s = i_size_read(inode); + + offset_div = do_div(o, osb->s_clustersize); + size_div = do_div(s, osb->s_clustersize); + } + + if (offset <= i_size_read(inode)) + return 0; + + clusters_to_add = ocfs2_bytes_to_clusters(inode->i_sb, offset) - + ocfs2_bytes_to_clusters(inode->i_sb, i_size_read(inode)); + total_zero_len = offset - i_size_read(inode); + if (clusters_to_add) + total_zero_len -= offset_div; + + /* Allocate clusters to fill out holes, and this is only needed + * when we add more than one clusters. Otherwise the cluster will + * be allocated during direct IO */ + if (clusters_to_add > 1) { + ret = ocfs2_extend_allocation(inode, + OCFS2_I(inode)->ip_clusters, + clusters_to_add - 1, 0); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + while (total_zero_len) { + ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters, + &ext_flags); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + zero_start = ocfs2_clusters_to_bytes(osb->sb, p_cpos) + + size_div; + zero_len = ocfs2_clusters_to_bytes(osb->sb, num_clusters) - + size_div; + zero_len = min(total_zero_len, zero_len); + + if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { + ret = blkdev_issue_zeroout(osb->sb->s_bdev, + zero_start >> 9, zero_len >> 9, + GFP_NOFS, false); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + } + + total_zero_len -= zero_len; + v_cpos += ocfs2_bytes_to_clusters(osb->sb, zero_len + size_div); + + /* Only at first iteration can be cluster not aligned. + * So set size_div to 0 for the rest */ + size_div = 0; + } + +out: + return ret; +} + static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) @@ -677,8 +789,8 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, struct buffer_head *di_bh = NULL; size_t count = iter->count; journal_t *journal = osb->journal->j_journal; - u32 zero_len; - int cluster_align; + u64 zero_len_head, zero_len_tail; + int cluster_align_head, cluster_align_tail; loff_t final_size = offset + count; int append_write = offset >= i_size_read(inode) ? 1 : 0; unsigned int num_clusters = 0; @@ -686,9 +798,16 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, { u64 o = offset; + u64 s = i_size_read(inode); + + zero_len_head = do_div(o, 1 << osb->s_clustersize_bits); + cluster_align_head = !zero_len_head; - zero_len = do_div(o, 1 << osb->s_clustersize_bits); - cluster_align = !zero_len; + zero_len_tail = osb->s_clustersize - + do_div(s, osb->s_clustersize); + if ((offset - i_size_read(inode)) < zero_len_tail) + zero_len_tail = offset - i_size_read(inode); + cluster_align_tail = !zero_len_tail; } /* @@ -706,21 +825,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, } if (append_write) { - ret = ocfs2_inode_lock(inode, &di_bh, 1); + ret = ocfs2_inode_lock(inode, NULL, 1); if (ret < 0) { mlog_errno(ret); goto clean_orphan; } + /* zeroing out the previously allocated cluster tail + * that but not zeroed */ if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) - ret = ocfs2_zero_extend(inode, di_bh, offset); + ret = ocfs2_direct_IO_zero_extend(osb, inode, offset, + zero_len_tail, cluster_align_tail); else - ret = ocfs2_extend_no_holes(inode, di_bh, offset, + ret = ocfs2_direct_IO_extend_no_holes(osb, inode, offset); if (ret < 0) { mlog_errno(ret); ocfs2_inode_unlock(inode, 1); - brelse(di_bh); goto clean_orphan; } @@ -728,13 +849,10 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, if (is_overwrite < 0) { mlog_errno(is_overwrite); ocfs2_inode_unlock(inode, 1); - brelse(di_bh); goto clean_orphan; } ocfs2_inode_unlock(inode, 1); - brelse(di_bh); - di_bh = NULL; } written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev, @@ -771,15 +889,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, if (ret < 0) mlog_errno(ret); } - } else if (written < 0 && append_write && !is_overwrite && - !cluster_align) { + } else if (written > 0 && append_write && !is_overwrite && + !cluster_align_head) { + /* zeroing out the allocated cluster head */ u32 p_cpos = 0; u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); + ret = ocfs2_inode_lock(inode, NULL, 0); + if (ret < 0) { + mlog_errno(ret); + goto clean_orphan; + } + ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters, &ext_flags); if (ret < 0) { mlog_errno(ret); + ocfs2_inode_unlock(inode, 0); goto clean_orphan; } @@ -787,9 +913,11 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, ret = blkdev_issue_zeroout(osb->sb->s_bdev, p_cpos << (osb->s_clustersize_bits - 9), - zero_len >> 9, GFP_KERNEL, false); + zero_len_head >> 9, GFP_NOFS, false); if (ret < 0) mlog_errno(ret); + + ocfs2_inode_unlock(inode, 0); } clean_orphan: diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index 6cae155d54df..dd59599b022d 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h @@ -22,7 +22,7 @@ #ifndef OCFS2_AOPS_H #define OCFS2_AOPS_H -#include <linux/aio.h> +#include <linux/fs.h> handle_t *ocfs2_start_walk_page_trans(struct inode *inode, struct page *page, diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 16eff45727ee..8e19b9d7aba8 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1312,7 +1312,9 @@ static int o2hb_debug_init(void) int ret = -ENOMEM; o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); - if (!o2hb_debug_dir) { + if (IS_ERR_OR_NULL(o2hb_debug_dir)) { + ret = o2hb_debug_dir ? + PTR_ERR(o2hb_debug_dir) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -1325,7 +1327,9 @@ static int o2hb_debug_init(void) sizeof(o2hb_live_node_bitmap), O2NM_MAX_NODES, o2hb_live_node_bitmap); - if (!o2hb_debug_livenodes) { + if (IS_ERR_OR_NULL(o2hb_debug_livenodes)) { + ret = o2hb_debug_livenodes ? + PTR_ERR(o2hb_debug_livenodes) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -1338,7 +1342,9 @@ static int o2hb_debug_init(void) sizeof(o2hb_live_region_bitmap), O2NM_MAX_REGIONS, o2hb_live_region_bitmap); - if (!o2hb_debug_liveregions) { + if (IS_ERR_OR_NULL(o2hb_debug_liveregions)) { + ret = o2hb_debug_liveregions ? + PTR_ERR(o2hb_debug_liveregions) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -1352,7 +1358,9 @@ static int o2hb_debug_init(void) sizeof(o2hb_quorum_region_bitmap), O2NM_MAX_REGIONS, o2hb_quorum_region_bitmap); - if (!o2hb_debug_quorumregions) { + if (IS_ERR_OR_NULL(o2hb_debug_quorumregions)) { + ret = o2hb_debug_quorumregions ? + PTR_ERR(o2hb_debug_quorumregions) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -1366,7 +1374,9 @@ static int o2hb_debug_init(void) sizeof(o2hb_failed_region_bitmap), O2NM_MAX_REGIONS, o2hb_failed_region_bitmap); - if (!o2hb_debug_failedregions) { + if (IS_ERR_OR_NULL(o2hb_debug_failedregions)) { + ret = o2hb_debug_failedregions ? + PTR_ERR(o2hb_debug_failedregions) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -2000,7 +2010,8 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) reg->hr_debug_dir = debugfs_create_dir(config_item_name(®->hr_item), dir); - if (!reg->hr_debug_dir) { + if (IS_ERR_OR_NULL(reg->hr_debug_dir)) { + ret = reg->hr_debug_dir ? PTR_ERR(reg->hr_debug_dir) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -2013,7 +2024,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) O2HB_DB_TYPE_REGION_LIVENODES, sizeof(reg->hr_live_node_bitmap), O2NM_MAX_NODES, reg); - if (!reg->hr_debug_livenodes) { + if (IS_ERR_OR_NULL(reg->hr_debug_livenodes)) { + ret = reg->hr_debug_livenodes ? + PTR_ERR(reg->hr_debug_livenodes) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -2025,7 +2038,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) sizeof(*(reg->hr_db_regnum)), O2HB_DB_TYPE_REGION_NUMBER, 0, O2NM_MAX_NODES, reg); - if (!reg->hr_debug_regnum) { + if (IS_ERR_OR_NULL(reg->hr_debug_regnum)) { + ret = reg->hr_debug_regnum ? + PTR_ERR(reg->hr_debug_regnum) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -2037,7 +2052,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) sizeof(*(reg->hr_db_elapsed_time)), O2HB_DB_TYPE_REGION_ELAPSED_TIME, 0, 0, reg); - if (!reg->hr_debug_elapsed_time) { + if (IS_ERR_OR_NULL(reg->hr_debug_elapsed_time)) { + ret = reg->hr_debug_elapsed_time ? + PTR_ERR(reg->hr_debug_elapsed_time) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -2049,13 +2066,16 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) sizeof(*(reg->hr_db_pinned)), O2HB_DB_TYPE_REGION_PINNED, 0, 0, reg); - if (!reg->hr_debug_pinned) { + if (IS_ERR_OR_NULL(reg->hr_debug_pinned)) { + ret = reg->hr_debug_pinned ? + PTR_ERR(reg->hr_debug_pinned) : -ENOMEM; mlog_errno(ret); goto bail; } - ret = 0; + return 0; bail: + debugfs_remove_recursive(reg->hr_debug_dir); return ret; } diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 2260fb9e6508..7fdc25a4d8c0 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -196,13 +196,14 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; } \ } while (0) -#define mlog_errno(st) do { \ +#define mlog_errno(st) ({ \ int _st = (st); \ if (_st != -ERESTARTSYS && _st != -EINTR && \ _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \ _st != -EDQUOT) \ mlog(ML_ERROR, "status = %lld\n", (long long)_st); \ -} while (0) + _st; \ +}) #define mlog_bug_on_msg(cond, fmt, args...) do { \ if (cond) { \ diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index b08050bd3f2e..ccd4dcfc3645 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -18,7 +18,7 @@ * * linux/fs/minix/dir.c * - * Copyright (C) 1991, 1992 Linux Torvalds + * Copyright (C) 1991, 1992 Linus Torvalds * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public @@ -2047,22 +2047,19 @@ int ocfs2_check_dir_for_entry(struct inode *dir, const char *name, int namelen) { - int ret; + int ret = 0; struct ocfs2_dir_lookup_result lookup = { NULL, }; trace_ocfs2_check_dir_for_entry( (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); - ret = -EEXIST; - if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) - goto bail; + if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) { + ret = -EEXIST; + mlog_errno(ret); + } - ret = 0; -bail: ocfs2_free_dir_lookup_result(&lookup); - if (ret) - mlog_errno(ret); return ret; } diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 11849a44dc5a..956edf67be20 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1391,6 +1391,11 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb, int noqueue_attempted = 0; int dlm_locked = 0; + if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) { + mlog_errno(-EINVAL); + return -EINVAL; + } + ocfs2_init_mask_waiter(&mw); if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) @@ -2954,7 +2959,7 @@ static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) osb->osb_debug_root, osb, &ocfs2_dlm_debug_fops); - if (!dlm_debug->d_locking_state) { + if (IS_ERR_OR_NULL(dlm_debug->d_locking_state)) { ret = -EINVAL; mlog(ML_ERROR, "Unable to create locking state debugfs file.\n"); diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 29651167190d..540dc4bdd042 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -82,7 +82,6 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, } status = ocfs2_test_inode_bit(osb, blkno, &set); - trace_ocfs2_get_dentry_test_bit(status, set); if (status < 0) { if (status == -EINVAL) { /* @@ -96,6 +95,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, goto unlock_nfs_sync; } + trace_ocfs2_get_dentry_test_bit(status, set); /* If the inode allocator bit is clear, this inode must be stale */ if (!set) { status = -ESTALE; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 46e0d4e857c7..8c48e989beba 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2280,7 +2280,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, file->f_path.dentry->d_name.name, (unsigned int)from->nr_segs); /* GRRRRR */ - if (iocb->ki_nbytes == 0) + if (count == 0) return 0; appending = file->f_flags & O_APPEND ? 1 : 0; @@ -2330,8 +2330,7 @@ relock: } can_do_direct = direct_io; - ret = ocfs2_prepare_inode_for_write(file, ppos, - iocb->ki_nbytes, appending, + ret = ocfs2_prepare_inode_for_write(file, ppos, count, appending, &can_do_direct, &has_refcount); if (ret < 0) { mlog_errno(ret); @@ -2339,8 +2338,7 @@ relock: } if (direct_io && !is_sync_kiocb(iocb)) - unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_nbytes, - *ppos); + unaligned_dio = ocfs2_is_io_unaligned(inode, count, *ppos); /* * We can't complete the direct I/O as requested, fall back to @@ -2394,7 +2392,6 @@ relock: /* * for completing the rest of the request. */ - *ppos += written; count -= written; written_buffered = generic_perform_write(file, from, *ppos); /* @@ -2409,7 +2406,6 @@ relock: goto out_dio; } - iocb->ki_pos = *ppos + written_buffered; /* We need to ensure that the page cache pages are written to * disk and invalidated to preserve the expected O_DIRECT * semantics. @@ -2418,6 +2414,7 @@ relock: ret = filemap_write_and_wait_range(file->f_mapping, *ppos, endbyte); if (ret == 0) { + iocb->ki_pos = *ppos + written_buffered; written += written_buffered; invalidate_mapping_pages(mapping, *ppos >> PAGE_CACHE_SHIFT, @@ -2440,10 +2437,14 @@ out_dio: /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); + if (unlikely(written <= 0)) + goto no_sync; + if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || ((file->f_flags & O_DIRECT) && !direct_io)) { - ret = filemap_fdatawrite_range(file->f_mapping, *ppos, - *ppos + count - 1); + ret = filemap_fdatawrite_range(file->f_mapping, + iocb->ki_pos - written, + iocb->ki_pos - 1); if (ret < 0) written = ret; @@ -2454,10 +2455,12 @@ out_dio: } if (!ret) - ret = filemap_fdatawait_range(file->f_mapping, *ppos, - *ppos + count - 1); + ret = filemap_fdatawait_range(file->f_mapping, + iocb->ki_pos - written, + iocb->ki_pos - 1); } +no_sync: /* * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io * function pointer which is called when o_direct io completes so that @@ -2678,8 +2681,6 @@ const struct inode_operations ocfs2_special_file_iops = { */ const struct file_operations ocfs2_fops = { .llseek = ocfs2_file_llseek, - .read = new_sync_read, - .write = new_sync_write, .mmap = ocfs2_mmap, .fsync = ocfs2_sync_file, .release = ocfs2_file_release, @@ -2726,8 +2727,6 @@ const struct file_operations ocfs2_dops = { */ const struct file_operations ocfs2_fops_no_plocks = { .llseek = ocfs2_file_llseek, - .read = new_sync_read, - .write = new_sync_write, .mmap = ocfs2_mmap, .fsync = ocfs2_sync_file, .release = ocfs2_file_release, diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 3025c0da6b8a..be71ca0937f7 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -624,7 +624,7 @@ static int ocfs2_remove_inode(struct inode *inode, ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, le16_to_cpu(di->i_suballoc_slot)); if (!inode_alloc_inode) { - status = -EEXIST; + status = -ENOENT; mlog_errno(status); goto bail; } @@ -742,7 +742,7 @@ static int ocfs2_wipe_inode(struct inode *inode, ORPHAN_DIR_SYSTEM_INODE, orphaned_slot); if (!orphan_dir_inode) { - status = -EEXIST; + status = -ENOENT; mlog_errno(status); goto bail; } diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 044013455621..857bbbcd39f3 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -666,7 +666,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, if (le32_to_cpu(alloc->id1.bitmap1.i_used) != ocfs2_local_alloc_count_bits(alloc)) { ocfs2_error(osb->sb, "local alloc inode %llu says it has " - "%u free bits, but a count shows %u", + "%u used bits, but a count shows %u", (unsigned long long)le64_to_cpu(alloc->i_blkno), le32_to_cpu(alloc->id1.bitmap1.i_used), ocfs2_local_alloc_count_bits(alloc)); @@ -839,7 +839,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, u32 *numbits, struct ocfs2_alloc_reservation *resv) { - int numfound, bitoff, left, startoff, lastzero; + int numfound = 0, bitoff, left, startoff, lastzero; int local_resv = 0; struct ocfs2_alloc_reservation r; void *bitmap = NULL; diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b5c3a5ea3ee6..09f90cbf0e24 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -2322,10 +2322,10 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, trace_ocfs2_orphan_del( (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, - name, namelen); + name, strlen(name)); /* find it's spot in the orphan directory */ - status = ocfs2_find_entry(name, namelen, orphan_dir_inode, + status = ocfs2_find_entry(name, strlen(name), orphan_dir_inode, &lookup); if (status) { mlog_errno(status); @@ -2808,7 +2808,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, ORPHAN_DIR_SYSTEM_INODE, osb->slot_num); if (!orphan_dir_inode) { - status = -EEXIST; + status = -ENOENT; mlog_errno(status); goto leave; } diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 8490c64d34fe..460c6c37e683 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -502,7 +502,7 @@ static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb) static inline int ocfs2_supports_append_dio(struct ocfs2_super *osb) { - if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_APPEND_DIO) + if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_APPEND_DIO) return 1; return 0; } diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 20e37a3ed26f..db64ce2d4667 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -102,11 +102,11 @@ | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \ | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG \ - | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO) + | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO \ + | OCFS2_FEATURE_INCOMPAT_APPEND_DIO) #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ - | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA \ - | OCFS2_FEATURE_RO_COMPAT_APPEND_DIO) + | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) /* * Heartbeat-only devices are missing journals and other files. The @@ -179,6 +179,11 @@ #define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO 0x4000 /* + * Append Direct IO support + */ +#define OCFS2_FEATURE_INCOMPAT_APPEND_DIO 0x8000 + +/* * backup superblock flag is used to indicate that this volume * has backup superblocks. */ @@ -200,10 +205,6 @@ #define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002 #define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004 -/* - * Append Direct IO support - */ -#define OCFS2_FEATURE_RO_COMPAT_APPEND_DIO 0x0008 /* The byte offset of the first backup block will be 1G. * The following will be 4G, 16G, 64G, 256G and 1T. diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index ee541f92dab4..df3a500789c7 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4276,7 +4276,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, error = posix_acl_create(dir, &mode, &default_acl, &acl); if (error) { mlog_errno(error); - goto out; + return error; } error = ocfs2_create_inode_in_orphan(dir, mode, diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index d5493e361a38..e78a203d44c8 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -427,7 +427,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb) if (!si) { status = -ENOMEM; mlog_errno(status); - goto bail; + return status; } si->si_extended = ocfs2_uses_extended_slot_map(osb); @@ -452,7 +452,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb) osb->slot_info = (struct ocfs2_slot_info *)si; bail: - if (status < 0 && si) + if (status < 0) __ocfs2_free_slot_info(si); return status; diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 1724d43d3da1..220cae7bbdbc 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -295,7 +295,7 @@ static int o2cb_cluster_check(void) set_bit(node_num, netmap); if (!memcmp(hbmap, netmap, sizeof(hbmap))) return 0; - if (i < O2CB_MAP_STABILIZE_COUNT) + if (i < O2CB_MAP_STABILIZE_COUNT - 1) msleep(1000); } diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 720aa389e0ea..2768eb1da2b8 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -1004,10 +1004,8 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn) BUG_ON(conn == NULL); lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); - if (!lc) { - rc = -ENOMEM; - goto out; - } + if (!lc) + return -ENOMEM; init_waitqueue_head(&lc->oc_wait); init_completion(&lc->oc_sync_wait); @@ -1063,7 +1061,7 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn) } out: - if (rc && lc) + if (rc) kfree(lc); return rc; } diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 0cb889a17ae1..4479029630bb 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -2499,6 +2499,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle, alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); + ocfs2_block_group_set_bits(handle, alloc_inode, group, group_bh, + start_bit, count); goto bail; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 26675185b886..837ddce4b659 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1112,7 +1112,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, ocfs2_debugfs_root); - if (!osb->osb_debug_root) { + if (IS_ERR_OR_NULL(osb->osb_debug_root)) { status = -EINVAL; mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n"); goto read_super_error; @@ -1122,7 +1122,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) osb->osb_debug_root, osb, &ocfs2_osb_debug_fops); - if (!osb->osb_ctxt) { + if (IS_ERR_OR_NULL(osb->osb_ctxt)) { status = -EINVAL; mlog_errno(status); goto read_super_error; @@ -1606,8 +1606,9 @@ static int __init ocfs2_init(void) } ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); - if (!ocfs2_debugfs_root) { - status = -ENOMEM; + if (IS_ERR_OR_NULL(ocfs2_debugfs_root)) { + status = ocfs2_debugfs_root ? + PTR_ERR(ocfs2_debugfs_root) : -ENOMEM; mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); goto out4; } @@ -2069,6 +2070,8 @@ static int ocfs2_initialize_super(struct super_block *sb, cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); + memcpy(sb->s_uuid, di->id2.i_super.s_uuid, + sizeof(di->id2.i_super.s_uuid)); osb->osb_dx_mask = (1 << (cbits - bbits)) - 1; @@ -2333,7 +2336,7 @@ static int ocfs2_initialize_super(struct super_block *sb, mlog_errno(status); goto bail; } - cleancache_init_shared_fs((char *)&di->id2.i_super.s_uuid, sb); + cleancache_init_shared_fs(sb); bail: return status; @@ -2563,22 +2566,22 @@ static void ocfs2_handle_error(struct super_block *sb) ocfs2_set_ro_flag(osb, 0); } -static char error_buf[1024]; - -void __ocfs2_error(struct super_block *sb, - const char *function, - const char *fmt, ...) +void __ocfs2_error(struct super_block *sb, const char *function, + const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - vsnprintf(error_buf, sizeof(error_buf), fmt, args); - va_end(args); + vaf.fmt = fmt; + vaf.va = &args; /* Not using mlog here because we want to show the actual * function the error came from. */ - printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %s\n", - sb->s_id, function, error_buf); + printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %pV\n", + sb->s_id, function, &vaf); + + va_end(args); ocfs2_handle_error(sb); } @@ -2586,18 +2589,21 @@ void __ocfs2_error(struct super_block *sb, /* Handle critical errors. This is intentionally more drastic than * ocfs2_handle_error, so we only use for things like journal errors, * etc. */ -void __ocfs2_abort(struct super_block* sb, - const char *function, +void __ocfs2_abort(struct super_block *sb, const char *function, const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - vsnprintf(error_buf, sizeof(error_buf), fmt, args); - va_end(args); - printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n", - sb->s_id, function, error_buf); + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_CRIT "OCFS2: abort (device %s): %s: %pV\n", + sb->s_id, function, &vaf); + + va_end(args); /* We don't have the cluster support yet to go straight to * hard readonly in here. Until then, we want to keep diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 85b190dc132f..4ca7533be479 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1238,6 +1238,10 @@ static int ocfs2_xattr_block_get(struct inode *inode, i, &block_off, &name_offset); + if (ret) { + mlog_errno(ret); + goto cleanup; + } xs->base = bucket_block(xs->bucket, block_off); } if (ocfs2_xattr_is_local(xs->here)) { @@ -5665,6 +5669,10 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i, &xv, NULL); + if (ret) { + mlog_errno(ret); + break; + } ret = ocfs2_lock_xattr_remove_allocators(inode, xv, args->ref_ci, diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 902e88527fce..f993be7f2156 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -337,8 +337,6 @@ static sector_t omfs_bmap(struct address_space *mapping, sector_t block) const struct file_operations omfs_file_operations = { .llseek = generic_file_llseek, - .read = new_sync_read, - .write = new_sync_write, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, diff --git a/fs/open.c b/fs/open.c index 33f9cbf2610b..6796f04d6032 100644 --- a/fs/open.c +++ b/fs/open.c @@ -570,6 +570,7 @@ static int chown_common(struct path *path, uid_t user, gid_t group) uid = make_kuid(current_user_ns(), user); gid = make_kgid(current_user_ns(), group); +retry_deleg: newattrs.ia_valid = ATTR_CTIME; if (user != (uid_t) -1) { if (!uid_valid(uid)) @@ -586,7 +587,6 @@ static int chown_common(struct path *path, uid_t user, gid_t group) if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; -retry_deleg: mutex_lock(&inode->i_mutex); error = security_path_chown(path, uid, gid); if (!error) @@ -734,10 +734,10 @@ static int do_dentry_open(struct file *f, if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(inode); if ((f->f_mode & FMODE_READ) && - likely(f->f_op->read || f->f_op->aio_read || f->f_op->read_iter)) + likely(f->f_op->read || f->f_op->read_iter)) f->f_mode |= FMODE_CAN_READ; if ((f->f_mode & FMODE_WRITE) && - likely(f->f_op->write || f->f_op->aio_write || f->f_op->write_iter)) + likely(f->f_op->write || f->f_op->write_iter)) f->f_mode |= FMODE_CAN_WRITE; f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); @@ -988,9 +988,6 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, return ERR_PTR(err); if (flags & O_CREAT) return ERR_PTR(-EINVAL); - if (!filename && (flags & O_DIRECTORY)) - if (!dentry->d_inode->i_op->lookup) - return ERR_PTR(-ENOTDIR); return do_file_open_root(dentry, mnt, filename, &op); } EXPORT_SYMBOL(file_open_root); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index b90952f528b1..5f0d1993e6e3 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -529,8 +529,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data) { struct ovl_fs *ufs = sb->s_fs_info; - if (!(*flags & MS_RDONLY) && - (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY))) + if (!(*flags & MS_RDONLY) && !ufs->upper_mnt) return -EROFS; return 0; @@ -615,9 +614,19 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) break; default: + pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); return -EINVAL; } } + + /* Workdir is useless in non-upper mount */ + if (!config->upperdir && config->workdir) { + pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n", + config->workdir); + kfree(config->workdir); + config->workdir = NULL; + } + return 0; } @@ -837,7 +846,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_stack_depth = 0; if (ufs->config.upperdir) { - /* FIXME: workdir is not needed for a R/O mount */ if (!ufs->config.workdir) { pr_err("overlayfs: missing 'workdir'\n"); goto out_free_config; @@ -847,6 +855,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (err) goto out_free_config; + /* Upper fs should not be r/o */ + if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) { + pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); + err = -EINVAL; + goto out_put_upperpath; + } + err = ovl_mount_dir(ufs->config.workdir, &workpath); if (err) goto out_put_upperpath; @@ -869,8 +884,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) err = -EINVAL; stacklen = ovl_split_lowerdirs(lowertmp); - if (stacklen > OVL_MAX_STACK) + if (stacklen > OVL_MAX_STACK) { + pr_err("overlayfs: too many lower directries, limit is %d\n", + OVL_MAX_STACK); goto out_free_lowertmp; + } else if (!ufs->config.upperdir && stacklen == 1) { + pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); + goto out_free_lowertmp; + } stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); if (!stack) @@ -932,8 +953,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) ufs->numlower++; } - /* If the upper fs is r/o or nonexistent, we mark overlayfs r/o too */ - if (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)) + /* If the upper fs is nonexistent, we mark overlayfs r/o too */ + if (!ufs->upper_mnt) sb->s_flags |= MS_RDONLY; sb->s_d_op = &ovl_dentry_operations; diff --git a/fs/pipe.c b/fs/pipe.c index 21981e58e2a6..822da5b7cff0 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -21,7 +21,6 @@ #include <linux/audit.h> #include <linux/syscalls.h> #include <linux/fcntl.h> -#include <linux/aio.h> #include <asm/uaccess.h> #include <asm/ioctls.h> @@ -947,9 +946,7 @@ err: const struct file_operations pipefifo_fops = { .open = fifo_open, .llseek = no_llseek, - .read = new_sync_read, .read_iter = pipe_read, - .write = new_sync_write, .write_iter = pipe_write, .poll = pipe_poll, .unlocked_ioctl = pipe_ioctl, diff --git a/fs/proc/array.c b/fs/proc/array.c index 1295a00ca316..fd02a9ebfc30 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -99,8 +99,8 @@ static inline void task_name(struct seq_file *m, struct task_struct *p) buf = m->buf + m->count; /* Ignore error for now */ - string_escape_str(tcomm, &buf, m->size - m->count, - ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\"); + buf += string_escape_str(tcomm, buf, m->size - m->count, + ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\"); m->count = buf - m->buf; seq_putc(m, '\n'); @@ -188,6 +188,24 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, from_kgid_munged(user_ns, GROUP_AT(group_info, g))); put_cred(cred); +#ifdef CONFIG_PID_NS + seq_puts(m, "\nNStgid:"); + for (g = ns->level; g <= pid->level; g++) + seq_printf(m, "\t%d", + task_tgid_nr_ns(p, pid->numbers[g].ns)); + seq_puts(m, "\nNSpid:"); + for (g = ns->level; g <= pid->level; g++) + seq_printf(m, "\t%d", + task_pid_nr_ns(p, pid->numbers[g].ns)); + seq_puts(m, "\nNSpgid:"); + for (g = ns->level; g <= pid->level; g++) + seq_printf(m, "\t%d", + task_pgrp_nr_ns(p, pid->numbers[g].ns)); + seq_puts(m, "\nNSsid:"); + for (g = ns->level; g <= pid->level; g++) + seq_printf(m, "\t%d", + task_session_nr_ns(p, pid->numbers[g].ns)); +#endif seq_putc(m, '\n'); } @@ -614,7 +632,9 @@ static int children_seq_show(struct seq_file *seq, void *v) pid_t pid; pid = pid_nr_ns(v, inode->i_sb->s_fs_info); - return seq_printf(seq, "%d ", pid); + seq_printf(seq, "%d ", pid); + + return 0; } static void *children_seq_start(struct seq_file *seq, loff_t *pos) diff --git a/fs/proc/base.c b/fs/proc/base.c index 3f3d7aeb0712..7a3b82f986dd 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -238,13 +238,15 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, wchan = get_wchan(task); - if (lookup_symbol_name(wchan, symname) < 0) + if (lookup_symbol_name(wchan, symname) < 0) { if (!ptrace_may_access(task, PTRACE_MODE_READ)) return 0; - else - return seq_printf(m, "%lu", wchan); - else - return seq_printf(m, "%s", symname); + seq_printf(m, "%lu", wchan); + } else { + seq_printf(m, "%s", symname); + } + + return 0; } #endif /* CONFIG_KALLSYMS */ @@ -309,10 +311,12 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { - return seq_printf(m, "%llu %llu %lu\n", - (unsigned long long)task->se.sum_exec_runtime, - (unsigned long long)task->sched_info.run_delay, - task->sched_info.pcount); + seq_printf(m, "%llu %llu %lu\n", + (unsigned long long)task->se.sum_exec_runtime, + (unsigned long long)task->sched_info.run_delay, + task->sched_info.pcount); + + return 0; } #endif @@ -387,7 +391,9 @@ static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, points = oom_badness(task, NULL, NULL, totalpages) * 1000 / totalpages; read_unlock(&tasklist_lock); - return seq_printf(m, "%lu\n", points); + seq_printf(m, "%lu\n", points); + + return 0; } struct limit_names { @@ -432,15 +438,15 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns, * print the file header */ seq_printf(m, "%-25s %-20s %-20s %-10s\n", - "Limit", "Soft Limit", "Hard Limit", "Units"); + "Limit", "Soft Limit", "Hard Limit", "Units"); for (i = 0; i < RLIM_NLIMITS; i++) { if (rlim[i].rlim_cur == RLIM_INFINITY) seq_printf(m, "%-25s %-20s ", - lnames[i].name, "unlimited"); + lnames[i].name, "unlimited"); else seq_printf(m, "%-25s %-20lu ", - lnames[i].name, rlim[i].rlim_cur); + lnames[i].name, rlim[i].rlim_cur); if (rlim[i].rlim_max == RLIM_INFINITY) seq_printf(m, "%-20s ", "unlimited"); @@ -462,7 +468,9 @@ static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns, { long nr; unsigned long args[6], sp, pc; - int res = lock_trace(task); + int res; + + res = lock_trace(task); if (res) return res; @@ -477,7 +485,8 @@ static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns, args[0], args[1], args[2], args[3], args[4], args[5], sp, pc); unlock_trace(task); - return res; + + return 0; } #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ @@ -2002,12 +2011,13 @@ static int show_timer(struct seq_file *m, void *v) notify = timer->it_sigev_notify; seq_printf(m, "ID: %d\n", timer->it_id); - seq_printf(m, "signal: %d/%p\n", timer->sigq->info.si_signo, - timer->sigq->info.si_value.sival_ptr); + seq_printf(m, "signal: %d/%p\n", + timer->sigq->info.si_signo, + timer->sigq->info.si_value.sival_ptr); seq_printf(m, "notify: %s/%s.%d\n", - nstr[notify & ~SIGEV_THREAD_ID], - (notify & SIGEV_THREAD_ID) ? "tid" : "pid", - pid_nr_ns(timer->it_pid, tp->ns)); + nstr[notify & ~SIGEV_THREAD_ID], + (notify & SIGEV_THREAD_ID) ? "tid" : "pid", + pid_nr_ns(timer->it_pid, tp->ns)); seq_printf(m, "ClockID: %d\n", timer->it_clock); return 0; @@ -2352,21 +2362,23 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh unlock_task_sighand(task, &flags); } - result = seq_printf(m, - "rchar: %llu\n" - "wchar: %llu\n" - "syscr: %llu\n" - "syscw: %llu\n" - "read_bytes: %llu\n" - "write_bytes: %llu\n" - "cancelled_write_bytes: %llu\n", - (unsigned long long)acct.rchar, - (unsigned long long)acct.wchar, - (unsigned long long)acct.syscr, - (unsigned long long)acct.syscw, - (unsigned long long)acct.read_bytes, - (unsigned long long)acct.write_bytes, - (unsigned long long)acct.cancelled_write_bytes); + seq_printf(m, + "rchar: %llu\n" + "wchar: %llu\n" + "syscr: %llu\n" + "syscw: %llu\n" + "read_bytes: %llu\n" + "write_bytes: %llu\n" + "cancelled_write_bytes: %llu\n", + (unsigned long long)acct.rchar, + (unsigned long long)acct.wchar, + (unsigned long long)acct.syscr, + (unsigned long long)acct.syscw, + (unsigned long long)acct.read_bytes, + (unsigned long long)acct.write_bytes, + (unsigned long long)acct.cancelled_write_bytes); + result = 0; + out_unlock: mutex_unlock(&task->signal->cred_guard_mutex); return result; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 956b75d61809..6dee68d013ff 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1325,6 +1325,9 @@ out: static int pagemap_open(struct inode *inode, struct file *file) { + /* do not disclose physical addresses: attack vector */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about " "to stop being page-shift some time soon. See the " "linux/Documentation/vm/pagemap.txt for details.\n"); diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index b32ce53d24ee..56e1ffda4d89 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -364,6 +364,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, case PSTORE_TYPE_PMSG: scnprintf(name, sizeof(name), "pmsg-%s-%lld", psname, id); break; + case PSTORE_TYPE_PPC_OPAL: + sprintf(name, "powerpc-opal-%s-%lld", psname, id); + break; case PSTORE_TYPE_UNKNOWN: scnprintf(name, sizeof(name), "unknown-%s-%lld", psname, id); break; diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 39d1373128e9..44a549beeafa 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -539,6 +539,9 @@ static int ramoops_probe(struct platform_device *pdev) mem_address = pdata->mem_address; record_size = pdata->record_size; dump_oops = pdata->dump_oops; + ramoops_console_size = pdata->console_size; + ramoops_pmsg_size = pdata->pmsg_size; + ramoops_ftrace_size = pdata->ftrace_size; pr_info("attached 0x%lx@0x%llx, ecc: %d/%d\n", cxt->size, (unsigned long long)cxt->phys_addr, diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 4f56de822d2f..183a212694bf 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -31,9 +31,7 @@ #include "internal.h" const struct file_operations ramfs_file_operations = { - .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = new_sync_write, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .fsync = noop_fsync, diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index f6ab41b39612..0b38befa69f3 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -44,9 +44,7 @@ const struct file_operations ramfs_file_operations = { .mmap_capabilities = ramfs_mmap_capabilities, .mmap = ramfs_nommu_mmap, .get_unmapped_area = ramfs_nommu_get_unmapped_area, - .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = new_sync_write, .write_iter = generic_file_write_iter, .fsync = noop_fsync, .splice_read = generic_file_splice_read, diff --git a/fs/read_write.c b/fs/read_write.c index 8e1b68786d66..45d583c33879 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -9,7 +9,6 @@ #include <linux/fcntl.h> #include <linux/file.h> #include <linux/uio.h> -#include <linux/aio.h> #include <linux/fsnotify.h> #include <linux/security.h> #include <linux/export.h> @@ -23,13 +22,10 @@ #include <asm/unistd.h> typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); -typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *, - unsigned long, loff_t); typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *); const struct file_operations generic_ro_fops = { .llseek = generic_file_llseek, - .read = new_sync_read, .read_iter = generic_file_read_iter, .mmap = generic_file_readonly_mmap, .splice_read = generic_file_splice_read, @@ -343,13 +339,10 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos) init_sync_kiocb(&kiocb, file); kiocb.ki_pos = *ppos; - kiocb.ki_nbytes = iov_iter_count(iter); iter->type |= READ; ret = file->f_op->read_iter(&kiocb, iter); - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); - + BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; return ret; @@ -366,13 +359,10 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos) init_sync_kiocb(&kiocb, file); kiocb.ki_pos = *ppos; - kiocb.ki_nbytes = iov_iter_count(iter); iter->type |= WRITE; ret = file->f_op->write_iter(&kiocb, iter); - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); - + BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; return ret; @@ -418,26 +408,7 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; } -ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) -{ - struct iovec iov = { .iov_base = buf, .iov_len = len }; - struct kiocb kiocb; - ssize_t ret; - - init_sync_kiocb(&kiocb, filp); - kiocb.ki_pos = *ppos; - kiocb.ki_nbytes = len; - - ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); - *ppos = kiocb.ki_pos; - return ret; -} - -EXPORT_SYMBOL(do_sync_read); - -ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) +static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) { struct iovec iov = { .iov_base = buf, .iov_len = len }; struct kiocb kiocb; @@ -446,34 +417,25 @@ ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *p init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - kiocb.ki_nbytes = len; iov_iter_init(&iter, READ, &iov, 1, len); ret = filp->f_op->read_iter(&kiocb, &iter); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } -EXPORT_SYMBOL(new_sync_read); - ssize_t __vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { - ssize_t ret; - if (file->f_op->read) - ret = file->f_op->read(file, buf, count, pos); - else if (file->f_op->aio_read) - ret = do_sync_read(file, buf, count, pos); + return file->f_op->read(file, buf, count, pos); else if (file->f_op->read_iter) - ret = new_sync_read(file, buf, count, pos); + return new_sync_read(file, buf, count, pos); else - ret = -EINVAL; - - return ret; + return -EINVAL; } +EXPORT_SYMBOL(__vfs_read); ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { @@ -502,26 +464,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) EXPORT_SYMBOL(vfs_read); -ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) -{ - struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; - struct kiocb kiocb; - ssize_t ret; - - init_sync_kiocb(&kiocb, filp); - kiocb.ki_pos = *ppos; - kiocb.ki_nbytes = len; - - ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); - *ppos = kiocb.ki_pos; - return ret; -} - -EXPORT_SYMBOL(do_sync_write); - -ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) +static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) { struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; struct kiocb kiocb; @@ -530,17 +473,25 @@ ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, lo init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - kiocb.ki_nbytes = len; iov_iter_init(&iter, WRITE, &iov, 1, len); ret = filp->f_op->write_iter(&kiocb, &iter); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } -EXPORT_SYMBOL(new_sync_write); +ssize_t __vfs_write(struct file *file, const char __user *p, size_t count, + loff_t *pos) +{ + if (file->f_op->write) + return file->f_op->write(file, p, count, pos); + else if (file->f_op->write_iter) + return new_sync_write(file, p, count, pos); + else + return -EINVAL; +} +EXPORT_SYMBOL(__vfs_write); ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) { @@ -556,12 +507,7 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t p = (__force const char __user *)buf; if (count > MAX_RW_COUNT) count = MAX_RW_COUNT; - if (file->f_op->write) - ret = file->f_op->write(file, p, count, pos); - else if (file->f_op->aio_write) - ret = do_sync_write(file, p, count, pos); - else - ret = new_sync_write(file, p, count, pos); + ret = __vfs_write(file, p, count, pos); set_fs(old_fs); if (ret > 0) { fsnotify_modify(file); @@ -588,12 +534,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ if (ret >= 0) { count = ret; file_start_write(file); - if (file->f_op->write) - ret = file->f_op->write(file, buf, count, pos); - else if (file->f_op->aio_write) - ret = do_sync_write(file, buf, count, pos); - else - ret = new_sync_write(file, buf, count, pos); + ret = __vfs_write(file, buf, count, pos); if (ret > 0) { fsnotify_modify(file); add_wchar(current, ret); @@ -710,60 +651,32 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) } EXPORT_SYMBOL(iov_shorten); -static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov, - unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn) +static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, + loff_t *ppos, iter_fn_t fn) { struct kiocb kiocb; - struct iov_iter iter; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - kiocb.ki_nbytes = len; - iov_iter_init(&iter, rw, iov, nr_segs, len); - ret = fn(&kiocb, &iter); - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); - *ppos = kiocb.ki_pos; - return ret; -} - -static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, - unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) -{ - struct kiocb kiocb; - ssize_t ret; - - init_sync_kiocb(&kiocb, filp); - kiocb.ki_pos = *ppos; - kiocb.ki_nbytes = len; - - ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); + ret = fn(&kiocb, iter); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } /* Do it by hand, with file-ops */ -static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, - unsigned long nr_segs, loff_t *ppos, io_fn_t fn) +static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, + loff_t *ppos, io_fn_t fn) { - struct iovec *vector = iov; ssize_t ret = 0; - while (nr_segs > 0) { - void __user *base; - size_t len; + while (iov_iter_count(iter)) { + struct iovec iovec = iov_iter_iovec(iter); ssize_t nr; - base = vector->iov_base; - len = vector->iov_len; - vector++; - nr_segs--; - - nr = fn(filp, base, len, ppos); + nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos); if (nr < 0) { if (!ret) @@ -771,8 +684,9 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, break; } ret += nr; - if (nr != len) + if (nr != iovec.iov_len) break; + iov_iter_advance(iter, nr); } return ret; @@ -863,48 +777,42 @@ static ssize_t do_readv_writev(int type, struct file *file, size_t tot_len; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; + struct iov_iter iter; ssize_t ret; io_fn_t fn; - iov_fn_t fnv; iter_fn_t iter_fn; - ret = rw_copy_check_uvector(type, uvector, nr_segs, - ARRAY_SIZE(iovstack), iovstack, &iov); - if (ret <= 0) - goto out; + ret = import_iovec(type, uvector, nr_segs, + ARRAY_SIZE(iovstack), &iov, &iter); + if (ret < 0) + return ret; - tot_len = ret; + tot_len = iov_iter_count(&iter); + if (!tot_len) + goto out; ret = rw_verify_area(type, file, pos, tot_len); if (ret < 0) goto out; - fnv = NULL; if (type == READ) { fn = file->f_op->read; - fnv = file->f_op->aio_read; iter_fn = file->f_op->read_iter; } else { fn = (io_fn_t)file->f_op->write; - fnv = file->f_op->aio_write; iter_fn = file->f_op->write_iter; file_start_write(file); } if (iter_fn) - ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, - pos, iter_fn); - else if (fnv) - ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, - pos, fnv); + ret = do_iter_readv_writev(file, &iter, pos, iter_fn); else - ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); + ret = do_loop_readv_writev(file, &iter, pos, fn); if (type != READ) file_end_write(file); out: - if (iov != iovstack) - kfree(iov); + kfree(iov); if ((ret + (type == READ)) > 0) { if (type == READ) fsnotify_access(file); @@ -1043,48 +951,42 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, compat_ssize_t tot_len; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; + struct iov_iter iter; ssize_t ret; io_fn_t fn; - iov_fn_t fnv; iter_fn_t iter_fn; - ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, - UIO_FASTIOV, iovstack, &iov); - if (ret <= 0) - goto out; + ret = compat_import_iovec(type, uvector, nr_segs, + UIO_FASTIOV, &iov, &iter); + if (ret < 0) + return ret; - tot_len = ret; + tot_len = iov_iter_count(&iter); + if (!tot_len) + goto out; ret = rw_verify_area(type, file, pos, tot_len); if (ret < 0) goto out; - fnv = NULL; if (type == READ) { fn = file->f_op->read; - fnv = file->f_op->aio_read; iter_fn = file->f_op->read_iter; } else { fn = (io_fn_t)file->f_op->write; - fnv = file->f_op->aio_write; iter_fn = file->f_op->write_iter; file_start_write(file); } if (iter_fn) - ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, - pos, iter_fn); - else if (fnv) - ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, - pos, fnv); + ret = do_iter_readv_writev(file, &iter, pos, iter_fn); else - ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); + ret = do_loop_readv_writev(file, &iter, pos, fn); if (type != READ) file_end_write(file); out: - if (iov != iovstack) - kfree(iov); + kfree(iov); if ((ret + (type == READ)) > 0) { if (type == READ) fsnotify_access(file); diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 751dd3f4346b..96a1bcf33db4 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -243,8 +243,6 @@ drop_write_lock: } const struct file_operations reiserfs_file_operations = { - .read = new_sync_read, - .write = new_sync_write, .unlocked_ioctl = reiserfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = reiserfs_compat_ioctl, diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index e72401e1f995..9312b7842e03 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -18,7 +18,7 @@ #include <linux/writeback.h> #include <linux/quotaops.h> #include <linux/swap.h> -#include <linux/aio.h> +#include <linux/uio.h> int reiserfs_commit_write(struct file *f, struct page *page, unsigned from, unsigned to); diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c index 7da9e2153953..1118a0dc6b45 100644 --- a/fs/romfs/mmap-nommu.c +++ b/fs/romfs/mmap-nommu.c @@ -81,7 +81,6 @@ static unsigned romfs_mmap_capabilities(struct file *file) const struct file_operations romfs_ro_fops = { .llseek = generic_file_llseek, - .read = new_sync_read, .read_iter = generic_file_read_iter, .splice_read = generic_file_splice_read, .mmap = romfs_mmap, diff --git a/fs/splice.c b/fs/splice.c index 7968da96bebb..476024bb6546 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -32,7 +32,6 @@ #include <linux/gfp.h> #include <linux/socket.h> #include <linux/compat.h> -#include <linux/aio.h> #include "internal.h" /* @@ -524,6 +523,9 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, loff_t isize, left; int ret; + if (IS_DAX(in->f_mapping->host)) + return default_file_splice_read(in, ppos, pipe, len, flags); + isize = i_size_read(in->f_mapping->host); if (unlikely(*ppos >= isize)) return 0; @@ -1534,34 +1536,29 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov, struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; - ssize_t count; pipe = get_pipe_info(file); if (!pipe) return -EBADF; - ret = rw_copy_check_uvector(READ, uiov, nr_segs, - ARRAY_SIZE(iovstack), iovstack, &iov); - if (ret <= 0) - goto out; - - count = ret; - iov_iter_init(&iter, READ, iov, nr_segs, count); + ret = import_iovec(READ, uiov, nr_segs, + ARRAY_SIZE(iovstack), &iov, &iter); + if (ret < 0) + return ret; + sd.total_len = iov_iter_count(&iter); sd.len = 0; - sd.total_len = count; sd.flags = flags; sd.u.data = &iter; sd.pos = 0; - pipe_lock(pipe); - ret = __splice_from_pipe(pipe, &sd, pipe_to_user); - pipe_unlock(pipe); - -out: - if (iov != iovstack) - kfree(iov); + if (sd.total_len) { + pipe_lock(pipe); + ret = __splice_from_pipe(pipe, &sd, pipe_to_user); + pipe_unlock(pipe); + } + kfree(iov); return ret; } diff --git a/fs/stat.c b/fs/stat.c index ae0c3cef9927..19636af5e75c 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -66,7 +66,7 @@ int vfs_getattr(struct path *path, struct kstat *stat) { int retval; - retval = security_inode_getattr(path->mnt, path->dentry); + retval = security_inode_getattr(path); if (retval) return retval; return vfs_getattr_nosec(path, stat); diff --git a/fs/super.c b/fs/super.c index 2b7dc90ccdbb..928c20f47af9 100644 --- a/fs/super.c +++ b/fs/super.c @@ -224,7 +224,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) s->s_maxbytes = MAX_NON_LFS; s->s_op = &default_op; s->s_time_gran = 1000000000; - s->cleancache_poolid = -1; + s->cleancache_poolid = CLEANCACHE_NO_POOL; s->s_shrink.seeks = DEFAULT_SEEKS; s->s_shrink.scan_objects = super_cache_scan; diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 2554d8835b48..b400c04371f0 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -41,7 +41,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, if (grp->attrs) { for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) { - umode_t mode = 0; + umode_t mode = (*attr)->mode; /* * In update mode, we're changing the permissions or @@ -55,9 +55,14 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, if (!mode) continue; } + + WARN(mode & ~(SYSFS_PREALLOC | 0664), + "Attribute %s: Invalid permissions 0%o\n", + (*attr)->name, mode); + + mode &= SYSFS_PREALLOC | 0664; error = sysfs_add_file_mode_ns(parent, *attr, false, - (*attr)->mode | mode, - NULL); + mode, NULL); if (unlikely(error)) break; } diff --git a/fs/sysv/file.c b/fs/sysv/file.c index b00811c75b24..a48e30410ad1 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -21,9 +21,7 @@ */ const struct file_operations sysv_file_operations = { .llseek = generic_file_llseek, - .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = new_sync_write, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .fsync = generic_file_fsync, diff --git a/fs/tracefs/Makefile b/fs/tracefs/Makefile new file mode 100644 index 000000000000..82fa35b656c4 --- /dev/null +++ b/fs/tracefs/Makefile @@ -0,0 +1,4 @@ +tracefs-objs := inode.o + +obj-$(CONFIG_TRACING) += tracefs.o + diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c new file mode 100644 index 000000000000..d92bdf3b079a --- /dev/null +++ b/fs/tracefs/inode.c @@ -0,0 +1,650 @@ +/* + * inode.c - part of tracefs, a pseudo file system for activating tracing + * + * Based on debugfs by: Greg Kroah-Hartman <greg@kroah.com> + * + * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * tracefs is the file system that is used by the tracing infrastructure. + * + */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/kobject.h> +#include <linux/namei.h> +#include <linux/tracefs.h> +#include <linux/fsnotify.h> +#include <linux/seq_file.h> +#include <linux/parser.h> +#include <linux/magic.h> +#include <linux/slab.h> + +#define TRACEFS_DEFAULT_MODE 0700 + +static struct vfsmount *tracefs_mount; +static int tracefs_mount_count; +static bool tracefs_registered; + +static ssize_t default_read_file(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return 0; +} + +static ssize_t default_write_file(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + return count; +} + +static const struct file_operations tracefs_file_operations = { + .read = default_read_file, + .write = default_write_file, + .open = simple_open, + .llseek = noop_llseek, +}; + +static struct tracefs_dir_ops { + int (*mkdir)(const char *name); + int (*rmdir)(const char *name); +} tracefs_ops; + +static char *get_dname(struct dentry *dentry) +{ + const char *dname; + char *name; + int len = dentry->d_name.len; + + dname = dentry->d_name.name; + name = kmalloc(len + 1, GFP_KERNEL); + if (!name) + return NULL; + memcpy(name, dname, len); + name[len] = 0; + return name; +} + +static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode) +{ + char *name; + int ret; + + name = get_dname(dentry); + if (!name) + return -ENOMEM; + + /* + * The mkdir call can call the generic functions that create + * the files within the tracefs system. It is up to the individual + * mkdir routine to handle races. + */ + mutex_unlock(&inode->i_mutex); + ret = tracefs_ops.mkdir(name); + mutex_lock(&inode->i_mutex); + + kfree(name); + + return ret; +} + +static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry) +{ + char *name; + int ret; + + name = get_dname(dentry); + if (!name) + return -ENOMEM; + + /* + * The rmdir call can call the generic functions that create + * the files within the tracefs system. It is up to the individual + * rmdir routine to handle races. + * This time we need to unlock not only the parent (inode) but + * also the directory that is being deleted. + */ + mutex_unlock(&inode->i_mutex); + mutex_unlock(&dentry->d_inode->i_mutex); + + ret = tracefs_ops.rmdir(name); + + mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); + mutex_lock(&dentry->d_inode->i_mutex); + + kfree(name); + + return ret; +} + +static const struct inode_operations tracefs_dir_inode_operations = { + .lookup = simple_lookup, + .mkdir = tracefs_syscall_mkdir, + .rmdir = tracefs_syscall_rmdir, +}; + +static struct inode *tracefs_get_inode(struct super_block *sb) +{ + struct inode *inode = new_inode(sb); + if (inode) { + inode->i_ino = get_next_ino(); + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + } + return inode; +} + +struct tracefs_mount_opts { + kuid_t uid; + kgid_t gid; + umode_t mode; +}; + +enum { + Opt_uid, + Opt_gid, + Opt_mode, + Opt_err +}; + +static const match_table_t tokens = { + {Opt_uid, "uid=%u"}, + {Opt_gid, "gid=%u"}, + {Opt_mode, "mode=%o"}, + {Opt_err, NULL} +}; + +struct tracefs_fs_info { + struct tracefs_mount_opts mount_opts; +}; + +static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) +{ + substring_t args[MAX_OPT_ARGS]; + int option; + int token; + kuid_t uid; + kgid_t gid; + char *p; + + opts->mode = TRACEFS_DEFAULT_MODE; + + while ((p = strsep(&data, ",")) != NULL) { + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_uid: + if (match_int(&args[0], &option)) + return -EINVAL; + uid = make_kuid(current_user_ns(), option); + if (!uid_valid(uid)) + return -EINVAL; + opts->uid = uid; + break; + case Opt_gid: + if (match_int(&args[0], &option)) + return -EINVAL; + gid = make_kgid(current_user_ns(), option); + if (!gid_valid(gid)) + return -EINVAL; + opts->gid = gid; + break; + case Opt_mode: + if (match_octal(&args[0], &option)) + return -EINVAL; + opts->mode = option & S_IALLUGO; + break; + /* + * We might like to report bad mount options here; + * but traditionally tracefs has ignored all mount options + */ + } + } + + return 0; +} + +static int tracefs_apply_options(struct super_block *sb) +{ + struct tracefs_fs_info *fsi = sb->s_fs_info; + struct inode *inode = sb->s_root->d_inode; + struct tracefs_mount_opts *opts = &fsi->mount_opts; + + inode->i_mode &= ~S_IALLUGO; + inode->i_mode |= opts->mode; + + inode->i_uid = opts->uid; + inode->i_gid = opts->gid; + + return 0; +} + +static int tracefs_remount(struct super_block *sb, int *flags, char *data) +{ + int err; + struct tracefs_fs_info *fsi = sb->s_fs_info; + + sync_filesystem(sb); + err = tracefs_parse_options(data, &fsi->mount_opts); + if (err) + goto fail; + + tracefs_apply_options(sb); + +fail: + return err; +} + +static int tracefs_show_options(struct seq_file *m, struct dentry *root) +{ + struct tracefs_fs_info *fsi = root->d_sb->s_fs_info; + struct tracefs_mount_opts *opts = &fsi->mount_opts; + + if (!uid_eq(opts->uid, GLOBAL_ROOT_UID)) + seq_printf(m, ",uid=%u", + from_kuid_munged(&init_user_ns, opts->uid)); + if (!gid_eq(opts->gid, GLOBAL_ROOT_GID)) + seq_printf(m, ",gid=%u", + from_kgid_munged(&init_user_ns, opts->gid)); + if (opts->mode != TRACEFS_DEFAULT_MODE) + seq_printf(m, ",mode=%o", opts->mode); + + return 0; +} + +static const struct super_operations tracefs_super_operations = { + .statfs = simple_statfs, + .remount_fs = tracefs_remount, + .show_options = tracefs_show_options, +}; + +static int trace_fill_super(struct super_block *sb, void *data, int silent) +{ + static struct tree_descr trace_files[] = {{""}}; + struct tracefs_fs_info *fsi; + int err; + + save_mount_options(sb, data); + + fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL); + sb->s_fs_info = fsi; + if (!fsi) { + err = -ENOMEM; + goto fail; + } + + err = tracefs_parse_options(data, &fsi->mount_opts); + if (err) + goto fail; + + err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files); + if (err) + goto fail; + + sb->s_op = &tracefs_super_operations; + + tracefs_apply_options(sb); + + return 0; + +fail: + kfree(fsi); + sb->s_fs_info = NULL; + return err; +} + +static struct dentry *trace_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data) +{ + return mount_single(fs_type, flags, data, trace_fill_super); +} + +static struct file_system_type trace_fs_type = { + .owner = THIS_MODULE, + .name = "tracefs", + .mount = trace_mount, + .kill_sb = kill_litter_super, +}; +MODULE_ALIAS_FS("tracefs"); + +static struct dentry *start_creating(const char *name, struct dentry *parent) +{ + struct dentry *dentry; + int error; + + pr_debug("tracefs: creating file '%s'\n",name); + + error = simple_pin_fs(&trace_fs_type, &tracefs_mount, + &tracefs_mount_count); + if (error) + return ERR_PTR(error); + + /* If the parent is not specified, we create it in the root. + * We need the root dentry to do this, which is in the super + * block. A pointer to that is in the struct vfsmount that we + * have around. + */ + if (!parent) + parent = tracefs_mount->mnt_root; + + mutex_lock(&parent->d_inode->i_mutex); + dentry = lookup_one_len(name, parent, strlen(name)); + if (!IS_ERR(dentry) && dentry->d_inode) { + dput(dentry); + dentry = ERR_PTR(-EEXIST); + } + if (IS_ERR(dentry)) + mutex_unlock(&parent->d_inode->i_mutex); + return dentry; +} + +static struct dentry *failed_creating(struct dentry *dentry) +{ + mutex_unlock(&dentry->d_parent->d_inode->i_mutex); + dput(dentry); + simple_release_fs(&tracefs_mount, &tracefs_mount_count); + return NULL; +} + +static struct dentry *end_creating(struct dentry *dentry) +{ + mutex_unlock(&dentry->d_parent->d_inode->i_mutex); + return dentry; +} + +/** + * tracefs_create_file - create a file in the tracefs filesystem + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have. + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is NULL, then the + * file will be created in the root of the tracefs filesystem. + * @data: a pointer to something that the caller will want to get to later + * on. The inode.i_private pointer will point to this value on + * the open() call. + * @fops: a pointer to a struct file_operations that should be used for + * this file. + * + * This is the basic "create a file" function for tracefs. It allows for a + * wide range of flexibility in creating a file, or a directory (if you want + * to create a directory, the tracefs_create_dir() function is + * recommended to be used instead.) + * + * This function will return a pointer to a dentry if it succeeds. This + * pointer must be passed to the tracefs_remove() function when the file is + * to be removed (no automatic cleanup happens if your module is unloaded, + * you are responsible here.) If an error occurs, %NULL will be returned. + * + * If tracefs is not enabled in the kernel, the value -%ENODEV will be + * returned. + */ +struct dentry *tracefs_create_file(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops) +{ + struct dentry *dentry; + struct inode *inode; + + if (!(mode & S_IFMT)) + mode |= S_IFREG; + BUG_ON(!S_ISREG(mode)); + dentry = start_creating(name, parent); + + if (IS_ERR(dentry)) + return NULL; + + inode = tracefs_get_inode(dentry->d_sb); + if (unlikely(!inode)) + return failed_creating(dentry); + + inode->i_mode = mode; + inode->i_fop = fops ? fops : &tracefs_file_operations; + inode->i_private = data; + d_instantiate(dentry, inode); + fsnotify_create(dentry->d_parent->d_inode, dentry); + return end_creating(dentry); +} + +static struct dentry *__create_dir(const char *name, struct dentry *parent, + const struct inode_operations *ops) +{ + struct dentry *dentry = start_creating(name, parent); + struct inode *inode; + + if (IS_ERR(dentry)) + return NULL; + + inode = tracefs_get_inode(dentry->d_sb); + if (unlikely(!inode)) + return failed_creating(dentry); + + inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + inode->i_op = ops; + inode->i_fop = &simple_dir_operations; + + /* directory inodes start off with i_nlink == 2 (for "." entry) */ + inc_nlink(inode); + d_instantiate(dentry, inode); + inc_nlink(dentry->d_parent->d_inode); + fsnotify_mkdir(dentry->d_parent->d_inode, dentry); + return end_creating(dentry); +} + +/** + * tracefs_create_dir - create a directory in the tracefs filesystem + * @name: a pointer to a string containing the name of the directory to + * create. + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is NULL, then the + * directory will be created in the root of the tracefs filesystem. + * + * This function creates a directory in tracefs with the given name. + * + * This function will return a pointer to a dentry if it succeeds. This + * pointer must be passed to the tracefs_remove() function when the file is + * to be removed. If an error occurs, %NULL will be returned. + * + * If tracing is not enabled in the kernel, the value -%ENODEV will be + * returned. + */ +struct dentry *tracefs_create_dir(const char *name, struct dentry *parent) +{ + return __create_dir(name, parent, &simple_dir_inode_operations); +} + +/** + * tracefs_create_instance_dir - create the tracing instances directory + * @name: The name of the instances directory to create + * @parent: The parent directory that the instances directory will exist + * @mkdir: The function to call when a mkdir is performed. + * @rmdir: The function to call when a rmdir is performed. + * + * Only one instances directory is allowed. + * + * The instances directory is special as it allows for mkdir and rmdir to + * to be done by userspace. When a mkdir or rmdir is performed, the inode + * locks are released and the methhods passed in (@mkdir and @rmdir) are + * called without locks and with the name of the directory being created + * within the instances directory. + * + * Returns the dentry of the instances directory. + */ +struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent, + int (*mkdir)(const char *name), + int (*rmdir)(const char *name)) +{ + struct dentry *dentry; + + /* Only allow one instance of the instances directory. */ + if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir)) + return NULL; + + dentry = __create_dir(name, parent, &tracefs_dir_inode_operations); + if (!dentry) + return NULL; + + tracefs_ops.mkdir = mkdir; + tracefs_ops.rmdir = rmdir; + + return dentry; +} + +static inline int tracefs_positive(struct dentry *dentry) +{ + return dentry->d_inode && !d_unhashed(dentry); +} + +static int __tracefs_remove(struct dentry *dentry, struct dentry *parent) +{ + int ret = 0; + + if (tracefs_positive(dentry)) { + if (dentry->d_inode) { + dget(dentry); + switch (dentry->d_inode->i_mode & S_IFMT) { + case S_IFDIR: + ret = simple_rmdir(parent->d_inode, dentry); + break; + default: + simple_unlink(parent->d_inode, dentry); + break; + } + if (!ret) + d_delete(dentry); + dput(dentry); + } + } + return ret; +} + +/** + * tracefs_remove - removes a file or directory from the tracefs filesystem + * @dentry: a pointer to a the dentry of the file or directory to be + * removed. + * + * This function removes a file or directory in tracefs that was previously + * created with a call to another tracefs function (like + * tracefs_create_file() or variants thereof.) + */ +void tracefs_remove(struct dentry *dentry) +{ + struct dentry *parent; + int ret; + + if (IS_ERR_OR_NULL(dentry)) + return; + + parent = dentry->d_parent; + if (!parent || !parent->d_inode) + return; + + mutex_lock(&parent->d_inode->i_mutex); + ret = __tracefs_remove(dentry, parent); + mutex_unlock(&parent->d_inode->i_mutex); + if (!ret) + simple_release_fs(&tracefs_mount, &tracefs_mount_count); +} + +/** + * tracefs_remove_recursive - recursively removes a directory + * @dentry: a pointer to a the dentry of the directory to be removed. + * + * This function recursively removes a directory tree in tracefs that + * was previously created with a call to another tracefs function + * (like tracefs_create_file() or variants thereof.) + */ +void tracefs_remove_recursive(struct dentry *dentry) +{ + struct dentry *child, *parent; + + if (IS_ERR_OR_NULL(dentry)) + return; + + parent = dentry->d_parent; + if (!parent || !parent->d_inode) + return; + + parent = dentry; + down: + mutex_lock(&parent->d_inode->i_mutex); + loop: + /* + * The parent->d_subdirs is protected by the d_lock. Outside that + * lock, the child can be unlinked and set to be freed which can + * use the d_u.d_child as the rcu head and corrupt this list. + */ + spin_lock(&parent->d_lock); + list_for_each_entry(child, &parent->d_subdirs, d_child) { + if (!tracefs_positive(child)) + continue; + + /* perhaps simple_empty(child) makes more sense */ + if (!list_empty(&child->d_subdirs)) { + spin_unlock(&parent->d_lock); + mutex_unlock(&parent->d_inode->i_mutex); + parent = child; + goto down; + } + + spin_unlock(&parent->d_lock); + + if (!__tracefs_remove(child, parent)) + simple_release_fs(&tracefs_mount, &tracefs_mount_count); + + /* + * The parent->d_lock protects agaist child from unlinking + * from d_subdirs. When releasing the parent->d_lock we can + * no longer trust that the next pointer is valid. + * Restart the loop. We'll skip this one with the + * tracefs_positive() check. + */ + goto loop; + } + spin_unlock(&parent->d_lock); + + mutex_unlock(&parent->d_inode->i_mutex); + child = parent; + parent = parent->d_parent; + mutex_lock(&parent->d_inode->i_mutex); + + if (child != dentry) + /* go up */ + goto loop; + + if (!__tracefs_remove(child, parent)) + simple_release_fs(&tracefs_mount, &tracefs_mount_count); + mutex_unlock(&parent->d_inode->i_mutex); +} + +/** + * tracefs_initialized - Tells whether tracefs has been registered + */ +bool tracefs_initialized(void) +{ + return tracefs_registered; +} + +static struct kobject *trace_kobj; + +static int __init tracefs_init(void) +{ + int retval; + + trace_kobj = kobject_create_and_add("tracing", kernel_kobj); + if (!trace_kobj) + return -EINVAL; + + retval = register_filesystem(&trace_fs_type); + if (!retval) + tracefs_registered = true; + + return retval; +} +core_initcall(tracefs_init); diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index eb997e9c4ab0..11a11b32a2a9 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -509,7 +509,7 @@ again: c->bi.nospace_rp = 1; smp_wmb(); } else - ubifs_err("cannot budget space, error %d", err); + ubifs_err(c, "cannot budget space, error %d", err); return err; } diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index 26b69b2d4a45..63f56619991d 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -225,7 +225,7 @@ out_cancel: out_up: up_write(&c->commit_sem); out: - ubifs_err("commit failed, error %d", err); + ubifs_err(c, "commit failed, error %d", err); spin_lock(&c->cs_lock); c->cmt_state = COMMIT_BROKEN; wake_up(&c->cmt_wq); @@ -289,7 +289,7 @@ int ubifs_bg_thread(void *info) int err; struct ubifs_info *c = info; - ubifs_msg("background thread \"%s\" started, PID %d", + ubifs_msg(c, "background thread \"%s\" started, PID %d", c->bgt_name, current->pid); set_freezable(); @@ -324,7 +324,7 @@ int ubifs_bg_thread(void *info) cond_resched(); } - ubifs_msg("background thread \"%s\" stops", c->bgt_name); + ubifs_msg(c, "background thread \"%s\" stops", c->bgt_name); return 0; } @@ -712,13 +712,13 @@ out: return 0; out_dump: - ubifs_err("dumping index node (iip=%d)", i->iip); + ubifs_err(c, "dumping index node (iip=%d)", i->iip); ubifs_dump_node(c, idx); list_del(&i->list); kfree(i); if (!list_empty(&list)) { i = list_entry(list.prev, struct idx_node, list); - ubifs_err("dumping parent index node"); + ubifs_err(c, "dumping parent index node"); ubifs_dump_node(c, &i->idx); } out_free: @@ -727,7 +727,7 @@ out_free: list_del(&i->list); kfree(i); } - ubifs_err("failed, error %d", err); + ubifs_err(c, "failed, error %d", err); if (err > 0) err = -EINVAL; return err; diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c index 2bfa0953335d..565cb56d7225 100644 --- a/fs/ubifs/compress.c +++ b/fs/ubifs/compress.c @@ -92,8 +92,8 @@ struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; * Note, if the input buffer was not compressed, it is copied to the output * buffer and %UBIFS_COMPR_NONE is returned in @compr_type. */ -void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, - int *compr_type) +void ubifs_compress(const struct ubifs_info *c, const void *in_buf, + int in_len, void *out_buf, int *out_len, int *compr_type) { int err; struct ubifs_compressor *compr = ubifs_compressors[*compr_type]; @@ -112,9 +112,9 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, if (compr->comp_mutex) mutex_unlock(compr->comp_mutex); if (unlikely(err)) { - ubifs_warn("cannot compress %d bytes, compressor %s, error %d, leave data uncompressed", + ubifs_warn(c, "cannot compress %d bytes, compressor %s, error %d, leave data uncompressed", in_len, compr->name, err); - goto no_compr; + goto no_compr; } /* @@ -144,21 +144,21 @@ no_compr: * The length of the uncompressed data is returned in @out_len. This functions * returns %0 on success or a negative error code on failure. */ -int ubifs_decompress(const void *in_buf, int in_len, void *out_buf, - int *out_len, int compr_type) +int ubifs_decompress(const struct ubifs_info *c, const void *in_buf, + int in_len, void *out_buf, int *out_len, int compr_type) { int err; struct ubifs_compressor *compr; if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) { - ubifs_err("invalid compression type %d", compr_type); + ubifs_err(c, "invalid compression type %d", compr_type); return -EINVAL; } compr = ubifs_compressors[compr_type]; if (unlikely(!compr->capi_name)) { - ubifs_err("%s compression is not compiled in", compr->name); + ubifs_err(c, "%s compression is not compiled in", compr->name); return -EINVAL; } @@ -175,7 +175,7 @@ int ubifs_decompress(const void *in_buf, int in_len, void *out_buf, if (compr->decomp_mutex) mutex_unlock(compr->decomp_mutex); if (err) - ubifs_err("cannot decompress %d bytes, compressor %s, error %d", + ubifs_err(c, "cannot decompress %d bytes, compressor %s, error %d", in_len, compr->name, err); return err; @@ -193,8 +193,8 @@ static int __init compr_init(struct ubifs_compressor *compr) if (compr->capi_name) { compr->cc = crypto_alloc_comp(compr->capi_name, 0, 0); if (IS_ERR(compr->cc)) { - ubifs_err("cannot initialize compressor %s, error %ld", - compr->name, PTR_ERR(compr->cc)); + pr_err("UBIFS error (pid %d): cannot initialize compressor %s, error %ld", + current->pid, compr->name, PTR_ERR(compr->cc)); return PTR_ERR(compr->cc); } } diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 4cfb3e82c56f..4c46a9865fa7 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -746,7 +746,7 @@ void ubifs_dump_lprops(struct ubifs_info *c) for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { err = ubifs_read_one_lp(c, lnum, &lp); if (err) { - ubifs_err("cannot read lprops for LEB %d", lnum); + ubifs_err(c, "cannot read lprops for LEB %d", lnum); continue; } @@ -819,13 +819,13 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum) buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); if (!buf) { - ubifs_err("cannot allocate memory for dumping LEB %d", lnum); + ubifs_err(c, "cannot allocate memory for dumping LEB %d", lnum); return; } sleb = ubifs_scan(c, lnum, 0, buf, 0); if (IS_ERR(sleb)) { - ubifs_err("scan error %d", (int)PTR_ERR(sleb)); + ubifs_err(c, "scan error %d", (int)PTR_ERR(sleb)); goto out; } @@ -1032,7 +1032,7 @@ int dbg_check_space_info(struct ubifs_info *c) spin_unlock(&c->space_lock); if (free != d->saved_free) { - ubifs_err("free space changed from %lld to %lld", + ubifs_err(c, "free space changed from %lld to %lld", d->saved_free, free); goto out; } @@ -1040,15 +1040,15 @@ int dbg_check_space_info(struct ubifs_info *c) return 0; out: - ubifs_msg("saved lprops statistics dump"); + ubifs_msg(c, "saved lprops statistics dump"); ubifs_dump_lstats(&d->saved_lst); - ubifs_msg("saved budgeting info dump"); + ubifs_msg(c, "saved budgeting info dump"); ubifs_dump_budg(c, &d->saved_bi); - ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt); - ubifs_msg("current lprops statistics dump"); + ubifs_msg(c, "saved idx_gc_cnt %d", d->saved_idx_gc_cnt); + ubifs_msg(c, "current lprops statistics dump"); ubifs_get_lp_stats(c, &lst); ubifs_dump_lstats(&lst); - ubifs_msg("current budgeting info dump"); + ubifs_msg(c, "current budgeting info dump"); ubifs_dump_budg(c, &c->bi); dump_stack(); return -EINVAL; @@ -1077,9 +1077,9 @@ int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode) mutex_lock(&ui->ui_mutex); spin_lock(&ui->ui_lock); if (ui->ui_size != ui->synced_i_size && !ui->dirty) { - ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode is clean", + ubifs_err(c, "ui_size is %lld, synced_i_size is %lld, but inode is clean", ui->ui_size, ui->synced_i_size); - ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino, + ubifs_err(c, "i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino, inode->i_mode, i_size_read(inode)); dump_stack(); err = -EINVAL; @@ -1140,7 +1140,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) kfree(pdent); if (i_size_read(dir) != size) { - ubifs_err("directory inode %lu has size %llu, but calculated size is %llu", + ubifs_err(c, "directory inode %lu has size %llu, but calculated size is %llu", dir->i_ino, (unsigned long long)i_size_read(dir), (unsigned long long)size); ubifs_dump_inode(c, dir); @@ -1148,7 +1148,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) return -EINVAL; } if (dir->i_nlink != nlink) { - ubifs_err("directory inode %lu has nlink %u, but calculated nlink is %u", + ubifs_err(c, "directory inode %lu has nlink %u, but calculated nlink is %u", dir->i_ino, dir->i_nlink, nlink); ubifs_dump_inode(c, dir); dump_stack(); @@ -1207,10 +1207,10 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, err = 1; key_read(c, &dent1->key, &key); if (keys_cmp(c, &zbr1->key, &key)) { - ubifs_err("1st entry at %d:%d has key %s", zbr1->lnum, + ubifs_err(c, "1st entry at %d:%d has key %s", zbr1->lnum, zbr1->offs, dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); - ubifs_err("but it should have key %s according to tnc", + ubifs_err(c, "but it should have key %s according to tnc", dbg_snprintf_key(c, &zbr1->key, key_buf, DBG_KEY_BUF_LEN)); ubifs_dump_node(c, dent1); @@ -1219,10 +1219,10 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, key_read(c, &dent2->key, &key); if (keys_cmp(c, &zbr2->key, &key)) { - ubifs_err("2nd entry at %d:%d has key %s", zbr1->lnum, + ubifs_err(c, "2nd entry at %d:%d has key %s", zbr1->lnum, zbr1->offs, dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); - ubifs_err("but it should have key %s according to tnc", + ubifs_err(c, "but it should have key %s according to tnc", dbg_snprintf_key(c, &zbr2->key, key_buf, DBG_KEY_BUF_LEN)); ubifs_dump_node(c, dent2); @@ -1238,14 +1238,14 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, goto out_free; } if (cmp == 0 && nlen1 == nlen2) - ubifs_err("2 xent/dent nodes with the same name"); + ubifs_err(c, "2 xent/dent nodes with the same name"); else - ubifs_err("bad order of colliding key %s", + ubifs_err(c, "bad order of colliding key %s", dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); - ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); + ubifs_msg(c, "first node at %d:%d\n", zbr1->lnum, zbr1->offs); ubifs_dump_node(c, dent1); - ubifs_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs); + ubifs_msg(c, "second node at %d:%d\n", zbr2->lnum, zbr2->offs); ubifs_dump_node(c, dent2); out_free: @@ -1447,11 +1447,11 @@ static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr) return 0; out: - ubifs_err("failed, error %d", err); - ubifs_msg("dump of the znode"); + ubifs_err(c, "failed, error %d", err); + ubifs_msg(c, "dump of the znode"); ubifs_dump_znode(c, znode); if (zp) { - ubifs_msg("dump of the parent znode"); + ubifs_msg(c, "dump of the parent znode"); ubifs_dump_znode(c, zp); } dump_stack(); @@ -1518,9 +1518,9 @@ int dbg_check_tnc(struct ubifs_info *c, int extra) if (err < 0) return err; if (err) { - ubifs_msg("first znode"); + ubifs_msg(c, "first znode"); ubifs_dump_znode(c, prev); - ubifs_msg("second znode"); + ubifs_msg(c, "second znode"); ubifs_dump_znode(c, znode); return -EINVAL; } @@ -1529,13 +1529,13 @@ int dbg_check_tnc(struct ubifs_info *c, int extra) if (extra) { if (clean_cnt != atomic_long_read(&c->clean_zn_cnt)) { - ubifs_err("incorrect clean_zn_cnt %ld, calculated %ld", + ubifs_err(c, "incorrect clean_zn_cnt %ld, calculated %ld", atomic_long_read(&c->clean_zn_cnt), clean_cnt); return -EINVAL; } if (dirty_cnt != atomic_long_read(&c->dirty_zn_cnt)) { - ubifs_err("incorrect dirty_zn_cnt %ld, calculated %ld", + ubifs_err(c, "incorrect dirty_zn_cnt %ld, calculated %ld", atomic_long_read(&c->dirty_zn_cnt), dirty_cnt); return -EINVAL; @@ -1608,7 +1608,7 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, if (znode_cb) { err = znode_cb(c, znode, priv); if (err) { - ubifs_err("znode checking function returned error %d", + ubifs_err(c, "znode checking function returned error %d", err); ubifs_dump_znode(c, znode); goto out_dump; @@ -1619,7 +1619,7 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, zbr = &znode->zbranch[idx]; err = leaf_cb(c, zbr, priv); if (err) { - ubifs_err("leaf checking function returned error %d, for leaf at LEB %d:%d", + ubifs_err(c, "leaf checking function returned error %d, for leaf at LEB %d:%d", err, zbr->lnum, zbr->offs); goto out_dump; } @@ -1675,7 +1675,7 @@ out_dump: zbr = &znode->parent->zbranch[znode->iip]; else zbr = &c->zroot; - ubifs_msg("dump of znode at LEB %d:%d", zbr->lnum, zbr->offs); + ubifs_msg(c, "dump of znode at LEB %d:%d", zbr->lnum, zbr->offs); ubifs_dump_znode(c, znode); out_unlock: mutex_unlock(&c->tnc_mutex); @@ -1722,12 +1722,12 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) err = dbg_walk_index(c, NULL, add_size, &calc); if (err) { - ubifs_err("error %d while walking the index", err); + ubifs_err(c, "error %d while walking the index", err); return err; } if (calc != idx_size) { - ubifs_err("index size check failed: calculated size is %lld, should be %lld", + ubifs_err(c, "index size check failed: calculated size is %lld, should be %lld", calc, idx_size); dump_stack(); return -EINVAL; @@ -1814,7 +1814,7 @@ static struct fsck_inode *add_inode(struct ubifs_info *c, } if (inum > c->highest_inum) { - ubifs_err("too high inode number, max. is %lu", + ubifs_err(c, "too high inode number, max. is %lu", (unsigned long)c->highest_inum); return ERR_PTR(-EINVAL); } @@ -1921,17 +1921,17 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c, ino_key_init(c, &key, inum); err = ubifs_lookup_level0(c, &key, &znode, &n); if (!err) { - ubifs_err("inode %lu not found in index", (unsigned long)inum); + ubifs_err(c, "inode %lu not found in index", (unsigned long)inum); return ERR_PTR(-ENOENT); } else if (err < 0) { - ubifs_err("error %d while looking up inode %lu", + ubifs_err(c, "error %d while looking up inode %lu", err, (unsigned long)inum); return ERR_PTR(err); } zbr = &znode->zbranch[n]; if (zbr->len < UBIFS_INO_NODE_SZ) { - ubifs_err("bad node %lu node length %d", + ubifs_err(c, "bad node %lu node length %d", (unsigned long)inum, zbr->len); return ERR_PTR(-EINVAL); } @@ -1942,7 +1942,7 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c, err = ubifs_tnc_read_node(c, zbr, ino); if (err) { - ubifs_err("cannot read inode node at LEB %d:%d, error %d", + ubifs_err(c, "cannot read inode node at LEB %d:%d, error %d", zbr->lnum, zbr->offs, err); kfree(ino); return ERR_PTR(err); @@ -1951,7 +1951,7 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c, fscki = add_inode(c, fsckd, ino); kfree(ino); if (IS_ERR(fscki)) { - ubifs_err("error %ld while adding inode %lu node", + ubifs_err(c, "error %ld while adding inode %lu node", PTR_ERR(fscki), (unsigned long)inum); return fscki; } @@ -1985,7 +1985,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, struct fsck_inode *fscki; if (zbr->len < UBIFS_CH_SZ) { - ubifs_err("bad leaf length %d (LEB %d:%d)", + ubifs_err(c, "bad leaf length %d (LEB %d:%d)", zbr->len, zbr->lnum, zbr->offs); return -EINVAL; } @@ -1996,7 +1996,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, err = ubifs_tnc_read_node(c, zbr, node); if (err) { - ubifs_err("cannot read leaf node at LEB %d:%d, error %d", + ubifs_err(c, "cannot read leaf node at LEB %d:%d, error %d", zbr->lnum, zbr->offs, err); goto out_free; } @@ -2006,7 +2006,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, fscki = add_inode(c, priv, node); if (IS_ERR(fscki)) { err = PTR_ERR(fscki); - ubifs_err("error %d while adding inode node", err); + ubifs_err(c, "error %d while adding inode node", err); goto out_dump; } goto out; @@ -2014,7 +2014,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, if (type != UBIFS_DENT_KEY && type != UBIFS_XENT_KEY && type != UBIFS_DATA_KEY) { - ubifs_err("unexpected node type %d at LEB %d:%d", + ubifs_err(c, "unexpected node type %d at LEB %d:%d", type, zbr->lnum, zbr->offs); err = -EINVAL; goto out_free; @@ -2022,7 +2022,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, ch = node; if (le64_to_cpu(ch->sqnum) > c->max_sqnum) { - ubifs_err("too high sequence number, max. is %llu", + ubifs_err(c, "too high sequence number, max. is %llu", c->max_sqnum); err = -EINVAL; goto out_dump; @@ -2042,7 +2042,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, fscki = read_add_inode(c, priv, inum); if (IS_ERR(fscki)) { err = PTR_ERR(fscki); - ubifs_err("error %d while processing data node and trying to find inode node %lu", + ubifs_err(c, "error %d while processing data node and trying to find inode node %lu", err, (unsigned long)inum); goto out_dump; } @@ -2052,7 +2052,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, blk_offs <<= UBIFS_BLOCK_SHIFT; blk_offs += le32_to_cpu(dn->size); if (blk_offs > fscki->size) { - ubifs_err("data node at LEB %d:%d is not within inode size %lld", + ubifs_err(c, "data node at LEB %d:%d is not within inode size %lld", zbr->lnum, zbr->offs, fscki->size); err = -EINVAL; goto out_dump; @@ -2076,7 +2076,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, fscki = read_add_inode(c, priv, inum); if (IS_ERR(fscki)) { err = PTR_ERR(fscki); - ubifs_err("error %d while processing entry node and trying to find inode node %lu", + ubifs_err(c, "error %d while processing entry node and trying to find inode node %lu", err, (unsigned long)inum); goto out_dump; } @@ -2088,7 +2088,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, fscki1 = read_add_inode(c, priv, inum); if (IS_ERR(fscki1)) { err = PTR_ERR(fscki1); - ubifs_err("error %d while processing entry node and trying to find parent inode node %lu", + ubifs_err(c, "error %d while processing entry node and trying to find parent inode node %lu", err, (unsigned long)inum); goto out_dump; } @@ -2111,7 +2111,7 @@ out: return 0; out_dump: - ubifs_msg("dump of node at LEB %d:%d", zbr->lnum, zbr->offs); + ubifs_msg(c, "dump of node at LEB %d:%d", zbr->lnum, zbr->offs); ubifs_dump_node(c, node); out_free: kfree(node); @@ -2162,52 +2162,52 @@ static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd) */ if (fscki->inum != UBIFS_ROOT_INO && fscki->references != 1) { - ubifs_err("directory inode %lu has %d direntries which refer it, but should be 1", + ubifs_err(c, "directory inode %lu has %d direntries which refer it, but should be 1", (unsigned long)fscki->inum, fscki->references); goto out_dump; } if (fscki->inum == UBIFS_ROOT_INO && fscki->references != 0) { - ubifs_err("root inode %lu has non-zero (%d) direntries which refer it", + ubifs_err(c, "root inode %lu has non-zero (%d) direntries which refer it", (unsigned long)fscki->inum, fscki->references); goto out_dump; } if (fscki->calc_sz != fscki->size) { - ubifs_err("directory inode %lu size is %lld, but calculated size is %lld", + ubifs_err(c, "directory inode %lu size is %lld, but calculated size is %lld", (unsigned long)fscki->inum, fscki->size, fscki->calc_sz); goto out_dump; } if (fscki->calc_cnt != fscki->nlink) { - ubifs_err("directory inode %lu nlink is %d, but calculated nlink is %d", + ubifs_err(c, "directory inode %lu nlink is %d, but calculated nlink is %d", (unsigned long)fscki->inum, fscki->nlink, fscki->calc_cnt); goto out_dump; } } else { if (fscki->references != fscki->nlink) { - ubifs_err("inode %lu nlink is %d, but calculated nlink is %d", + ubifs_err(c, "inode %lu nlink is %d, but calculated nlink is %d", (unsigned long)fscki->inum, fscki->nlink, fscki->references); goto out_dump; } } if (fscki->xattr_sz != fscki->calc_xsz) { - ubifs_err("inode %lu has xattr size %u, but calculated size is %lld", + ubifs_err(c, "inode %lu has xattr size %u, but calculated size is %lld", (unsigned long)fscki->inum, fscki->xattr_sz, fscki->calc_xsz); goto out_dump; } if (fscki->xattr_cnt != fscki->calc_xcnt) { - ubifs_err("inode %lu has %u xattrs, but calculated count is %lld", + ubifs_err(c, "inode %lu has %u xattrs, but calculated count is %lld", (unsigned long)fscki->inum, fscki->xattr_cnt, fscki->calc_xcnt); goto out_dump; } if (fscki->xattr_nms != fscki->calc_xnms) { - ubifs_err("inode %lu has xattr names' size %u, but calculated names' size is %lld", + ubifs_err(c, "inode %lu has xattr names' size %u, but calculated names' size is %lld", (unsigned long)fscki->inum, fscki->xattr_nms, fscki->calc_xnms); goto out_dump; @@ -2221,11 +2221,11 @@ out_dump: ino_key_init(c, &key, fscki->inum); err = ubifs_lookup_level0(c, &key, &znode, &n); if (!err) { - ubifs_err("inode %lu not found in index", + ubifs_err(c, "inode %lu not found in index", (unsigned long)fscki->inum); return -ENOENT; } else if (err < 0) { - ubifs_err("error %d while looking up inode %lu", + ubifs_err(c, "error %d while looking up inode %lu", err, (unsigned long)fscki->inum); return err; } @@ -2237,13 +2237,13 @@ out_dump: err = ubifs_tnc_read_node(c, zbr, ino); if (err) { - ubifs_err("cannot read inode node at LEB %d:%d, error %d", + ubifs_err(c, "cannot read inode node at LEB %d:%d, error %d", zbr->lnum, zbr->offs, err); kfree(ino); return err; } - ubifs_msg("dump of the inode %lu sitting in LEB %d:%d", + ubifs_msg(c, "dump of the inode %lu sitting in LEB %d:%d", (unsigned long)fscki->inum, zbr->lnum, zbr->offs); ubifs_dump_node(c, ino); kfree(ino); @@ -2284,7 +2284,7 @@ int dbg_check_filesystem(struct ubifs_info *c) return 0; out_free: - ubifs_err("file-system check failed with error %d", err); + ubifs_err(c, "file-system check failed with error %d", err); dump_stack(); free_inodes(&fsckd); return err; @@ -2315,12 +2315,12 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) sb = container_of(cur->next, struct ubifs_scan_node, list); if (sa->type != UBIFS_DATA_NODE) { - ubifs_err("bad node type %d", sa->type); + ubifs_err(c, "bad node type %d", sa->type); ubifs_dump_node(c, sa->node); return -EINVAL; } if (sb->type != UBIFS_DATA_NODE) { - ubifs_err("bad node type %d", sb->type); + ubifs_err(c, "bad node type %d", sb->type); ubifs_dump_node(c, sb->node); return -EINVAL; } @@ -2331,7 +2331,7 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) if (inuma < inumb) continue; if (inuma > inumb) { - ubifs_err("larger inum %lu goes before inum %lu", + ubifs_err(c, "larger inum %lu goes before inum %lu", (unsigned long)inuma, (unsigned long)inumb); goto error_dump; } @@ -2340,11 +2340,11 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) blkb = key_block(c, &sb->key); if (blka > blkb) { - ubifs_err("larger block %u goes before %u", blka, blkb); + ubifs_err(c, "larger block %u goes before %u", blka, blkb); goto error_dump; } if (blka == blkb) { - ubifs_err("two data nodes for the same block"); + ubifs_err(c, "two data nodes for the same block"); goto error_dump; } } @@ -2383,19 +2383,19 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE && sa->type != UBIFS_XENT_NODE) { - ubifs_err("bad node type %d", sa->type); + ubifs_err(c, "bad node type %d", sa->type); ubifs_dump_node(c, sa->node); return -EINVAL; } if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE && sa->type != UBIFS_XENT_NODE) { - ubifs_err("bad node type %d", sb->type); + ubifs_err(c, "bad node type %d", sb->type); ubifs_dump_node(c, sb->node); return -EINVAL; } if (sa->type != UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) { - ubifs_err("non-inode node goes before inode node"); + ubifs_err(c, "non-inode node goes before inode node"); goto error_dump; } @@ -2405,7 +2405,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) if (sa->type == UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) { /* Inode nodes are sorted in descending size order */ if (sa->len < sb->len) { - ubifs_err("smaller inode node goes first"); + ubifs_err(c, "smaller inode node goes first"); goto error_dump; } continue; @@ -2421,7 +2421,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) if (inuma < inumb) continue; if (inuma > inumb) { - ubifs_err("larger inum %lu goes before inum %lu", + ubifs_err(c, "larger inum %lu goes before inum %lu", (unsigned long)inuma, (unsigned long)inumb); goto error_dump; } @@ -2430,7 +2430,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) hashb = key_block(c, &sb->key); if (hasha > hashb) { - ubifs_err("larger hash %u goes before %u", + ubifs_err(c, "larger hash %u goes before %u", hasha, hashb); goto error_dump; } @@ -2439,9 +2439,9 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) return 0; error_dump: - ubifs_msg("dumping first node"); + ubifs_msg(c, "dumping first node"); ubifs_dump_node(c, sa->node); - ubifs_msg("dumping second node"); + ubifs_msg(c, "dumping second node"); ubifs_dump_node(c, sb->node); return -EINVAL; return 0; @@ -2470,13 +2470,13 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write) delay = prandom_u32() % 60000; d->pc_timeout = jiffies; d->pc_timeout += msecs_to_jiffies(delay); - ubifs_warn("failing after %lums", delay); + ubifs_warn(c, "failing after %lums", delay); } else { d->pc_delay = 2; delay = prandom_u32() % 10000; /* Fail within 10000 operations */ d->pc_cnt_max = delay; - ubifs_warn("failing after %lu calls", delay); + ubifs_warn(c, "failing after %lu calls", delay); } } @@ -2494,55 +2494,55 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write) return 0; if (chance(19, 20)) return 0; - ubifs_warn("failing in super block LEB %d", lnum); + ubifs_warn(c, "failing in super block LEB %d", lnum); } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) { if (chance(19, 20)) return 0; - ubifs_warn("failing in master LEB %d", lnum); + ubifs_warn(c, "failing in master LEB %d", lnum); } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) { if (write && chance(99, 100)) return 0; if (chance(399, 400)) return 0; - ubifs_warn("failing in log LEB %d", lnum); + ubifs_warn(c, "failing in log LEB %d", lnum); } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) { if (write && chance(7, 8)) return 0; if (chance(19, 20)) return 0; - ubifs_warn("failing in LPT LEB %d", lnum); + ubifs_warn(c, "failing in LPT LEB %d", lnum); } else if (lnum >= c->orph_first && lnum <= c->orph_last) { if (write && chance(1, 2)) return 0; if (chance(9, 10)) return 0; - ubifs_warn("failing in orphan LEB %d", lnum); + ubifs_warn(c, "failing in orphan LEB %d", lnum); } else if (lnum == c->ihead_lnum) { if (chance(99, 100)) return 0; - ubifs_warn("failing in index head LEB %d", lnum); + ubifs_warn(c, "failing in index head LEB %d", lnum); } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) { if (chance(9, 10)) return 0; - ubifs_warn("failing in GC head LEB %d", lnum); + ubifs_warn(c, "failing in GC head LEB %d", lnum); } else if (write && !RB_EMPTY_ROOT(&c->buds) && !ubifs_search_bud(c, lnum)) { if (chance(19, 20)) return 0; - ubifs_warn("failing in non-bud LEB %d", lnum); + ubifs_warn(c, "failing in non-bud LEB %d", lnum); } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND || c->cmt_state == COMMIT_RUNNING_REQUIRED) { if (chance(999, 1000)) return 0; - ubifs_warn("failing in bud LEB %d commit running", lnum); + ubifs_warn(c, "failing in bud LEB %d commit running", lnum); } else { if (chance(9999, 10000)) return 0; - ubifs_warn("failing in bud LEB %d commit not running", lnum); + ubifs_warn(c, "failing in bud LEB %d commit not running", lnum); } d->pc_happened = 1; - ubifs_warn("========== Power cut emulated =========="); + ubifs_warn(c, "========== Power cut emulated =========="); dump_stack(); return 1; } @@ -2557,7 +2557,7 @@ static int corrupt_data(const struct ubifs_info *c, const void *buf, /* Corruption span max to end of write unit */ to = min(len, ALIGN(from + 1, c->max_write_size)); - ubifs_warn("filled bytes %u-%u with %s", from, to - 1, + ubifs_warn(c, "filled bytes %u-%u with %s", from, to - 1, ffs ? "0xFFs" : "random data"); if (ffs) @@ -2579,7 +2579,7 @@ int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, failing = power_cut_emulated(c, lnum, 1); if (failing) { len = corrupt_data(c, buf, len); - ubifs_warn("actually write %d bytes to LEB %d:%d (the buffer was corrupted)", + ubifs_warn(c, "actually write %d bytes to LEB %d:%d (the buffer was corrupted)", len, lnum, offs); } err = ubi_leb_write(c->ubi, lnum, buf, offs, len); @@ -2909,7 +2909,7 @@ out_remove: debugfs_remove_recursive(d->dfs_dir); out: err = dent ? PTR_ERR(dent) : -ENODEV; - ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", + ubifs_err(c, "cannot create \"%s\" debugfs file or directory, error %d\n", fname, err); return err; } @@ -3063,8 +3063,8 @@ out_remove: debugfs_remove_recursive(dfs_rootdir); out: err = dent ? PTR_ERR(dent) : -ENODEV; - ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", - fname, err); + pr_err("UBIFS error (pid %d): cannot create \"%s\" debugfs file or directory, error %d\n", + current->pid, fname, err); return err; } diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 0fa6c803992e..02d1ee778df0 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -146,12 +146,12 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, if (c->highest_inum >= INUM_WARN_WATERMARK) { if (c->highest_inum >= INUM_WATERMARK) { spin_unlock(&c->cnt_lock); - ubifs_err("out of inode numbers"); + ubifs_err(c, "out of inode numbers"); make_bad_inode(inode); iput(inode); return ERR_PTR(-EINVAL); } - ubifs_warn("running out of inode numbers (current %lu, max %d)", + ubifs_warn(c, "running out of inode numbers (current %lu, max %u)", (unsigned long)c->highest_inum, INUM_WATERMARK); } @@ -222,7 +222,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, * checking. */ err = PTR_ERR(inode); - ubifs_err("dead directory entry '%pd', error %d", + ubifs_err(c, "dead directory entry '%pd', error %d", dentry, err); ubifs_ro_mode(c, err); goto out; @@ -272,7 +272,7 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode, err = ubifs_init_security(dir, inode, &dentry->d_name); if (err) - goto out_cancel; + goto out_inode; mutex_lock(&dir_ui->ui_mutex); dir->i_size += sz_change; @@ -292,11 +292,12 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; mutex_unlock(&dir_ui->ui_mutex); +out_inode: make_bad_inode(inode); iput(inode); out_budg: ubifs_release_budget(c, &req); - ubifs_err("cannot create regular file, error %d", err); + ubifs_err(c, "cannot create regular file, error %d", err); return err; } @@ -449,7 +450,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) out: if (err != -ENOENT) { - ubifs_err("cannot find next direntry, error %d", err); + ubifs_err(c, "cannot find next direntry, error %d", err); return err; } @@ -732,7 +733,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) err = ubifs_init_security(dir, inode, &dentry->d_name); if (err) - goto out_cancel; + goto out_inode; mutex_lock(&dir_ui->ui_mutex); insert_inode_hash(inode); @@ -743,7 +744,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) dir->i_mtime = dir->i_ctime = inode->i_ctime; err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); if (err) { - ubifs_err("cannot create directory, error %d", err); + ubifs_err(c, "cannot create directory, error %d", err); goto out_cancel; } mutex_unlock(&dir_ui->ui_mutex); @@ -757,6 +758,7 @@ out_cancel: dir_ui->ui_size = dir->i_size; drop_nlink(dir); mutex_unlock(&dir_ui->ui_mutex); +out_inode: make_bad_inode(inode); iput(inode); out_budg: @@ -816,7 +818,7 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry, err = ubifs_init_security(dir, inode, &dentry->d_name); if (err) - goto out_cancel; + goto out_inode; mutex_lock(&dir_ui->ui_mutex); dir->i_size += sz_change; @@ -836,6 +838,7 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; mutex_unlock(&dir_ui->ui_mutex); +out_inode: make_bad_inode(inode); iput(inode); out_budg: @@ -896,7 +899,7 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, err = ubifs_init_security(dir, inode, &dentry->d_name); if (err) - goto out_cancel; + goto out_inode; mutex_lock(&dir_ui->ui_mutex); dir->i_size += sz_change; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index e627c0acf626..3ba3fef64e9e 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -50,7 +50,6 @@ */ #include "ubifs.h" -#include <linux/aio.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/slab.h> @@ -80,7 +79,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block, dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; out_len = UBIFS_BLOCK_SIZE; - err = ubifs_decompress(&dn->data, dlen, addr, &out_len, + err = ubifs_decompress(c, &dn->data, dlen, addr, &out_len, le16_to_cpu(dn->compr_type)); if (err || len != out_len) goto dump; @@ -96,7 +95,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block, return 0; dump: - ubifs_err("bad data node (block %u, inode %lu)", + ubifs_err(c, "bad data node (block %u, inode %lu)", block, inode->i_ino); ubifs_dump_node(c, dn); return -EINVAL; @@ -161,13 +160,14 @@ static int do_readpage(struct page *page) addr += UBIFS_BLOCK_SIZE; } if (err) { + struct ubifs_info *c = inode->i_sb->s_fs_info; if (err == -ENOENT) { /* Not found, so it must be a hole */ SetPageChecked(page); dbg_gen("hole"); goto out_free; } - ubifs_err("cannot read page %lu of inode %lu, error %d", + ubifs_err(c, "cannot read page %lu of inode %lu, error %d", page->index, inode->i_ino, err); goto error; } @@ -650,7 +650,7 @@ static int populate_page(struct ubifs_info *c, struct page *page, dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; out_len = UBIFS_BLOCK_SIZE; - err = ubifs_decompress(&dn->data, dlen, addr, &out_len, + err = ubifs_decompress(c, &dn->data, dlen, addr, &out_len, le16_to_cpu(dn->compr_type)); if (err || len != out_len) goto out_err; @@ -698,7 +698,7 @@ out_err: SetPageError(page); flush_dcache_page(page); kunmap(page); - ubifs_err("bad data node (block %u, inode %lu)", + ubifs_err(c, "bad data node (block %u, inode %lu)", page_block, inode->i_ino); return -EINVAL; } @@ -802,7 +802,7 @@ out_free: return ret; out_warn: - ubifs_warn("ignoring error %d and skipping bulk-read", err); + ubifs_warn(c, "ignoring error %d and skipping bulk-read", err); goto out_free; out_bu_off: @@ -930,7 +930,7 @@ static int do_writepage(struct page *page, int len) } if (err) { SetPageError(page); - ubifs_err("cannot write page %lu of inode %lu, error %d", + ubifs_err(c, "cannot write page %lu of inode %lu, error %d", page->index, inode->i_ino, err); ubifs_ro_mode(c, err); } @@ -1485,7 +1485,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, err = ubifs_budget_space(c, &req); if (unlikely(err)) { if (err == -ENOSPC) - ubifs_warn("out of space for mmapped file (inode number %lu)", + ubifs_warn(c, "out of space for mmapped file (inode number %lu)", inode->i_ino); return VM_FAULT_SIGBUS; } @@ -1581,8 +1581,6 @@ const struct inode_operations ubifs_symlink_inode_operations = { const struct file_operations ubifs_file_operations = { .llseek = generic_file_llseek, - .read = new_sync_read, - .write = new_sync_write, .read_iter = generic_file_read_iter, .write_iter = ubifs_write_iter, .mmap = ubifs_file_mmap, diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index fb08b0c514b6..97be41215332 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -85,7 +85,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) c->ro_error = 1; c->no_chk_data_crc = 0; c->vfs_sb->s_flags |= MS_RDONLY; - ubifs_warn("switched to read-only mode, error %d", err); + ubifs_warn(c, "switched to read-only mode, error %d", err); dump_stack(); } } @@ -107,7 +107,7 @@ int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs, * @even_ebadmsg is true. */ if (err && (err != -EBADMSG || even_ebadmsg)) { - ubifs_err("reading %d bytes from LEB %d:%d failed, error %d", + ubifs_err(c, "reading %d bytes from LEB %d:%d failed, error %d", len, lnum, offs, err); dump_stack(); } @@ -127,7 +127,7 @@ int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, else err = dbg_leb_write(c, lnum, buf, offs, len); if (err) { - ubifs_err("writing %d bytes to LEB %d:%d failed, error %d", + ubifs_err(c, "writing %d bytes to LEB %d:%d failed, error %d", len, lnum, offs, err); ubifs_ro_mode(c, err); dump_stack(); @@ -147,7 +147,7 @@ int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len) else err = dbg_leb_change(c, lnum, buf, len); if (err) { - ubifs_err("changing %d bytes in LEB %d failed, error %d", + ubifs_err(c, "changing %d bytes in LEB %d failed, error %d", len, lnum, err); ubifs_ro_mode(c, err); dump_stack(); @@ -167,7 +167,7 @@ int ubifs_leb_unmap(struct ubifs_info *c, int lnum) else err = dbg_leb_unmap(c, lnum); if (err) { - ubifs_err("unmap LEB %d failed, error %d", lnum, err); + ubifs_err(c, "unmap LEB %d failed, error %d", lnum, err); ubifs_ro_mode(c, err); dump_stack(); } @@ -186,7 +186,7 @@ int ubifs_leb_map(struct ubifs_info *c, int lnum) else err = dbg_leb_map(c, lnum); if (err) { - ubifs_err("mapping LEB %d failed, error %d", lnum, err); + ubifs_err(c, "mapping LEB %d failed, error %d", lnum, err); ubifs_ro_mode(c, err); dump_stack(); } @@ -199,7 +199,7 @@ int ubifs_is_mapped(const struct ubifs_info *c, int lnum) err = ubi_is_mapped(c->ubi, lnum); if (err < 0) { - ubifs_err("ubi_is_mapped failed for LEB %d, error %d", + ubifs_err(c, "ubi_is_mapped failed for LEB %d, error %d", lnum, err); dump_stack(); } @@ -247,7 +247,7 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, magic = le32_to_cpu(ch->magic); if (magic != UBIFS_NODE_MAGIC) { if (!quiet) - ubifs_err("bad magic %#08x, expected %#08x", + ubifs_err(c, "bad magic %#08x, expected %#08x", magic, UBIFS_NODE_MAGIC); err = -EUCLEAN; goto out; @@ -256,7 +256,7 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, type = ch->node_type; if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) { if (!quiet) - ubifs_err("bad node type %d", type); + ubifs_err(c, "bad node type %d", type); goto out; } @@ -279,7 +279,7 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, node_crc = le32_to_cpu(ch->crc); if (crc != node_crc) { if (!quiet) - ubifs_err("bad CRC: calculated %#08x, read %#08x", + ubifs_err(c, "bad CRC: calculated %#08x, read %#08x", crc, node_crc); err = -EUCLEAN; goto out; @@ -289,10 +289,10 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, out_len: if (!quiet) - ubifs_err("bad node length %d", node_len); + ubifs_err(c, "bad node length %d", node_len); out: if (!quiet) { - ubifs_err("bad node at LEB %d:%d", lnum, offs); + ubifs_err(c, "bad node at LEB %d:%d", lnum, offs); ubifs_dump_node(c, buf); dump_stack(); } @@ -355,11 +355,11 @@ static unsigned long long next_sqnum(struct ubifs_info *c) if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) { if (sqnum >= SQNUM_WATERMARK) { - ubifs_err("sequence number overflow %llu, end of life", + ubifs_err(c, "sequence number overflow %llu, end of life", sqnum); ubifs_ro_mode(c, -EINVAL); } - ubifs_warn("running out of sequence numbers, end of life soon"); + ubifs_warn(c, "running out of sequence numbers, end of life soon"); } return sqnum; @@ -636,7 +636,7 @@ int ubifs_bg_wbufs_sync(struct ubifs_info *c) err = ubifs_wbuf_sync_nolock(wbuf); mutex_unlock(&wbuf->io_mutex); if (err) { - ubifs_err("cannot sync write-buffer, error %d", err); + ubifs_err(c, "cannot sync write-buffer, error %d", err); ubifs_ro_mode(c, err); goto out_timers; } @@ -833,7 +833,7 @@ exit: return 0; out: - ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", + ubifs_err(c, "cannot write %d bytes to LEB %d:%d, error %d", len, wbuf->lnum, wbuf->offs, err); ubifs_dump_node(c, buf); dump_stack(); @@ -932,27 +932,27 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, } if (type != ch->node_type) { - ubifs_err("bad node type (%d but expected %d)", + ubifs_err(c, "bad node type (%d but expected %d)", ch->node_type, type); goto out; } err = ubifs_check_node(c, buf, lnum, offs, 0, 0); if (err) { - ubifs_err("expected node type %d", type); + ubifs_err(c, "expected node type %d", type); return err; } rlen = le32_to_cpu(ch->len); if (rlen != len) { - ubifs_err("bad node length %d, expected %d", rlen, len); + ubifs_err(c, "bad node length %d, expected %d", rlen, len); goto out; } return 0; out: - ubifs_err("bad node at LEB %d:%d", lnum, offs); + ubifs_err(c, "bad node at LEB %d:%d", lnum, offs); ubifs_dump_node(c, buf); dump_stack(); return -EINVAL; diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 648b143606cc..3c7b29de0ca7 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -138,7 +138,7 @@ static int setflags(struct inode *inode, int flags) return err; out_unlock: - ubifs_err("can't modify inode %lu attributes", inode->i_ino); + ubifs_err(c, "can't modify inode %lu attributes", inode->i_ino); mutex_unlock(&ui->ui_mutex); ubifs_release_budget(c, &req); return err; diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index f6ac3f29323c..90ae1a8439d9 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -363,11 +363,11 @@ again: * This should not happen unless the journal size limitations * are too tough. */ - ubifs_err("stuck in space allocation"); + ubifs_err(c, "stuck in space allocation"); err = -ENOSPC; goto out; } else if (cmt_retries > 32) - ubifs_warn("too many space allocation re-tries (%d)", + ubifs_warn(c, "too many space allocation re-tries (%d)", cmt_retries); dbg_jnl("-EAGAIN, commit and retry (retried %d times)", @@ -380,7 +380,7 @@ again: goto again; out: - ubifs_err("cannot reserve %d bytes in jhead %d, error %d", + ubifs_err(c, "cannot reserve %d bytes in jhead %d, error %d", len, jhead, err); if (err == -ENOSPC) { /* This are some budgeting problems, print useful information */ @@ -731,7 +731,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, compr_type = ui->compr_type; out_len = dlen - UBIFS_DATA_NODE_SZ; - ubifs_compress(buf, len, &data->data, &out_len, &compr_type); + ubifs_compress(c, buf, len, &data->data, &out_len, &compr_type); ubifs_assert(out_len <= UBIFS_BLOCK_SIZE); dlen = UBIFS_DATA_NODE_SZ + out_len; @@ -1100,7 +1100,8 @@ out_free: * This function is used when an inode is truncated and the last data node of * the inode has to be re-compressed and re-written. */ -static int recomp_data_node(struct ubifs_data_node *dn, int *new_len) +static int recomp_data_node(const struct ubifs_info *c, + struct ubifs_data_node *dn, int *new_len) { void *buf; int err, len, compr_type, out_len; @@ -1112,11 +1113,11 @@ static int recomp_data_node(struct ubifs_data_node *dn, int *new_len) len = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; compr_type = le16_to_cpu(dn->compr_type); - err = ubifs_decompress(&dn->data, len, buf, &out_len, compr_type); + err = ubifs_decompress(c, &dn->data, len, buf, &out_len, compr_type); if (err) goto out; - ubifs_compress(buf, *new_len, &dn->data, &out_len, &compr_type); + ubifs_compress(c, buf, *new_len, &dn->data, &out_len, &compr_type); ubifs_assert(out_len <= UBIFS_BLOCK_SIZE); dn->compr_type = cpu_to_le16(compr_type); dn->size = cpu_to_le32(*new_len); @@ -1191,7 +1192,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, int compr_type = le16_to_cpu(dn->compr_type); if (compr_type != UBIFS_COMPR_NONE) { - err = recomp_data_node(dn, &dlen); + err = recomp_data_node(c, dn, &dlen); if (err) goto out_free; } else { diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index c14628fbeee2..8c795e6392b1 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -696,7 +696,7 @@ int ubifs_consolidate_log(struct ubifs_info *c) destroy_done_tree(&done_tree); vfree(buf); if (write_lnum == c->lhead_lnum) { - ubifs_err("log is too full"); + ubifs_err(c, "log is too full"); return -EINVAL; } /* Unmap remaining LEBs */ @@ -743,7 +743,7 @@ static int dbg_check_bud_bytes(struct ubifs_info *c) bud_bytes += c->leb_size - bud->start; if (c->bud_bytes != bud_bytes) { - ubifs_err("bad bud_bytes %lld, calculated %lld", + ubifs_err(c, "bad bud_bytes %lld, calculated %lld", c->bud_bytes, bud_bytes); err = -EINVAL; } diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 46190a7c42a6..a0011aa3a779 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -682,7 +682,7 @@ int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, out: ubifs_release_lprops(c); if (err) - ubifs_err("cannot change properties of LEB %d, error %d", + ubifs_err(c, "cannot change properties of LEB %d, error %d", lnum, err); return err; } @@ -721,7 +721,7 @@ int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, out: ubifs_release_lprops(c); if (err) - ubifs_err("cannot update properties of LEB %d, error %d", + ubifs_err(c, "cannot update properties of LEB %d, error %d", lnum, err); return err; } @@ -746,7 +746,7 @@ int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp) lpp = ubifs_lpt_lookup(c, lnum); if (IS_ERR(lpp)) { err = PTR_ERR(lpp); - ubifs_err("cannot read properties of LEB %d, error %d", + ubifs_err(c, "cannot read properties of LEB %d, error %d", lnum, err); goto out; } @@ -873,13 +873,13 @@ int dbg_check_cats(struct ubifs_info *c) list_for_each_entry(lprops, &c->empty_list, list) { if (lprops->free != c->leb_size) { - ubifs_err("non-empty LEB %d on empty list (free %d dirty %d flags %d)", + ubifs_err(c, "non-empty LEB %d on empty list (free %d dirty %d flags %d)", lprops->lnum, lprops->free, lprops->dirty, lprops->flags); return -EINVAL; } if (lprops->flags & LPROPS_TAKEN) { - ubifs_err("taken LEB %d on empty list (free %d dirty %d flags %d)", + ubifs_err(c, "taken LEB %d on empty list (free %d dirty %d flags %d)", lprops->lnum, lprops->free, lprops->dirty, lprops->flags); return -EINVAL; @@ -889,13 +889,13 @@ int dbg_check_cats(struct ubifs_info *c) i = 0; list_for_each_entry(lprops, &c->freeable_list, list) { if (lprops->free + lprops->dirty != c->leb_size) { - ubifs_err("non-freeable LEB %d on freeable list (free %d dirty %d flags %d)", + ubifs_err(c, "non-freeable LEB %d on freeable list (free %d dirty %d flags %d)", lprops->lnum, lprops->free, lprops->dirty, lprops->flags); return -EINVAL; } if (lprops->flags & LPROPS_TAKEN) { - ubifs_err("taken LEB %d on freeable list (free %d dirty %d flags %d)", + ubifs_err(c, "taken LEB %d on freeable list (free %d dirty %d flags %d)", lprops->lnum, lprops->free, lprops->dirty, lprops->flags); return -EINVAL; @@ -903,7 +903,7 @@ int dbg_check_cats(struct ubifs_info *c) i += 1; } if (i != c->freeable_cnt) { - ubifs_err("freeable list count %d expected %d", i, + ubifs_err(c, "freeable list count %d expected %d", i, c->freeable_cnt); return -EINVAL; } @@ -912,26 +912,26 @@ int dbg_check_cats(struct ubifs_info *c) list_for_each(pos, &c->idx_gc) i += 1; if (i != c->idx_gc_cnt) { - ubifs_err("idx_gc list count %d expected %d", i, + ubifs_err(c, "idx_gc list count %d expected %d", i, c->idx_gc_cnt); return -EINVAL; } list_for_each_entry(lprops, &c->frdi_idx_list, list) { if (lprops->free + lprops->dirty != c->leb_size) { - ubifs_err("non-freeable LEB %d on frdi_idx list (free %d dirty %d flags %d)", + ubifs_err(c, "non-freeable LEB %d on frdi_idx list (free %d dirty %d flags %d)", lprops->lnum, lprops->free, lprops->dirty, lprops->flags); return -EINVAL; } if (lprops->flags & LPROPS_TAKEN) { - ubifs_err("taken LEB %d on frdi_idx list (free %d dirty %d flags %d)", + ubifs_err(c, "taken LEB %d on frdi_idx list (free %d dirty %d flags %d)", lprops->lnum, lprops->free, lprops->dirty, lprops->flags); return -EINVAL; } if (!(lprops->flags & LPROPS_INDEX)) { - ubifs_err("non-index LEB %d on frdi_idx list (free %d dirty %d flags %d)", + ubifs_err(c, "non-index LEB %d on frdi_idx list (free %d dirty %d flags %d)", lprops->lnum, lprops->free, lprops->dirty, lprops->flags); return -EINVAL; @@ -944,15 +944,15 @@ int dbg_check_cats(struct ubifs_info *c) for (i = 0; i < heap->cnt; i++) { lprops = heap->arr[i]; if (!lprops) { - ubifs_err("null ptr in LPT heap cat %d", cat); + ubifs_err(c, "null ptr in LPT heap cat %d", cat); return -EINVAL; } if (lprops->hpos != i) { - ubifs_err("bad ptr in LPT heap cat %d", cat); + ubifs_err(c, "bad ptr in LPT heap cat %d", cat); return -EINVAL; } if (lprops->flags & LPROPS_TAKEN) { - ubifs_err("taken LEB in LPT heap cat %d", cat); + ubifs_err(c, "taken LEB in LPT heap cat %d", cat); return -EINVAL; } } @@ -988,7 +988,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, goto out; } if (lprops != lp) { - ubifs_err("lprops %zx lp %zx lprops->lnum %d lp->lnum %d", + ubifs_err(c, "lprops %zx lp %zx lprops->lnum %d lp->lnum %d", (size_t)lprops, (size_t)lp, lprops->lnum, lp->lnum); err = 4; @@ -1008,7 +1008,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, } out: if (err) { - ubifs_err("failed cat %d hpos %d err %d", cat, i, err); + ubifs_err(c, "failed cat %d hpos %d err %d", cat, i, err); dump_stack(); ubifs_dump_heap(c, heap, cat); } @@ -1039,7 +1039,7 @@ static int scan_check_cb(struct ubifs_info *c, if (cat != LPROPS_UNCAT) { cat = ubifs_categorize_lprops(c, lp); if (cat != (lp->flags & LPROPS_CAT_MASK)) { - ubifs_err("bad LEB category %d expected %d", + ubifs_err(c, "bad LEB category %d expected %d", (lp->flags & LPROPS_CAT_MASK), cat); return -EINVAL; } @@ -1074,7 +1074,7 @@ static int scan_check_cb(struct ubifs_info *c, } } if (!found) { - ubifs_err("bad LPT list (category %d)", cat); + ubifs_err(c, "bad LPT list (category %d)", cat); return -EINVAL; } } @@ -1086,7 +1086,7 @@ static int scan_check_cb(struct ubifs_info *c, if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || lp != heap->arr[lp->hpos]) { - ubifs_err("bad LPT heap (category %d)", cat); + ubifs_err(c, "bad LPT heap (category %d)", cat); return -EINVAL; } } @@ -1133,7 +1133,7 @@ static int scan_check_cb(struct ubifs_info *c, is_idx = (snod->type == UBIFS_IDX_NODE) ? 1 : 0; if (is_idx && snod->type != UBIFS_IDX_NODE) { - ubifs_err("indexing node in data LEB %d:%d", + ubifs_err(c, "indexing node in data LEB %d:%d", lnum, snod->offs); goto out_destroy; } @@ -1159,7 +1159,7 @@ static int scan_check_cb(struct ubifs_info *c, if (free > c->leb_size || free < 0 || dirty > c->leb_size || dirty < 0) { - ubifs_err("bad calculated accounting for LEB %d: free %d, dirty %d", + ubifs_err(c, "bad calculated accounting for LEB %d: free %d, dirty %d", lnum, free, dirty); goto out_destroy; } @@ -1206,13 +1206,13 @@ static int scan_check_cb(struct ubifs_info *c, /* Free but not unmapped LEB, it's fine */ is_idx = 0; else { - ubifs_err("indexing node without indexing flag"); + ubifs_err(c, "indexing node without indexing flag"); goto out_print; } } if (!is_idx && (lp->flags & LPROPS_INDEX)) { - ubifs_err("data node with indexing flag"); + ubifs_err(c, "data node with indexing flag"); goto out_print; } @@ -1241,7 +1241,7 @@ static int scan_check_cb(struct ubifs_info *c, return LPT_SCAN_CONTINUE; out_print: - ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, should be free %d, dirty %d", + ubifs_err(c, "bad accounting of LEB %d: free %d, dirty %d flags %#x, should be free %d, dirty %d", lnum, lp->free, lp->dirty, lp->flags, free, dirty); ubifs_dump_leb(c, lnum); out_destroy: @@ -1293,11 +1293,11 @@ int dbg_check_lprops(struct ubifs_info *c) lst.total_free != c->lst.total_free || lst.total_dirty != c->lst.total_dirty || lst.total_used != c->lst.total_used) { - ubifs_err("bad overall accounting"); - ubifs_err("calculated: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld", + ubifs_err(c, "bad overall accounting"); + ubifs_err(c, "calculated: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld", lst.empty_lebs, lst.idx_lebs, lst.total_free, lst.total_dirty, lst.total_used); - ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld", + ubifs_err(c, "read from lprops: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld", c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, c->lst.total_dirty, c->lst.total_used); err = -EINVAL; @@ -1306,10 +1306,10 @@ int dbg_check_lprops(struct ubifs_info *c) if (lst.total_dead != c->lst.total_dead || lst.total_dark != c->lst.total_dark) { - ubifs_err("bad dead/dark space accounting"); - ubifs_err("calculated: total_dead %lld, total_dark %lld", + ubifs_err(c, "bad dead/dark space accounting"); + ubifs_err(c, "calculated: total_dead %lld, total_dark %lld", lst.total_dead, lst.total_dark); - ubifs_err("read from lprops: total_dead %lld, total_dark %lld", + ubifs_err(c, "read from lprops: total_dead %lld, total_dark %lld", c->lst.total_dead, c->lst.total_dark); err = -EINVAL; goto out; diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index 421bd0a80424..dc9f27e9d61b 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -145,13 +145,13 @@ int ubifs_calc_lpt_geom(struct ubifs_info *c) sz = c->lpt_sz * 2; /* Must have at least 2 times the size */ lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size); if (lebs_needed > c->lpt_lebs) { - ubifs_err("too few LPT LEBs"); + ubifs_err(c, "too few LPT LEBs"); return -EINVAL; } /* Verify that ltab fits in a single LEB (since ltab is a single node */ if (c->ltab_sz > c->leb_size) { - ubifs_err("LPT ltab too big"); + ubifs_err(c, "LPT ltab too big"); return -EINVAL; } @@ -213,7 +213,7 @@ static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs, continue; } if (c->ltab_sz > c->leb_size) { - ubifs_err("LPT ltab too big"); + ubifs_err(c, "LPT ltab too big"); return -EINVAL; } *main_lebs = c->main_lebs; @@ -911,7 +911,7 @@ static void replace_cats(struct ubifs_info *c, struct ubifs_pnode *old_pnode, * * This function returns %0 on success and a negative error code on failure. */ -static int check_lpt_crc(void *buf, int len) +static int check_lpt_crc(const struct ubifs_info *c, void *buf, int len) { int pos = 0; uint8_t *addr = buf; @@ -921,8 +921,8 @@ static int check_lpt_crc(void *buf, int len) calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, len - UBIFS_LPT_CRC_BYTES); if (crc != calc_crc) { - ubifs_err("invalid crc in LPT node: crc %hx calc %hx", crc, - calc_crc); + ubifs_err(c, "invalid crc in LPT node: crc %hx calc %hx", + crc, calc_crc); dump_stack(); return -EINVAL; } @@ -938,14 +938,15 @@ static int check_lpt_crc(void *buf, int len) * * This function returns %0 on success and a negative error code on failure. */ -static int check_lpt_type(uint8_t **addr, int *pos, int type) +static int check_lpt_type(const struct ubifs_info *c, uint8_t **addr, + int *pos, int type) { int node_type; node_type = ubifs_unpack_bits(addr, pos, UBIFS_LPT_TYPE_BITS); if (node_type != type) { - ubifs_err("invalid type (%d) in LPT node type %d", node_type, - type); + ubifs_err(c, "invalid type (%d) in LPT node type %d", + node_type, type); dump_stack(); return -EINVAL; } @@ -966,7 +967,7 @@ static int unpack_pnode(const struct ubifs_info *c, void *buf, uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; int i, pos = 0, err; - err = check_lpt_type(&addr, &pos, UBIFS_LPT_PNODE); + err = check_lpt_type(c, &addr, &pos, UBIFS_LPT_PNODE); if (err) return err; if (c->big_lpt) @@ -985,7 +986,7 @@ static int unpack_pnode(const struct ubifs_info *c, void *buf, lprops->flags = 0; lprops->flags |= ubifs_categorize_lprops(c, lprops); } - err = check_lpt_crc(buf, c->pnode_sz); + err = check_lpt_crc(c, buf, c->pnode_sz); return err; } @@ -1003,7 +1004,7 @@ int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf, uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; int i, pos = 0, err; - err = check_lpt_type(&addr, &pos, UBIFS_LPT_NNODE); + err = check_lpt_type(c, &addr, &pos, UBIFS_LPT_NNODE); if (err) return err; if (c->big_lpt) @@ -1019,7 +1020,7 @@ int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf, nnode->nbranch[i].offs = ubifs_unpack_bits(&addr, &pos, c->lpt_offs_bits); } - err = check_lpt_crc(buf, c->nnode_sz); + err = check_lpt_crc(c, buf, c->nnode_sz); return err; } @@ -1035,7 +1036,7 @@ static int unpack_ltab(const struct ubifs_info *c, void *buf) uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; int i, pos = 0, err; - err = check_lpt_type(&addr, &pos, UBIFS_LPT_LTAB); + err = check_lpt_type(c, &addr, &pos, UBIFS_LPT_LTAB); if (err) return err; for (i = 0; i < c->lpt_lebs; i++) { @@ -1051,7 +1052,7 @@ static int unpack_ltab(const struct ubifs_info *c, void *buf) c->ltab[i].tgc = 0; c->ltab[i].cmt = 0; } - err = check_lpt_crc(buf, c->ltab_sz); + err = check_lpt_crc(c, buf, c->ltab_sz); return err; } @@ -1067,7 +1068,7 @@ static int unpack_lsave(const struct ubifs_info *c, void *buf) uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; int i, pos = 0, err; - err = check_lpt_type(&addr, &pos, UBIFS_LPT_LSAVE); + err = check_lpt_type(c, &addr, &pos, UBIFS_LPT_LSAVE); if (err) return err; for (i = 0; i < c->lsave_cnt; i++) { @@ -1077,7 +1078,7 @@ static int unpack_lsave(const struct ubifs_info *c, void *buf) return -EINVAL; c->lsave[i] = lnum; } - err = check_lpt_crc(buf, c->lsave_sz); + err = check_lpt_crc(c, buf, c->lsave_sz); return err; } @@ -1243,7 +1244,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) return 0; out: - ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs); + ubifs_err(c, "error %d reading nnode at %d:%d", err, lnum, offs); dump_stack(); kfree(nnode); return err; @@ -1308,10 +1309,10 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) return 0; out: - ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); + ubifs_err(c, "error %d reading pnode at %d:%d", err, lnum, offs); ubifs_dump_pnode(c, pnode, parent, iip); dump_stack(); - ubifs_err("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); + ubifs_err(c, "calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); kfree(pnode); return err; } @@ -2095,7 +2096,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, int i; if (pnode->num != col) { - ubifs_err("pnode num %d expected %d parent num %d iip %d", + ubifs_err(c, "pnode num %d expected %d parent num %d iip %d", pnode->num, col, pnode->parent->num, pnode->iip); return -EINVAL; } @@ -2110,13 +2111,13 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, if (lnum >= c->leb_cnt) continue; if (lprops->lnum != lnum) { - ubifs_err("bad LEB number %d expected %d", + ubifs_err(c, "bad LEB number %d expected %d", lprops->lnum, lnum); return -EINVAL; } if (lprops->flags & LPROPS_TAKEN) { if (cat != LPROPS_UNCAT) { - ubifs_err("LEB %d taken but not uncat %d", + ubifs_err(c, "LEB %d taken but not uncat %d", lprops->lnum, cat); return -EINVAL; } @@ -2129,7 +2130,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, case LPROPS_FRDI_IDX: break; default: - ubifs_err("LEB %d index but cat %d", + ubifs_err(c, "LEB %d index but cat %d", lprops->lnum, cat); return -EINVAL; } @@ -2142,7 +2143,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, case LPROPS_FREEABLE: break; default: - ubifs_err("LEB %d not index but cat %d", + ubifs_err(c, "LEB %d not index but cat %d", lprops->lnum, cat); return -EINVAL; } @@ -2183,14 +2184,14 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, break; } if (!found) { - ubifs_err("LEB %d cat %d not found in cat heap/list", + ubifs_err(c, "LEB %d cat %d not found in cat heap/list", lprops->lnum, cat); return -EINVAL; } switch (cat) { case LPROPS_EMPTY: if (lprops->free != c->leb_size) { - ubifs_err("LEB %d cat %d free %d dirty %d", + ubifs_err(c, "LEB %d cat %d free %d dirty %d", lprops->lnum, cat, lprops->free, lprops->dirty); return -EINVAL; @@ -2199,7 +2200,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, case LPROPS_FREEABLE: case LPROPS_FRDI_IDX: if (lprops->free + lprops->dirty != c->leb_size) { - ubifs_err("LEB %d cat %d free %d dirty %d", + ubifs_err(c, "LEB %d cat %d free %d dirty %d", lprops->lnum, cat, lprops->free, lprops->dirty); return -EINVAL; @@ -2236,7 +2237,7 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, /* cnode is a nnode */ num = calc_nnode_num(row, col); if (cnode->num != num) { - ubifs_err("nnode num %d expected %d parent num %d iip %d", + ubifs_err(c, "nnode num %d expected %d parent num %d iip %d", cnode->num, num, (nnode ? nnode->num : 0), cnode->iip); return -EINVAL; diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index d9c02928e992..ce89bdc3eb02 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -319,7 +319,7 @@ static int layout_cnodes(struct ubifs_info *c) return 0; no_space: - ubifs_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, done_lsave %d", + ubifs_err(c, "LPT out of space at LEB %d:%d needing %d, done_ltab %d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave); ubifs_dump_lpt_info(c); ubifs_dump_lpt_lebs(c); @@ -543,7 +543,7 @@ static int write_cnodes(struct ubifs_info *c) return 0; no_space: - ubifs_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab %d, done_lsave %d", + ubifs_err(c, "LPT out of space mismatch at LEB %d:%d needing %d, done_ltab %d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave); ubifs_dump_lpt_info(c); ubifs_dump_lpt_lebs(c); @@ -1638,7 +1638,7 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); if (!buf) { - ubifs_err("cannot allocate memory for ltab checking"); + ubifs_err(c, "cannot allocate memory for ltab checking"); return 0; } @@ -1660,18 +1660,18 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) continue; } if (!dbg_is_all_ff(p, len)) { - ubifs_err("invalid empty space in LEB %d at %d", + ubifs_err(c, "invalid empty space in LEB %d at %d", lnum, c->leb_size - len); err = -EINVAL; } i = lnum - c->lpt_first; if (len != c->ltab[i].free) { - ubifs_err("invalid free space in LEB %d (free %d, expected %d)", + ubifs_err(c, "invalid free space in LEB %d (free %d, expected %d)", lnum, len, c->ltab[i].free); err = -EINVAL; } if (dirty != c->ltab[i].dirty) { - ubifs_err("invalid dirty space in LEB %d (dirty %d, expected %d)", + ubifs_err(c, "invalid dirty space in LEB %d (dirty %d, expected %d)", lnum, dirty, c->ltab[i].dirty); err = -EINVAL; } @@ -1725,7 +1725,7 @@ int dbg_check_ltab(struct ubifs_info *c) for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) { err = dbg_check_ltab_lnum(c, lnum); if (err) { - ubifs_err("failed at LEB %d", lnum); + ubifs_err(c, "failed at LEB %d", lnum); return err; } } @@ -1757,7 +1757,7 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) free += c->leb_size; } if (free < c->lpt_sz) { - ubifs_err("LPT space error: free %lld lpt_sz %lld", + ubifs_err(c, "LPT space error: free %lld lpt_sz %lld", free, c->lpt_sz); ubifs_dump_lpt_info(c); ubifs_dump_lpt_lebs(c); @@ -1797,12 +1797,12 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) d->chk_lpt_lebs = 0; d->chk_lpt_wastage = 0; if (c->dirty_pn_cnt > c->pnode_cnt) { - ubifs_err("dirty pnodes %d exceed max %d", + ubifs_err(c, "dirty pnodes %d exceed max %d", c->dirty_pn_cnt, c->pnode_cnt); err = -EINVAL; } if (c->dirty_nn_cnt > c->nnode_cnt) { - ubifs_err("dirty nnodes %d exceed max %d", + ubifs_err(c, "dirty nnodes %d exceed max %d", c->dirty_nn_cnt, c->nnode_cnt); err = -EINVAL; } @@ -1820,22 +1820,22 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) chk_lpt_sz *= d->chk_lpt_lebs; chk_lpt_sz += len - c->nhead_offs; if (d->chk_lpt_sz != chk_lpt_sz) { - ubifs_err("LPT wrote %lld but space used was %lld", + ubifs_err(c, "LPT wrote %lld but space used was %lld", d->chk_lpt_sz, chk_lpt_sz); err = -EINVAL; } if (d->chk_lpt_sz > c->lpt_sz) { - ubifs_err("LPT wrote %lld but lpt_sz is %lld", + ubifs_err(c, "LPT wrote %lld but lpt_sz is %lld", d->chk_lpt_sz, c->lpt_sz); err = -EINVAL; } if (d->chk_lpt_sz2 && d->chk_lpt_sz != d->chk_lpt_sz2) { - ubifs_err("LPT layout size %lld but wrote %lld", + ubifs_err(c, "LPT layout size %lld but wrote %lld", d->chk_lpt_sz, d->chk_lpt_sz2); err = -EINVAL; } if (d->chk_lpt_sz2 && d->new_nhead_offs != len) { - ubifs_err("LPT new nhead offs: expected %d was %d", + ubifs_err(c, "LPT new nhead offs: expected %d was %d", d->new_nhead_offs, len); err = -EINVAL; } @@ -1845,7 +1845,7 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) if (c->big_lpt) lpt_sz += c->lsave_sz; if (d->chk_lpt_sz - d->chk_lpt_wastage > lpt_sz) { - ubifs_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld", + ubifs_err(c, "LPT chk_lpt_sz %lld + waste %lld exceeds %lld", d->chk_lpt_sz, d->chk_lpt_wastage, lpt_sz); err = -EINVAL; } @@ -1887,7 +1887,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum); buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); if (!buf) { - ubifs_err("cannot allocate memory to dump LPT"); + ubifs_err(c, "cannot allocate memory to dump LPT"); return; } @@ -1962,7 +1962,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) pr_err("LEB %d:%d, lsave len\n", lnum, offs); break; default: - ubifs_err("LPT node type %d not recognized", node_type); + ubifs_err(c, "LPT node type %d not recognized", node_type); goto out; } diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index 1a4bb9e8b3b8..c6a5e39e2ba5 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c @@ -82,7 +82,7 @@ out: return -EUCLEAN; out_dump: - ubifs_err("unexpected node type %d master LEB %d:%d", + ubifs_err(c, "unexpected node type %d master LEB %d:%d", snod->type, lnum, snod->offs); ubifs_scan_destroy(sleb); return -EINVAL; @@ -240,7 +240,7 @@ static int validate_master(const struct ubifs_info *c) return 0; out: - ubifs_err("bad master node at offset %d error %d", c->mst_offs, err); + ubifs_err(c, "bad master node at offset %d error %d", c->mst_offs, err); ubifs_dump_node(c, c->mst_node); return -EINVAL; } @@ -316,7 +316,7 @@ int ubifs_read_master(struct ubifs_info *c) if (c->leb_cnt < old_leb_cnt || c->leb_cnt < UBIFS_MIN_LEB_CNT) { - ubifs_err("bad leb_cnt on master node"); + ubifs_err(c, "bad leb_cnt on master node"); ubifs_dump_node(c, c->mst_node); return -EINVAL; } diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 4409f486ecef..caf2d123e9ee 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -88,7 +88,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) else if (inum > o->inum) p = &(*p)->rb_right; else { - ubifs_err("orphaned twice"); + ubifs_err(c, "orphaned twice"); spin_unlock(&c->orphan_lock); kfree(orphan); return 0; @@ -155,7 +155,7 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) } } spin_unlock(&c->orphan_lock); - ubifs_err("missing orphan ino %lu", (unsigned long)inum); + ubifs_err(c, "missing orphan ino %lu", (unsigned long)inum); dump_stack(); } @@ -287,7 +287,7 @@ static int write_orph_node(struct ubifs_info *c, int atomic) * We limit the number of orphans so that this should * never happen. */ - ubifs_err("out of space in orphan area"); + ubifs_err(c, "out of space in orphan area"); return -EINVAL; } } @@ -397,7 +397,7 @@ static int consolidate(struct ubifs_info *c) * We limit the number of orphans so that this should * never happen. */ - ubifs_err("out of space in orphan area"); + ubifs_err(c, "out of space in orphan area"); err = -EINVAL; } spin_unlock(&c->orphan_lock); @@ -569,7 +569,7 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, list_for_each_entry(snod, &sleb->nodes, list) { if (snod->type != UBIFS_ORPH_NODE) { - ubifs_err("invalid node type %d in orphan area at %d:%d", + ubifs_err(c, "invalid node type %d in orphan area at %d:%d", snod->type, sleb->lnum, snod->offs); ubifs_dump_node(c, snod->node); return -EINVAL; @@ -596,7 +596,7 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, * number. That makes this orphan node, out of date. */ if (!first) { - ubifs_err("out of order commit number %llu in orphan node at %d:%d", + ubifs_err(c, "out of order commit number %llu in orphan node at %d:%d", cmt_no, sleb->lnum, snod->offs); ubifs_dump_node(c, snod->node); return -EINVAL; @@ -831,20 +831,20 @@ static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr, if (inum != ci->last_ino) { /* Lowest node type is the inode node, so it comes first */ if (key_type(c, &zbr->key) != UBIFS_INO_KEY) - ubifs_err("found orphan node ino %lu, type %d", + ubifs_err(c, "found orphan node ino %lu, type %d", (unsigned long)inum, key_type(c, &zbr->key)); ci->last_ino = inum; ci->tot_inos += 1; err = ubifs_tnc_read_node(c, zbr, ci->node); if (err) { - ubifs_err("node read failed, error %d", err); + ubifs_err(c, "node read failed, error %d", err); return err; } if (ci->node->nlink == 0) /* Must be recorded as an orphan */ if (!dbg_find_check_orphan(&ci->root, inum) && !dbg_find_orphan(c, inum)) { - ubifs_err("missing orphan, ino %lu", + ubifs_err(c, "missing orphan, ino %lu", (unsigned long)inum); ci->missing += 1; } @@ -887,7 +887,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); if (!buf) { - ubifs_err("cannot allocate memory to check orphans"); + ubifs_err(c, "cannot allocate memory to check orphans"); return 0; } @@ -925,7 +925,7 @@ static int dbg_check_orphans(struct ubifs_info *c) ci.root = RB_ROOT; ci.node = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS); if (!ci.node) { - ubifs_err("out of memory"); + ubifs_err(c, "out of memory"); return -ENOMEM; } @@ -935,12 +935,12 @@ static int dbg_check_orphans(struct ubifs_info *c) err = dbg_walk_index(c, &dbg_orphan_check, NULL, &ci); if (err) { - ubifs_err("cannot scan TNC, error %d", err); + ubifs_err(c, "cannot scan TNC, error %d", err); goto out; } if (ci.missing) { - ubifs_err("%lu missing orphan(s)", ci.missing); + ubifs_err(c, "%lu missing orphan(s)", ci.missing); err = -EINVAL; goto out; } diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index c640938f62f0..695fc71d5244 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -305,7 +305,7 @@ int ubifs_recover_master_node(struct ubifs_info *c) mst = mst2; } - ubifs_msg("recovered master node from LEB %d", + ubifs_msg(c, "recovered master node from LEB %d", (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1)); memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ); @@ -360,13 +360,13 @@ int ubifs_recover_master_node(struct ubifs_info *c) out_err: err = -EINVAL; out_free: - ubifs_err("failed to recover master node"); + ubifs_err(c, "failed to recover master node"); if (mst1) { - ubifs_err("dumping first master node"); + ubifs_err(c, "dumping first master node"); ubifs_dump_node(c, mst1); } if (mst2) { - ubifs_err("dumping second master node"); + ubifs_err(c, "dumping second master node"); ubifs_dump_node(c, mst2); } vfree(buf2); @@ -682,7 +682,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, ret, lnum, offs); break; } else { - ubifs_err("unexpected return value %d", ret); + ubifs_err(c, "unexpected return value %d", ret); err = -EINVAL; goto error; } @@ -702,7 +702,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, * See header comment for this file for more * explanations about the reasons we have this check. */ - ubifs_err("corrupt empty space LEB %d:%d, corruption starts at %d", + ubifs_err(c, "corrupt empty space LEB %d:%d, corruption starts at %d", lnum, offs, corruption); /* Make sure we dump interesting non-0xFF data */ offs += corruption; @@ -788,13 +788,13 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, corrupted_rescan: /* Re-scan the corrupted data with verbose messages */ - ubifs_err("corruption %d", ret); + ubifs_err(c, "corruption %d", ret); ubifs_scan_a_node(c, buf, len, lnum, offs, 1); corrupted: ubifs_scanned_corruption(c, lnum, offs, buf); err = -EUCLEAN; error: - ubifs_err("LEB %d scanning failed", lnum); + ubifs_err(c, "LEB %d scanning failed", lnum); ubifs_scan_destroy(sleb); return ERR_PTR(err); } @@ -826,15 +826,15 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs, goto out_free; ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0); if (ret != SCANNED_A_NODE) { - ubifs_err("Not a valid node"); + ubifs_err(c, "Not a valid node"); goto out_err; } if (cs_node->ch.node_type != UBIFS_CS_NODE) { - ubifs_err("Node a CS node, type is %d", cs_node->ch.node_type); + ubifs_err(c, "Node a CS node, type is %d", cs_node->ch.node_type); goto out_err; } if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) { - ubifs_err("CS node cmt_no %llu != current cmt_no %llu", + ubifs_err(c, "CS node cmt_no %llu != current cmt_no %llu", (unsigned long long)le64_to_cpu(cs_node->cmt_no), c->cmt_no); goto out_err; @@ -847,7 +847,7 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs, out_err: err = -EINVAL; out_free: - ubifs_err("failed to get CS sqnum"); + ubifs_err(c, "failed to get CS sqnum"); kfree(cs_node); return err; } @@ -899,7 +899,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, } } if (snod->sqnum > cs_sqnum) { - ubifs_err("unrecoverable log corruption in LEB %d", + ubifs_err(c, "unrecoverable log corruption in LEB %d", lnum); ubifs_scan_destroy(sleb); return ERR_PTR(-EUCLEAN); @@ -975,11 +975,8 @@ int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf) return err; dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs); - err = recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf); - if (err) - return err; - return 0; + return recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf); } /** @@ -1004,10 +1001,7 @@ static int clean_an_unclean_leb(struct ubifs_info *c, if (len == 0) { /* Nothing to read, just unmap it */ - err = ubifs_leb_unmap(c, lnum); - if (err) - return err; - return 0; + return ubifs_leb_unmap(c, lnum); } err = ubifs_leb_read(c, lnum, buf, offs, len, 0); @@ -1043,7 +1037,7 @@ static int clean_an_unclean_leb(struct ubifs_info *c, } if (ret == SCANNED_EMPTY_SPACE) { - ubifs_err("unexpected empty space at %d:%d", + ubifs_err(c, "unexpected empty space at %d:%d", lnum, offs); return -EUCLEAN; } @@ -1137,7 +1131,7 @@ static int grab_empty_leb(struct ubifs_info *c) */ lnum = ubifs_find_free_leb_for_idx(c); if (lnum < 0) { - ubifs_err("could not find an empty LEB"); + ubifs_err(c, "could not find an empty LEB"); ubifs_dump_lprops(c); ubifs_dump_budg(c, &c->bi); return lnum; @@ -1217,7 +1211,7 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) } mutex_unlock(&wbuf->io_mutex); if (err < 0) { - ubifs_err("GC failed, error %d", err); + ubifs_err(c, "GC failed, error %d", err); if (err == -EAGAIN) err = -EINVAL; return err; @@ -1464,7 +1458,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) return 0; out: - ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d", + ubifs_warn(c, "inode %lu failed to fix size %lld -> %lld error %d", (unsigned long)e->inum, e->i_size, e->d_size, err); return err; } diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 9b40a1c5e160..3ca4540130b5 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -458,13 +458,13 @@ int ubifs_validate_entry(struct ubifs_info *c, nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || strnlen(dent->name, nlen) != nlen || le64_to_cpu(dent->inum) > MAX_INUM) { - ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ? + ubifs_err(c, "bad %s node", key_type == UBIFS_DENT_KEY ? "directory entry" : "extended attribute entry"); return -EINVAL; } if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { - ubifs_err("bad key type %d", key_type); + ubifs_err(c, "bad key type %d", key_type); return -EINVAL; } @@ -589,7 +589,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b) cond_resched(); if (snod->sqnum >= SQNUM_WATERMARK) { - ubifs_err("file system's life ended"); + ubifs_err(c, "file system's life ended"); goto out_dump; } @@ -647,7 +647,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b) if (old_size < 0 || old_size > c->max_inode_sz || new_size < 0 || new_size > c->max_inode_sz || old_size <= new_size) { - ubifs_err("bad truncation node"); + ubifs_err(c, "bad truncation node"); goto out_dump; } @@ -662,7 +662,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b) break; } default: - ubifs_err("unexpected node type %d in bud LEB %d:%d", + ubifs_err(c, "unexpected node type %d in bud LEB %d:%d", snod->type, lnum, snod->offs); err = -EINVAL; goto out_dump; @@ -685,7 +685,7 @@ out: return err; out_dump: - ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs); + ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs); ubifs_dump_node(c, snod->node); ubifs_scan_destroy(sleb); return -EINVAL; @@ -805,7 +805,7 @@ static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) if (bud) { if (bud->jhead == jhead && bud->start <= offs) return 1; - ubifs_err("bud at LEB %d:%d was already referred", lnum, offs); + ubifs_err(c, "bud at LEB %d:%d was already referred", lnum, offs); return -EINVAL; } @@ -861,12 +861,12 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) * numbers. */ if (snod->type != UBIFS_CS_NODE) { - ubifs_err("first log node at LEB %d:%d is not CS node", + ubifs_err(c, "first log node at LEB %d:%d is not CS node", lnum, offs); goto out_dump; } if (le64_to_cpu(node->cmt_no) != c->cmt_no) { - ubifs_err("first CS node at LEB %d:%d has wrong commit number %llu expected %llu", + ubifs_err(c, "first CS node at LEB %d:%d has wrong commit number %llu expected %llu", lnum, offs, (unsigned long long)le64_to_cpu(node->cmt_no), c->cmt_no); @@ -891,7 +891,7 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) /* Make sure the first node sits at offset zero of the LEB */ if (snod->offs != 0) { - ubifs_err("first node is not at zero offset"); + ubifs_err(c, "first node is not at zero offset"); goto out_dump; } @@ -899,12 +899,12 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) cond_resched(); if (snod->sqnum >= SQNUM_WATERMARK) { - ubifs_err("file system's life ended"); + ubifs_err(c, "file system's life ended"); goto out_dump; } if (snod->sqnum < c->cs_sqnum) { - ubifs_err("bad sqnum %llu, commit sqnum %llu", + ubifs_err(c, "bad sqnum %llu, commit sqnum %llu", snod->sqnum, c->cs_sqnum); goto out_dump; } @@ -934,12 +934,12 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) case UBIFS_CS_NODE: /* Make sure it sits at the beginning of LEB */ if (snod->offs != 0) { - ubifs_err("unexpected node in log"); + ubifs_err(c, "unexpected node in log"); goto out_dump; } break; default: - ubifs_err("unexpected node in log"); + ubifs_err(c, "unexpected node in log"); goto out_dump; } } @@ -955,7 +955,7 @@ out: return err; out_dump: - ubifs_err("log error detected while replaying the log at LEB %d:%d", + ubifs_err(c, "log error detected while replaying the log at LEB %d:%d", lnum, offs + snod->offs); ubifs_dump_node(c, snod->node); ubifs_scan_destroy(sleb); @@ -1017,7 +1017,7 @@ int ubifs_replay_journal(struct ubifs_info *c) return free; /* Error code */ if (c->ihead_offs != c->leb_size - free) { - ubifs_err("bad index head LEB %d:%d", c->ihead_lnum, + ubifs_err(c, "bad index head LEB %d:%d", c->ihead_lnum, c->ihead_offs); return -EINVAL; } @@ -1040,7 +1040,7 @@ int ubifs_replay_journal(struct ubifs_info *c) * someting went wrong and we cannot proceed mounting * the file-system. */ - ubifs_err("no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted", + ubifs_err(c, "no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted", lnum, 0); err = -EINVAL; } diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 79c6dbbc0e04..f4fbc7b6b794 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -335,7 +335,7 @@ static int create_default_filesystem(struct ubifs_info *c) if (err) return err; - ubifs_msg("default file-system created"); + ubifs_msg(c, "default file-system created"); return 0; } @@ -365,13 +365,13 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) } if (le32_to_cpu(sup->min_io_size) != c->min_io_size) { - ubifs_err("min. I/O unit mismatch: %d in superblock, %d real", + ubifs_err(c, "min. I/O unit mismatch: %d in superblock, %d real", le32_to_cpu(sup->min_io_size), c->min_io_size); goto failed; } if (le32_to_cpu(sup->leb_size) != c->leb_size) { - ubifs_err("LEB size mismatch: %d in superblock, %d real", + ubifs_err(c, "LEB size mismatch: %d in superblock, %d real", le32_to_cpu(sup->leb_size), c->leb_size); goto failed; } @@ -393,33 +393,33 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6; if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) { - ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, %d minimum required", + ubifs_err(c, "bad LEB count: %d in superblock, %d on UBI volume, %d minimum required", c->leb_cnt, c->vi.size, min_leb_cnt); goto failed; } if (c->max_leb_cnt < c->leb_cnt) { - ubifs_err("max. LEB count %d less than LEB count %d", + ubifs_err(c, "max. LEB count %d less than LEB count %d", c->max_leb_cnt, c->leb_cnt); goto failed; } if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) { - ubifs_err("too few main LEBs count %d, must be at least %d", + ubifs_err(c, "too few main LEBs count %d, must be at least %d", c->main_lebs, UBIFS_MIN_MAIN_LEBS); goto failed; } max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS; if (c->max_bud_bytes < max_bytes) { - ubifs_err("too small journal (%lld bytes), must be at least %lld bytes", + ubifs_err(c, "too small journal (%lld bytes), must be at least %lld bytes", c->max_bud_bytes, max_bytes); goto failed; } max_bytes = (long long)c->leb_size * c->main_lebs; if (c->max_bud_bytes > max_bytes) { - ubifs_err("too large journal size (%lld bytes), only %lld bytes available in the main area", + ubifs_err(c, "too large journal size (%lld bytes), only %lld bytes available in the main area", c->max_bud_bytes, max_bytes); goto failed; } @@ -468,7 +468,7 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) return 0; failed: - ubifs_err("bad superblock, error %d", err); + ubifs_err(c, "bad superblock, error %d", err); ubifs_dump_node(c, sup); return -EINVAL; } @@ -549,12 +549,12 @@ int ubifs_read_superblock(struct ubifs_info *c) ubifs_assert(!c->ro_media || c->ro_mount); if (!c->ro_mount || c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) { - ubifs_err("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d", + ubifs_err(c, "on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d", c->fmt_version, c->ro_compat_version, UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) { - ubifs_msg("only R/O mounting is possible"); + ubifs_msg(c, "only R/O mounting is possible"); err = -EROFS; } else err = -EINVAL; @@ -570,7 +570,7 @@ int ubifs_read_superblock(struct ubifs_info *c) } if (c->fmt_version < 3) { - ubifs_err("on-flash format version %d is not supported", + ubifs_err(c, "on-flash format version %d is not supported", c->fmt_version); err = -EINVAL; goto out; @@ -595,7 +595,7 @@ int ubifs_read_superblock(struct ubifs_info *c) c->key_len = UBIFS_SK_LEN; break; default: - ubifs_err("unsupported key format"); + ubifs_err(c, "unsupported key format"); err = -EINVAL; goto out; } @@ -785,7 +785,7 @@ int ubifs_fixup_free_space(struct ubifs_info *c) ubifs_assert(c->space_fixup); ubifs_assert(!c->ro_mount); - ubifs_msg("start fixing up free space"); + ubifs_msg(c, "start fixing up free space"); err = fixup_free_space(c); if (err) @@ -804,6 +804,6 @@ int ubifs_fixup_free_space(struct ubifs_info *c) if (err) return err; - ubifs_msg("free space fixup complete"); + ubifs_msg(c, "free space fixup complete"); return err; } diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index 89adbc4d08ac..aab87340d3de 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -100,7 +100,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, if (pad_len < 0 || offs + node_len + pad_len > c->leb_size) { if (!quiet) { - ubifs_err("bad pad node at LEB %d:%d", + ubifs_err(c, "bad pad node at LEB %d:%d", lnum, offs); ubifs_dump_node(c, pad); } @@ -110,7 +110,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, /* Make the node pads to 8-byte boundary */ if ((node_len + pad_len) & 7) { if (!quiet) - ubifs_err("bad padding length %d - %d", + ubifs_err(c, "bad padding length %d - %d", offs, offs + node_len + pad_len); return SCANNED_A_BAD_PAD_NODE; } @@ -152,7 +152,7 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0); if (err && err != -EBADMSG) { - ubifs_err("cannot read %d bytes from LEB %d:%d, error %d", + ubifs_err(c, "cannot read %d bytes from LEB %d:%d, error %d", c->leb_size - offs, lnum, offs, err); kfree(sleb); return ERR_PTR(err); @@ -240,11 +240,11 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, { int len; - ubifs_err("corruption at LEB %d:%d", lnum, offs); + ubifs_err(c, "corruption at LEB %d:%d", lnum, offs); len = c->leb_size - offs; if (len > 8192) len = 8192; - ubifs_err("first %d bytes from LEB %d:%d", len, lnum, offs); + ubifs_err(c, "first %d bytes from LEB %d:%d", len, lnum, offs); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1); } @@ -299,16 +299,16 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, switch (ret) { case SCANNED_GARBAGE: - ubifs_err("garbage"); + ubifs_err(c, "garbage"); goto corrupted; case SCANNED_A_NODE: break; case SCANNED_A_CORRUPT_NODE: case SCANNED_A_BAD_PAD_NODE: - ubifs_err("bad node"); + ubifs_err(c, "bad node"); goto corrupted; default: - ubifs_err("unknown"); + ubifs_err(c, "unknown"); err = -EINVAL; goto error; } @@ -325,7 +325,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, if (offs % c->min_io_size) { if (!quiet) - ubifs_err("empty space starts at non-aligned offset %d", + ubifs_err(c, "empty space starts at non-aligned offset %d", offs); goto corrupted; } @@ -338,7 +338,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, for (; len; offs++, buf++, len--) if (*(uint8_t *)buf != 0xff) { if (!quiet) - ubifs_err("corrupt empty space at LEB %d:%d", + ubifs_err(c, "corrupt empty space at LEB %d:%d", lnum, offs); goto corrupted; } @@ -348,14 +348,14 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, corrupted: if (!quiet) { ubifs_scanned_corruption(c, lnum, offs, buf); - ubifs_err("LEB %d scanning failed", lnum); + ubifs_err(c, "LEB %d scanning failed", lnum); } err = -EUCLEAN; ubifs_scan_destroy(sleb); return ERR_PTR(err); error: - ubifs_err("LEB %d scanning failed, error %d", lnum, err); + ubifs_err(c, "LEB %d scanning failed, error %d", lnum, err); ubifs_scan_destroy(sleb); return ERR_PTR(err); } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 93e946561c5c..75e6f04bb795 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -70,13 +70,13 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode) const struct ubifs_inode *ui = ubifs_inode(inode); if (inode->i_size > c->max_inode_sz) { - ubifs_err("inode is too large (%lld)", + ubifs_err(c, "inode is too large (%lld)", (long long)inode->i_size); return 1; } if (ui->compr_type >= UBIFS_COMPR_TYPES_CNT) { - ubifs_err("unknown compression type %d", ui->compr_type); + ubifs_err(c, "unknown compression type %d", ui->compr_type); return 2; } @@ -90,7 +90,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode) return 5; if (!ubifs_compr_present(ui->compr_type)) { - ubifs_warn("inode %lu uses '%s' compression, but it was not compiled in", + ubifs_warn(c, "inode %lu uses '%s' compression, but it was not compiled in", inode->i_ino, ubifs_compr_name(ui->compr_type)); } @@ -242,14 +242,14 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) return inode; out_invalid: - ubifs_err("inode %lu validation failed, error %d", inode->i_ino, err); + ubifs_err(c, "inode %lu validation failed, error %d", inode->i_ino, err); ubifs_dump_node(c, ino); ubifs_dump_inode(c, inode); err = -EINVAL; out_ino: kfree(ino); out: - ubifs_err("failed to read inode %lu, error %d", inode->i_ino, err); + ubifs_err(c, "failed to read inode %lu, error %d", inode->i_ino, err); iget_failed(inode); return ERR_PTR(err); } @@ -319,7 +319,7 @@ static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc) if (inode->i_nlink) { err = ubifs_jnl_write_inode(c, inode); if (err) - ubifs_err("can't write inode %lu, error %d", + ubifs_err(c, "can't write inode %lu, error %d", inode->i_ino, err); else err = dbg_check_inode_size(c, inode, ui->ui_size); @@ -363,7 +363,7 @@ static void ubifs_evict_inode(struct inode *inode) * Worst case we have a lost orphan inode wasting space, so a * simple error message is OK here. */ - ubifs_err("can't delete inode %lu, error %d", + ubifs_err(c, "can't delete inode %lu, error %d", inode->i_ino, err); out: @@ -492,17 +492,17 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) static int init_constants_early(struct ubifs_info *c) { if (c->vi.corrupted) { - ubifs_warn("UBI volume is corrupted - read-only mode"); + ubifs_warn(c, "UBI volume is corrupted - read-only mode"); c->ro_media = 1; } if (c->di.ro_mode) { - ubifs_msg("read-only UBI device"); + ubifs_msg(c, "read-only UBI device"); c->ro_media = 1; } if (c->vi.vol_type == UBI_STATIC_VOLUME) { - ubifs_msg("static UBI volume - read-only mode"); + ubifs_msg(c, "static UBI volume - read-only mode"); c->ro_media = 1; } @@ -516,19 +516,19 @@ static int init_constants_early(struct ubifs_info *c) c->max_write_shift = fls(c->max_write_size) - 1; if (c->leb_size < UBIFS_MIN_LEB_SZ) { - ubifs_err("too small LEBs (%d bytes), min. is %d bytes", + ubifs_err(c, "too small LEBs (%d bytes), min. is %d bytes", c->leb_size, UBIFS_MIN_LEB_SZ); return -EINVAL; } if (c->leb_cnt < UBIFS_MIN_LEB_CNT) { - ubifs_err("too few LEBs (%d), min. is %d", + ubifs_err(c, "too few LEBs (%d), min. is %d", c->leb_cnt, UBIFS_MIN_LEB_CNT); return -EINVAL; } if (!is_power_of_2(c->min_io_size)) { - ubifs_err("bad min. I/O size %d", c->min_io_size); + ubifs_err(c, "bad min. I/O size %d", c->min_io_size); return -EINVAL; } @@ -539,7 +539,7 @@ static int init_constants_early(struct ubifs_info *c) if (c->max_write_size < c->min_io_size || c->max_write_size % c->min_io_size || !is_power_of_2(c->max_write_size)) { - ubifs_err("bad write buffer size %d for %d min. I/O unit", + ubifs_err(c, "bad write buffer size %d for %d min. I/O unit", c->max_write_size, c->min_io_size); return -EINVAL; } @@ -665,7 +665,7 @@ static int init_constants_sb(struct ubifs_info *c) tmp = UBIFS_CS_NODE_SZ + UBIFS_REF_NODE_SZ * c->jhead_cnt; tmp = ALIGN(tmp, c->min_io_size); if (tmp > c->leb_size) { - ubifs_err("too small LEB size %d, at least %d needed", + ubifs_err(c, "too small LEB size %d, at least %d needed", c->leb_size, tmp); return -EINVAL; } @@ -680,7 +680,7 @@ static int init_constants_sb(struct ubifs_info *c) tmp /= c->leb_size; tmp += 1; if (c->log_lebs < tmp) { - ubifs_err("too small log %d LEBs, required min. %d LEBs", + ubifs_err(c, "too small log %d LEBs, required min. %d LEBs", c->log_lebs, tmp); return -EINVAL; } @@ -772,7 +772,7 @@ static int take_gc_lnum(struct ubifs_info *c) int err; if (c->gc_lnum == -1) { - ubifs_err("no LEB for GC"); + ubifs_err(c, "no LEB for GC"); return -EINVAL; } @@ -857,7 +857,7 @@ static void free_orphans(struct ubifs_info *c) orph = list_entry(c->orph_list.next, struct ubifs_orphan, list); list_del(&orph->list); kfree(orph); - ubifs_err("orphan list not empty at unmount"); + ubifs_err(c, "orphan list not empty at unmount"); } vfree(c->orph_buf); @@ -954,7 +954,8 @@ static const match_table_t tokens = { */ static int parse_standard_option(const char *option) { - ubifs_msg("parse %s", option); + + pr_notice("UBIFS: parse %s\n", option); if (!strcmp(option, "sync")) return MS_SYNCHRONOUS; return 0; @@ -1026,7 +1027,7 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, else if (!strcmp(name, "zlib")) c->mount_opts.compr_type = UBIFS_COMPR_ZLIB; else { - ubifs_err("unknown compressor \"%s\"", name); + ubifs_err(c, "unknown compressor \"%s\"", name); //FIXME: is c ready? kfree(name); return -EINVAL; } @@ -1042,7 +1043,7 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, flag = parse_standard_option(p); if (!flag) { - ubifs_err("unrecognized mount option \"%s\" or missing value", + ubifs_err(c, "unrecognized mount option \"%s\" or missing value", p); return -EINVAL; } @@ -1105,7 +1106,7 @@ again: } /* Just disable bulk-read */ - ubifs_warn("cannot allocate %d bytes of memory for bulk-read, disabling it", + ubifs_warn(c, "cannot allocate %d bytes of memory for bulk-read, disabling it", c->max_bu_buf_len); c->mount_opts.bulk_read = 1; c->bulk_read = 0; @@ -1124,7 +1125,7 @@ static int check_free_space(struct ubifs_info *c) { ubifs_assert(c->dark_wm > 0); if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { - ubifs_err("insufficient free space to mount in R/W mode"); + ubifs_err(c, "insufficient free space to mount in R/W mode"); ubifs_dump_budg(c, &c->bi); ubifs_dump_lprops(c); return -ENOSPC; @@ -1166,14 +1167,14 @@ static int mount_ubifs(struct ubifs_info *c) * This UBI volume is empty, and read-only, or the file system * is mounted read-only - we cannot format it. */ - ubifs_err("can't format empty UBI volume: read-only %s", + ubifs_err(c, "can't format empty UBI volume: read-only %s", c->ro_media ? "UBI volume" : "mount"); err = -EROFS; goto out_free; } if (c->ro_media && !c->ro_mount) { - ubifs_err("cannot mount read-write - read-only media"); + ubifs_err(c, "cannot mount read-write - read-only media"); err = -EROFS; goto out_free; } @@ -1221,7 +1222,7 @@ static int mount_ubifs(struct ubifs_info *c) * or overridden by mount options is actually compiled in. */ if (!ubifs_compr_present(c->default_compr)) { - ubifs_err("'compressor \"%s\" is not compiled in", + ubifs_err(c, "'compressor \"%s\" is not compiled in", ubifs_compr_name(c->default_compr)); err = -ENOTSUPP; goto out_free; @@ -1250,7 +1251,7 @@ static int mount_ubifs(struct ubifs_info *c) if (IS_ERR(c->bgt)) { err = PTR_ERR(c->bgt); c->bgt = NULL; - ubifs_err("cannot spawn \"%s\", error %d", + ubifs_err(c, "cannot spawn \"%s\", error %d", c->bgt_name, err); goto out_wbufs; } @@ -1264,7 +1265,7 @@ static int mount_ubifs(struct ubifs_info *c) init_constants_master(c); if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { - ubifs_msg("recovery needed"); + ubifs_msg(c, "recovery needed"); c->need_recovery = 1; } @@ -1284,7 +1285,7 @@ static int mount_ubifs(struct ubifs_info *c) goto out_lpt; } - if (!c->ro_mount) { + if (!c->ro_mount && !c->need_recovery) { /* * Set the "dirty" flag so that if we reboot uncleanly we * will notice this immediately on the next mount. @@ -1373,10 +1374,10 @@ static int mount_ubifs(struct ubifs_info *c) if (c->need_recovery) { if (c->ro_mount) - ubifs_msg("recovery deferred"); + ubifs_msg(c, "recovery deferred"); else { c->need_recovery = 0; - ubifs_msg("recovery completed"); + ubifs_msg(c, "recovery completed"); /* * GC LEB has to be empty and taken at this point. But * the journal head LEBs may also be accounted as @@ -1397,20 +1398,20 @@ static int mount_ubifs(struct ubifs_info *c) c->mounting = 0; - ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"%s", + ubifs_msg(c, "UBIFS: mounted UBI device %d, volume %d, name \"%s\"%s", c->vi.ubi_num, c->vi.vol_id, c->vi.name, c->ro_mount ? ", R/O mode" : ""); x = (long long)c->main_lebs * c->leb_size; y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; - ubifs_msg("LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes", + ubifs_msg(c, "LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes", c->leb_size, c->leb_size >> 10, c->min_io_size, c->max_write_size); - ubifs_msg("FS size: %lld bytes (%lld MiB, %d LEBs), journal size %lld bytes (%lld MiB, %d LEBs)", + ubifs_msg(c, "FS size: %lld bytes (%lld MiB, %d LEBs), journal size %lld bytes (%lld MiB, %d LEBs)", x, x >> 20, c->main_lebs, y, y >> 20, c->log_lebs + c->max_bud_cnt); - ubifs_msg("reserved for root: %llu bytes (%llu KiB)", + ubifs_msg(c, "reserved for root: %llu bytes (%llu KiB)", c->report_rp_size, c->report_rp_size >> 10); - ubifs_msg("media format: w%d/r%d (latest is w%d/r%d), UUID %pUB%s", + ubifs_msg(c, "media format: w%d/r%d (latest is w%d/r%d), UUID %pUB%s", c->fmt_version, c->ro_compat_version, UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION, c->uuid, c->big_lpt ? ", big LPT model" : ", small LPT model"); @@ -1543,8 +1544,8 @@ static int ubifs_remount_rw(struct ubifs_info *c) int err, lnum; if (c->rw_incompat) { - ubifs_err("the file-system is not R/W-compatible"); - ubifs_msg("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d", + ubifs_err(c, "the file-system is not R/W-compatible"); + ubifs_msg(c, "on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d", c->fmt_version, c->ro_compat_version, UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); return -EROFS; @@ -1581,7 +1582,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) } if (c->need_recovery) { - ubifs_msg("completing deferred recovery"); + ubifs_msg(c, "completing deferred recovery"); err = ubifs_write_rcvrd_mst_node(c); if (err) goto out; @@ -1630,7 +1631,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) if (IS_ERR(c->bgt)) { err = PTR_ERR(c->bgt); c->bgt = NULL; - ubifs_err("cannot spawn \"%s\", error %d", + ubifs_err(c, "cannot spawn \"%s\", error %d", c->bgt_name, err); goto out; } @@ -1664,7 +1665,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) if (c->need_recovery) { c->need_recovery = 0; - ubifs_msg("deferred recovery completed"); + ubifs_msg(c, "deferred recovery completed"); } else { /* * Do not run the debugging space check if the were doing @@ -1752,8 +1753,7 @@ static void ubifs_put_super(struct super_block *sb) int i; struct ubifs_info *c = sb->s_fs_info; - ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num, - c->vi.vol_id); + ubifs_msg(c, "un-mount UBI device %d", c->vi.ubi_num); /* * The following asserts are only valid if there has not been a failure @@ -1809,7 +1809,7 @@ static void ubifs_put_super(struct super_block *sb) * next mount, so we just print a message and * continue to unmount normally. */ - ubifs_err("failed to write master node, error %d", + ubifs_err(c, "failed to write master node, error %d", err); } else { for (i = 0; i < c->jhead_cnt; i++) @@ -1834,17 +1834,17 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) err = ubifs_parse_options(c, data, 1); if (err) { - ubifs_err("invalid or unknown remount parameter"); + ubifs_err(c, "invalid or unknown remount parameter"); return err; } if (c->ro_mount && !(*flags & MS_RDONLY)) { if (c->ro_error) { - ubifs_msg("cannot re-mount R/W due to prior errors"); + ubifs_msg(c, "cannot re-mount R/W due to prior errors"); return -EROFS; } if (c->ro_media) { - ubifs_msg("cannot re-mount R/W - UBI volume is R/O"); + ubifs_msg(c, "cannot re-mount R/W - UBI volume is R/O"); return -EROFS; } err = ubifs_remount_rw(c); @@ -1852,7 +1852,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) return err; } else if (!c->ro_mount && (*flags & MS_RDONLY)) { if (c->ro_error) { - ubifs_msg("cannot re-mount R/O due to prior errors"); + ubifs_msg(c, "cannot re-mount R/O due to prior errors"); return -EROFS; } ubifs_remount_ro(c); @@ -2104,8 +2104,8 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, */ ubi = open_ubi(name, UBI_READONLY); if (IS_ERR(ubi)) { - ubifs_err("cannot open \"%s\", error %d", - name, (int)PTR_ERR(ubi)); + pr_err("UBIFS error (pid: %d): cannot open \"%s\", error %d", + current->pid, name, (int)PTR_ERR(ubi)); return ERR_CAST(ubi); } @@ -2233,8 +2233,8 @@ static int __init ubifs_init(void) * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. */ if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) { - ubifs_err("VFS page cache size is %u bytes, but UBIFS requires at least 4096 bytes", - (unsigned int)PAGE_CACHE_SIZE); + pr_err("UBIFS error (pid %d): VFS page cache size is %u bytes, but UBIFS requires at least 4096 bytes", + current->pid, (unsigned int)PAGE_CACHE_SIZE); return -EINVAL; } @@ -2257,7 +2257,8 @@ static int __init ubifs_init(void) err = register_filesystem(&ubifs_fs_type); if (err) { - ubifs_err("cannot register file system, error %d", err); + pr_err("UBIFS error (pid %d): cannot register file system, error %d", + current->pid, err); goto out_dbg; } return 0; diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 6793db0754f6..957f5757f374 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -98,7 +98,7 @@ static int insert_old_idx(struct ubifs_info *c, int lnum, int offs) else if (offs > o->offs) p = &(*p)->rb_right; else { - ubifs_err("old idx added twice!"); + ubifs_err(c, "old idx added twice!"); kfree(old_idx); return 0; } @@ -447,7 +447,7 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, err = ubifs_leb_read(c, lnum, buf, offs, len, 1); if (err) { - ubifs_err("cannot read node type %d from LEB %d:%d, error %d", + ubifs_err(c, "cannot read node type %d from LEB %d:%d, error %d", type, lnum, offs, err); return err; } @@ -1684,27 +1684,27 @@ static int validate_data_node(struct ubifs_info *c, void *buf, int err, len; if (ch->node_type != UBIFS_DATA_NODE) { - ubifs_err("bad node type (%d but expected %d)", + ubifs_err(c, "bad node type (%d but expected %d)", ch->node_type, UBIFS_DATA_NODE); goto out_err; } err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0); if (err) { - ubifs_err("expected node type %d", UBIFS_DATA_NODE); + ubifs_err(c, "expected node type %d", UBIFS_DATA_NODE); goto out; } len = le32_to_cpu(ch->len); if (len != zbr->len) { - ubifs_err("bad node length %d, expected %d", len, zbr->len); + ubifs_err(c, "bad node length %d, expected %d", len, zbr->len); goto out_err; } /* Make sure the key of the read node is correct */ key_read(c, buf + UBIFS_KEY_OFFSET, &key1); if (!keys_eq(c, &zbr->key, &key1)) { - ubifs_err("bad key in node at LEB %d:%d", + ubifs_err(c, "bad key in node at LEB %d:%d", zbr->lnum, zbr->offs); dbg_tnck(&zbr->key, "looked for key "); dbg_tnck(&key1, "found node's key "); @@ -1716,7 +1716,7 @@ static int validate_data_node(struct ubifs_info *c, void *buf, out_err: err = -EINVAL; out: - ubifs_err("bad node at LEB %d:%d", zbr->lnum, zbr->offs); + ubifs_err(c, "bad node at LEB %d:%d", zbr->lnum, zbr->offs); ubifs_dump_node(c, buf); dump_stack(); return err; @@ -1741,7 +1741,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) len = bu->zbranch[bu->cnt - 1].offs; len += bu->zbranch[bu->cnt - 1].len - offs; if (len > bu->buf_len) { - ubifs_err("buffer too small %d vs %d", bu->buf_len, len); + ubifs_err(c, "buffer too small %d vs %d", bu->buf_len, len); return -EINVAL; } @@ -1757,7 +1757,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) return -EAGAIN; if (err && err != -EBADMSG) { - ubifs_err("failed to read from LEB %d:%d, error %d", + ubifs_err(c, "failed to read from LEB %d:%d, error %d", lnum, offs, err); dump_stack(); dbg_tnck(&bu->key, "key "); @@ -3313,7 +3313,7 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, out_dump: block = key_block(c, key); - ubifs_err("inode %lu has size %lld, but there are data at offset %lld", + ubifs_err(c, "inode %lu has size %lld, but there are data at offset %lld", (unsigned long)inode->i_ino, size, ((loff_t)block) << UBIFS_BLOCK_SHIFT); mutex_unlock(&c->tnc_mutex); diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 7a205e046776..b45345d701e7 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -53,7 +53,7 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx, br->offs = cpu_to_le32(zbr->offs); br->len = cpu_to_le32(zbr->len); if (!zbr->lnum || !zbr->len) { - ubifs_err("bad ref in znode"); + ubifs_err(c, "bad ref in znode"); ubifs_dump_znode(c, znode); if (zbr->znode) ubifs_dump_znode(c, zbr->znode); @@ -384,7 +384,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) * Do not print scary warnings if the debugging * option which forces in-the-gaps is enabled. */ - ubifs_warn("out of space"); + ubifs_warn(c, "out of space"); ubifs_dump_budg(c, &c->bi); ubifs_dump_lprops(c); } @@ -441,7 +441,7 @@ static int layout_in_empty_space(struct ubifs_info *c) /* Determine the index node position */ if (lnum == -1) { if (c->ileb_nxt >= c->ileb_cnt) { - ubifs_err("out of space"); + ubifs_err(c, "out of space"); return -ENOSPC; } lnum = c->ilebs[c->ileb_nxt++]; @@ -855,7 +855,7 @@ static int write_index(struct ubifs_info *c) br->offs = cpu_to_le32(zbr->offs); br->len = cpu_to_le32(zbr->len); if (!zbr->lnum || !zbr->len) { - ubifs_err("bad ref in znode"); + ubifs_err(c, "bad ref in znode"); ubifs_dump_znode(c, znode); if (zbr->znode) ubifs_dump_znode(c, zbr->znode); @@ -875,7 +875,7 @@ static int write_index(struct ubifs_info *c) if (lnum != znode->lnum || offs != znode->offs || len != znode->len) { - ubifs_err("inconsistent znode posn"); + ubifs_err(c, "inconsistent znode posn"); return -EINVAL; } @@ -973,7 +973,7 @@ static int write_index(struct ubifs_info *c) if (lnum != c->dbg->new_ihead_lnum || buf_offs != c->dbg->new_ihead_offs) { - ubifs_err("inconsistent ihead"); + ubifs_err(c, "inconsistent ihead"); return -EINVAL; } diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c index f6bf8995c7b1..93f5b7859e6f 100644 --- a/fs/ubifs/tnc_misc.c +++ b/fs/ubifs/tnc_misc.c @@ -293,9 +293,9 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, lnum, offs, znode->level, znode->child_cnt); if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) { - ubifs_err("current fanout %d, branch count %d", + ubifs_err(c, "current fanout %d, branch count %d", c->fanout, znode->child_cnt); - ubifs_err("max levels %d, znode level %d", + ubifs_err(c, "max levels %d, znode level %d", UBIFS_MAX_LEVELS, znode->level); err = 1; goto out_dump; @@ -316,7 +316,7 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, if (zbr->lnum < c->main_first || zbr->lnum >= c->leb_cnt || zbr->offs < 0 || zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) { - ubifs_err("bad branch %d", i); + ubifs_err(c, "bad branch %d", i); err = 2; goto out_dump; } @@ -328,7 +328,7 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, case UBIFS_XENT_KEY: break; default: - ubifs_err("bad key type at slot %d: %d", + ubifs_err(c, "bad key type at slot %d: %d", i, key_type(c, &zbr->key)); err = 3; goto out_dump; @@ -340,17 +340,17 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, type = key_type(c, &zbr->key); if (c->ranges[type].max_len == 0) { if (zbr->len != c->ranges[type].len) { - ubifs_err("bad target node (type %d) length (%d)", + ubifs_err(c, "bad target node (type %d) length (%d)", type, zbr->len); - ubifs_err("have to be %d", c->ranges[type].len); + ubifs_err(c, "have to be %d", c->ranges[type].len); err = 4; goto out_dump; } } else if (zbr->len < c->ranges[type].min_len || zbr->len > c->ranges[type].max_len) { - ubifs_err("bad target node (type %d) length (%d)", + ubifs_err(c, "bad target node (type %d) length (%d)", type, zbr->len); - ubifs_err("have to be in range of %d-%d", + ubifs_err(c, "have to be in range of %d-%d", c->ranges[type].min_len, c->ranges[type].max_len); err = 5; @@ -370,12 +370,12 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, cmp = keys_cmp(c, key1, key2); if (cmp > 0) { - ubifs_err("bad key order (keys %d and %d)", i, i + 1); + ubifs_err(c, "bad key order (keys %d and %d)", i, i + 1); err = 6; goto out_dump; } else if (cmp == 0 && !is_hash_key(c, key1)) { /* These can only be keys with colliding hash */ - ubifs_err("keys %d and %d are not hashed but equivalent", + ubifs_err(c, "keys %d and %d are not hashed but equivalent", i, i + 1); err = 7; goto out_dump; @@ -386,7 +386,7 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, return 0; out_dump: - ubifs_err("bad indexing node at LEB %d:%d, error %d", lnum, offs, err); + ubifs_err(c, "bad indexing node at LEB %d:%d, error %d", lnum, offs, err); ubifs_dump_node(c, idx); kfree(idx); return -EINVAL; @@ -482,7 +482,7 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, /* Make sure the key of the read node is correct */ key_read(c, node + UBIFS_KEY_OFFSET, &key1); if (!keys_eq(c, key, &key1)) { - ubifs_err("bad key in node at LEB %d:%d", + ubifs_err(c, "bad key in node at LEB %d:%d", zbr->lnum, zbr->offs); dbg_tnck(key, "looked for key "); dbg_tnck(&key1, "but found node's key "); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index bc04b9c69891..de759022f3d6 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -43,15 +43,19 @@ #define UBIFS_VERSION 1 /* Normal UBIFS messages */ -#define ubifs_msg(fmt, ...) pr_notice("UBIFS: " fmt "\n", ##__VA_ARGS__) +#define ubifs_msg(c, fmt, ...) \ + pr_notice("UBIFS (ubi%d:%d): " fmt "\n", \ + (c)->vi.ubi_num, (c)->vi.vol_id, ##__VA_ARGS__) /* UBIFS error messages */ -#define ubifs_err(fmt, ...) \ - pr_err("UBIFS error (pid %d): %s: " fmt "\n", current->pid, \ +#define ubifs_err(c, fmt, ...) \ + pr_err("UBIFS error (ubi%d:%d pid %d): %s: " fmt "\n", \ + (c)->vi.ubi_num, (c)->vi.vol_id, current->pid, \ __func__, ##__VA_ARGS__) /* UBIFS warning messages */ -#define ubifs_warn(fmt, ...) \ - pr_warn("UBIFS warning (pid %d): %s: " fmt "\n", \ - current->pid, __func__, ##__VA_ARGS__) +#define ubifs_warn(c, fmt, ...) \ + pr_warn("UBIFS warning (ubi%d:%d pid %d): %s: " fmt "\n", \ + (c)->vi.ubi_num, (c)->vi.vol_id, current->pid, \ + __func__, ##__VA_ARGS__) /* * A variant of 'ubifs_err()' which takes the UBIFS file-sytem description * object as an argument. @@ -59,7 +63,7 @@ #define ubifs_errc(c, fmt, ...) \ do { \ if (!(c)->probing) \ - ubifs_err(fmt, ##__VA_ARGS__); \ + ubifs_err(c, fmt, ##__VA_ARGS__); \ } while (0) /* UBIFS file system VFS magic number */ @@ -158,7 +162,7 @@ #define WORST_COMPR_FACTOR 2 /* - * How much memory is needed for a buffer where we comress a data node. + * How much memory is needed for a buffer where we compress a data node. */ #define COMPRESSED_DATA_NODE_BUF_SZ \ (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR) @@ -664,7 +668,7 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c, * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes * fields * @softlimit: soft write-buffer timeout interval - * @delta: hard and soft timeouts delta (the timer expire inteval is @softlimit + * @delta: hard and soft timeouts delta (the timer expire interval is @softlimit * and @softlimit + @delta) * @timer: write-buffer timer * @no_timer: non-zero if this write-buffer does not have a timer @@ -930,9 +934,9 @@ struct ubifs_orphan { /** * struct ubifs_mount_opts - UBIFS-specific mount options information. * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) - * @bulk_read: enable/disable bulk-reads (%0 default, %1 disabe, %2 enable) + * @bulk_read: enable/disable bulk-reads (%0 default, %1 disable, %2 enable) * @chk_data_crc: enable/disable CRC data checking when reading data nodes - * (%0 default, %1 disabe, %2 enable) + * (%0 default, %1 disable, %2 enable) * @override_compr: override default compressor (%0 - do not override and use * superblock compressor, %1 - override and use compressor * specified in @compr_type) @@ -962,9 +966,9 @@ struct ubifs_mount_opts { * optimization) * @nospace_rp: the same as @nospace, but additionally means that even reserved * pool is full - * @page_budget: budget for a page (constant, nenver changed after mount) - * @inode_budget: budget for an inode (constant, nenver changed after mount) - * @dent_budget: budget for a directory entry (constant, nenver changed after + * @page_budget: budget for a page (constant, never changed after mount) + * @inode_budget: budget for an inode (constant, never changed after mount) + * @dent_budget: budget for a directory entry (constant, never changed after * mount) */ struct ubifs_budg_info { @@ -1787,10 +1791,10 @@ long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); /* compressor.c */ int __init ubifs_compressors_init(void); void ubifs_compressors_exit(void); -void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, - int *compr_type); -int ubifs_decompress(const void *buf, int len, void *out, int *out_len, - int compr_type); +void ubifs_compress(const struct ubifs_info *c, const void *in_buf, int in_len, + void *out_buf, int *out_len, int *compr_type); +int ubifs_decompress(const struct ubifs_info *c, const void *buf, int len, + void *out, int *out_len, int compr_type); #include "debug.h" #include "misc.h" diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index a92be244a6fb..3659b1934500 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -108,7 +108,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, .dirtied_ino_d = ALIGN(host_ui->data_len, 8) }; if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) { - ubifs_err("inode %lu already has too many xattrs (%d), cannot create more", + ubifs_err(c, "inode %lu already has too many xattrs (%d), cannot create more", host->i_ino, host_ui->xattr_cnt); return -ENOSPC; } @@ -120,7 +120,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, */ names_len = host_ui->xattr_names + host_ui->xattr_cnt + nm->len + 1; if (names_len > XATTR_LIST_MAX) { - ubifs_err("cannot add one more xattr name to inode %lu, total names length would become %d, max. is %d", + ubifs_err(c, "cannot add one more xattr name to inode %lu, total names length would become %d, max. is %d", host->i_ino, names_len, XATTR_LIST_MAX); return -ENOSPC; } @@ -288,13 +288,13 @@ static struct inode *iget_xattr(struct ubifs_info *c, ino_t inum) inode = ubifs_iget(c->vfs_sb, inum); if (IS_ERR(inode)) { - ubifs_err("dead extended attribute entry, error %d", + ubifs_err(c, "dead extended attribute entry, error %d", (int)PTR_ERR(inode)); return inode; } if (ubifs_inode(inode)->xattr) return inode; - ubifs_err("corrupt extended attribute entry"); + ubifs_err(c, "corrupt extended attribute entry"); iput(inode); return ERR_PTR(-EINVAL); } @@ -412,7 +412,7 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, if (buf) { /* If @buf is %NULL we are supposed to return the length */ if (ui->data_len > size) { - ubifs_err("buffer size %zd, xattr len %d", + ubifs_err(c, "buffer size %zd, xattr len %d", size, ui->data_len); err = -ERANGE; goto out_iput; @@ -485,7 +485,7 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) kfree(pxent); if (err != -ENOENT) { - ubifs_err("cannot find next direntry, error %d", err); + ubifs_err(c, "cannot find next direntry, error %d", err); return err; } @@ -657,8 +657,10 @@ int ubifs_init_security(struct inode *dentry, struct inode *inode, &init_xattrs, 0); mutex_unlock(&inode->i_mutex); - if (err) - ubifs_err("cannot initialize security for inode %lu, error %d", + if (err) { + struct ubifs_info *c = dentry->i_sb->s_fs_info; + ubifs_err(c, "cannot initialize security for inode %lu, error %d", inode->i_ino, err); + } return err; } diff --git a/fs/udf/file.c b/fs/udf/file.c index dda8ea7012c6..f77f7681288f 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -33,7 +33,7 @@ #include <linux/capability.h> #include <linux/errno.h> #include <linux/pagemap.h> -#include <linux/aio.h> +#include <linux/uio.h> #include "udf_i.h" #include "udf_sb.h" @@ -121,7 +121,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); int err, pos; - size_t count = iocb->ki_nbytes; + size_t count = iov_iter_count(from); struct udf_inode_info *iinfo = UDF_I(inode); mutex_lock(&inode->i_mutex); @@ -239,12 +239,10 @@ static int udf_release_file(struct inode *inode, struct file *filp) } const struct file_operations udf_file_operations = { - .read = new_sync_read, .read_iter = generic_file_read_iter, .unlocked_ioctl = udf_ioctl, .open = generic_file_open, .mmap = generic_file_mmap, - .write = new_sync_write, .write_iter = udf_file_write_iter, .release = udf_release_file, .fsync = generic_file_fsync, diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 52577a8e4179..9e3d780e5eff 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -37,7 +37,7 @@ #include <linux/slab.h> #include <linux/crc-itu-t.h> #include <linux/mpage.h> -#include <linux/aio.h> +#include <linux/uio.h> #include "udf_i.h" #include "udf_sb.h" diff --git a/fs/ufs/file.c b/fs/ufs/file.c index c84ec010a676..042ddbf110cc 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -35,9 +35,7 @@ const struct file_operations ufs_file_operations = { .llseek = generic_file_llseek, - .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = new_sync_write, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .open = generic_file_open, diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 3a9b7a1b8704..4f8cdc59bc38 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -31,7 +31,6 @@ #include "xfs_bmap.h" #include "xfs_bmap_util.h" #include "xfs_bmap_btree.h" -#include <linux/aio.h> #include <linux/gfp.h> #include <linux/mpage.h> #include <linux/pagevec.h> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index a2e1cb8a568b..44856c3b9617 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -38,7 +38,6 @@ #include "xfs_icache.h" #include "xfs_pnfs.h" -#include <linux/aio.h> #include <linux/dcache.h> #include <linux/falloc.h> #include <linux/pagevec.h> @@ -1387,8 +1386,6 @@ xfs_file_llseek( const struct file_operations xfs_file_operations = { .llseek = xfs_file_llseek, - .read = new_sync_read, - .write = new_sync_write, .read_iter = xfs_file_read_iter, .write_iter = xfs_file_write_iter, .splice_read = xfs_file_splice_read, |