summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/autofs/dirhash.c34
-rw-r--r--fs/autofs4/dev-ioctl.c12
-rw-r--r--fs/btrfs/async-thread.c60
-rw-r--r--fs/btrfs/async-thread.h2
-rw-r--r--fs/btrfs/ctree.c17
-rw-r--r--fs/btrfs/disk-io.c9
-rw-r--r--fs/btrfs/extent_io.c86
-rw-r--r--fs/btrfs/file.c6
-rw-r--r--fs/btrfs/inode.c36
-rw-r--r--fs/btrfs/ioctl.c49
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/super.c13
-rw-r--r--fs/btrfs/volumes.c124
-rw-r--r--fs/btrfs/volumes.h13
-rw-r--r--fs/compat.c37
-rw-r--r--fs/compat_ioctl.c7
-rw-r--r--fs/dcache.c1
-rw-r--r--fs/ecryptfs/miscdev.c15
-rw-r--r--fs/filesystems.c2
-rw-r--r--fs/gfs2/glops.c6
-rw-r--r--fs/gfs2/ops_file.c4
-rw-r--r--fs/hugetlbfs/inode.c3
-rw-r--r--fs/namei.c2
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/ncpfs/ioctl.c21
-rw-r--r--fs/nfs/nfs3xdr.c3
-rw-r--r--fs/nfsd/nfs4recover.c46
-rw-r--r--fs/nfsd/vfs.c34
-rw-r--r--fs/stat.c137
-rw-r--r--fs/sysfs/bin.c13
-rw-r--r--fs/xattr.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c23
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c12
33 files changed, 467 insertions, 374 deletions
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index bf8c8af98004..4eb4d8dfb2f1 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -39,10 +39,12 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
{
struct autofs_dirhash *dh = &sbi->dirhash;
struct autofs_dir_ent *ent;
- struct dentry *dentry;
unsigned long timeout = sbi->exp_timeout;
while (1) {
+ struct path path;
+ int umount_ok;
+
if ( list_empty(&dh->expiry_head) || sbi->catatonic )
return NULL; /* No entries */
/* We keep the list sorted by last_usage and want old stuff */
@@ -57,17 +59,17 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
return ent; /* Symlinks are always expirable */
/* Get the dentry for the autofs subdirectory */
- dentry = ent->dentry;
+ path.dentry = ent->dentry;
- if ( !dentry ) {
+ if (!path.dentry) {
/* Should only happen in catatonic mode */
printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name);
autofs_delete_usage(ent);
continue;
}
- if ( !dentry->d_inode ) {
- dput(dentry);
+ if (!path.dentry->d_inode) {
+ dput(path.dentry);
printk("autofs: negative dentry on expiry queue: %s\n",
ent->name);
autofs_delete_usage(ent);
@@ -76,29 +78,29 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
/* Make sure entry is mounted and unused; note that dentry will
point to the mounted-on-top root. */
- if (!S_ISDIR(dentry->d_inode->i_mode)||!d_mountpoint(dentry)) {
+ if (!S_ISDIR(path.dentry->d_inode->i_mode) ||
+ !d_mountpoint(path.dentry)) {
DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
continue;
}
- mntget(mnt);
- dget(dentry);
- if (!follow_down(&mnt, &dentry)) {
- dput(dentry);
- mntput(mnt);
+ path.mnt = mnt;
+ path_get(&path);
+ if (!follow_down(&path.mnt, &path.dentry)) {
+ path_put(&path);
DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
continue;
}
- while (d_mountpoint(dentry) && follow_down(&mnt, &dentry))
+ while (d_mountpoint(path.dentry) &&
+ follow_down(&path.mnt, &path.dentry))
;
- dput(dentry);
+ umount_ok = may_umount(path.mnt);
+ path_put(&path);
- if ( may_umount(mnt) ) {
- mntput(mnt);
+ if (umount_ok) {
DPRINTK(("autofs: signaling expire on %s\n", ent->name));
return ent; /* Expirable! */
}
DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name));
- mntput(mnt);
}
return NULL; /* No expirable entries */
}
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 9e5ae8a4f5c8..84168c0dcc2d 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -54,11 +54,10 @@ static int check_name(const char *name)
* Check a string doesn't overrun the chunk of
* memory we copied from user land.
*/
-static int invalid_str(char *str, void *end)
+static int invalid_str(char *str, size_t size)
{
- while ((void *) str <= end)
- if (!*str++)
- return 0;
+ if (memchr(str, 0, size))
+ return 0;
return -EINVAL;
}
@@ -138,8 +137,7 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
}
if (param->size > sizeof(*param)) {
- err = invalid_str(param->path,
- (void *) ((size_t) param + param->size));
+ err = invalid_str(param->path, param->size - sizeof(*param));
if (err) {
AUTOFS_WARN(
"path string terminator missing for cmd(0x%08x)",
@@ -488,7 +486,7 @@ static int autofs_dev_ioctl_requester(struct file *fp,
}
path = param->path;
- devid = sbi->sb->s_dev;
+ devid = new_encode_dev(sbi->sb->s_dev);
param->requester.uid = param->requester.gid = -1;
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 51bfdfc8fcda..502c3d61de62 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -25,6 +25,7 @@
#define WORK_QUEUED_BIT 0
#define WORK_DONE_BIT 1
#define WORK_ORDER_DONE_BIT 2
+#define WORK_HIGH_PRIO_BIT 3
/*
* container for the kthread task pointer and the list of pending work
@@ -36,6 +37,7 @@ struct btrfs_worker_thread {
/* list of struct btrfs_work that are waiting for service */
struct list_head pending;
+ struct list_head prio_pending;
/* list of worker threads from struct btrfs_workers */
struct list_head worker_list;
@@ -103,10 +105,16 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers,
spin_lock_irqsave(&workers->lock, flags);
- while (!list_empty(&workers->order_list)) {
- work = list_entry(workers->order_list.next,
- struct btrfs_work, order_list);
-
+ while (1) {
+ if (!list_empty(&workers->prio_order_list)) {
+ work = list_entry(workers->prio_order_list.next,
+ struct btrfs_work, order_list);
+ } else if (!list_empty(&workers->order_list)) {
+ work = list_entry(workers->order_list.next,
+ struct btrfs_work, order_list);
+ } else {
+ break;
+ }
if (!test_bit(WORK_DONE_BIT, &work->flags))
break;
@@ -143,8 +151,14 @@ static int worker_loop(void *arg)
do {
spin_lock_irq(&worker->lock);
again_locked:
- while (!list_empty(&worker->pending)) {
- cur = worker->pending.next;
+ while (1) {
+ if (!list_empty(&worker->prio_pending))
+ cur = worker->prio_pending.next;
+ else if (!list_empty(&worker->pending))
+ cur = worker->pending.next;
+ else
+ break;
+
work = list_entry(cur, struct btrfs_work, list);
list_del(&work->list);
clear_bit(WORK_QUEUED_BIT, &work->flags);
@@ -163,7 +177,6 @@ again_locked:
spin_lock_irq(&worker->lock);
check_idle_worker(worker);
-
}
if (freezing(current)) {
worker->working = 0;
@@ -178,7 +191,8 @@ again_locked:
* jump_in?
*/
smp_mb();
- if (!list_empty(&worker->pending))
+ if (!list_empty(&worker->pending) ||
+ !list_empty(&worker->prio_pending))
continue;
/*
@@ -191,7 +205,8 @@ again_locked:
*/
schedule_timeout(1);
smp_mb();
- if (!list_empty(&worker->pending))
+ if (!list_empty(&worker->pending) ||
+ !list_empty(&worker->prio_pending))
continue;
if (kthread_should_stop())
@@ -200,7 +215,8 @@ again_locked:
/* still no more work?, sleep for real */
spin_lock_irq(&worker->lock);
set_current_state(TASK_INTERRUPTIBLE);
- if (!list_empty(&worker->pending))
+ if (!list_empty(&worker->pending) ||
+ !list_empty(&worker->prio_pending))
goto again_locked;
/*
@@ -248,6 +264,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
INIT_LIST_HEAD(&workers->worker_list);
INIT_LIST_HEAD(&workers->idle_list);
INIT_LIST_HEAD(&workers->order_list);
+ INIT_LIST_HEAD(&workers->prio_order_list);
spin_lock_init(&workers->lock);
workers->max_workers = max;
workers->idle_thresh = 32;
@@ -273,6 +290,7 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
}
INIT_LIST_HEAD(&worker->pending);
+ INIT_LIST_HEAD(&worker->prio_pending);
INIT_LIST_HEAD(&worker->worker_list);
spin_lock_init(&worker->lock);
atomic_set(&worker->num_pending, 0);
@@ -396,7 +414,10 @@ int btrfs_requeue_work(struct btrfs_work *work)
goto out;
spin_lock_irqsave(&worker->lock, flags);
- list_add_tail(&work->list, &worker->pending);
+ if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
+ list_add_tail(&work->list, &worker->prio_pending);
+ else
+ list_add_tail(&work->list, &worker->pending);
atomic_inc(&worker->num_pending);
/* by definition we're busy, take ourselves off the idle
@@ -422,6 +443,11 @@ out:
return 0;
}
+void btrfs_set_work_high_prio(struct btrfs_work *work)
+{
+ set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
+}
+
/*
* places a struct btrfs_work into the pending queue of one of the kthreads
*/
@@ -438,7 +464,12 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
worker = find_worker(workers);
if (workers->ordered) {
spin_lock_irqsave(&workers->lock, flags);
- list_add_tail(&work->order_list, &workers->order_list);
+ if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
+ list_add_tail(&work->order_list,
+ &workers->prio_order_list);
+ } else {
+ list_add_tail(&work->order_list, &workers->order_list);
+ }
spin_unlock_irqrestore(&workers->lock, flags);
} else {
INIT_LIST_HEAD(&work->order_list);
@@ -446,7 +477,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
spin_lock_irqsave(&worker->lock, flags);
- list_add_tail(&work->list, &worker->pending);
+ if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
+ list_add_tail(&work->list, &worker->prio_pending);
+ else
+ list_add_tail(&work->list, &worker->pending);
atomic_inc(&worker->num_pending);
check_busy_worker(worker);
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 31be4ed8b63e..1b511c109db6 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -85,6 +85,7 @@ struct btrfs_workers {
* of work items waiting for completion
*/
struct list_head order_list;
+ struct list_head prio_order_list;
/* lock for finding the next worker thread to queue on */
spinlock_t lock;
@@ -98,4 +99,5 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers);
int btrfs_stop_workers(struct btrfs_workers *workers);
void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max);
int btrfs_requeue_work(struct btrfs_work *work);
+void btrfs_set_work_high_prio(struct btrfs_work *work);
#endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index e5b2533b691a..a99f1c2a710d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1325,12 +1325,12 @@ static noinline int reada_for_balance(struct btrfs_root *root,
int ret = 0;
int blocksize;
- parent = path->nodes[level - 1];
+ parent = path->nodes[level + 1];
if (!parent)
return 0;
nritems = btrfs_header_nritems(parent);
- slot = path->slots[level];
+ slot = path->slots[level + 1];
blocksize = btrfs_level_size(root, level);
if (slot > 0) {
@@ -1341,7 +1341,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
block1 = 0;
free_extent_buffer(eb);
}
- if (slot < nritems) {
+ if (slot + 1 < nritems) {
block2 = btrfs_node_blockptr(parent, slot + 1);
gen = btrfs_node_ptr_generation(parent, slot + 1);
eb = btrfs_find_tree_block(root, block2, blocksize);
@@ -1351,7 +1351,11 @@ static noinline int reada_for_balance(struct btrfs_root *root,
}
if (block1 || block2) {
ret = -EAGAIN;
+
+ /* release the whole path */
btrfs_release_path(root, path);
+
+ /* read the blocks */
if (block1)
readahead_tree_block(root, block1, blocksize, 0);
if (block2)
@@ -1361,7 +1365,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
eb = read_tree_block(root, block1, blocksize, 0);
free_extent_buffer(eb);
}
- if (block1) {
+ if (block2) {
eb = read_tree_block(root, block2, blocksize, 0);
free_extent_buffer(eb);
}
@@ -1481,12 +1485,15 @@ read_block_for_search(struct btrfs_trans_handle *trans,
* of the btree by dropping locks before
* we read.
*/
- btrfs_release_path(NULL, p);
+ btrfs_unlock_up_safe(p, level + 1);
+ btrfs_set_path_blocking(p);
+
if (tmp)
free_extent_buffer(tmp);
if (p->reada)
reada_for_search(root, p, level, slot, key->objectid);
+ btrfs_release_path(NULL, p);
tmp = read_tree_block(root, blocknr, blocksize, gen);
if (tmp)
free_extent_buffer(tmp);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 92caa8035f36..a6b83744b05d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -579,6 +579,10 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
async->bio_flags = bio_flags;
atomic_inc(&fs_info->nr_async_submits);
+
+ if (rw & (1 << BIO_RW_SYNCIO))
+ btrfs_set_work_high_prio(&async->work);
+
btrfs_queue_worker(&fs_info->workers, &async->work);
#if 0
int limit = btrfs_async_submit_limit(fs_info);
@@ -656,6 +660,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
mirror_num, 0);
}
+
/*
* kthread helpers are used to submit writes so that checksumming
* can happen in parallel across all CPUs
@@ -2095,10 +2100,10 @@ static int write_dev_supers(struct btrfs_device *device,
device->barriers = 0;
get_bh(bh);
lock_buffer(bh);
- ret = submit_bh(WRITE, bh);
+ ret = submit_bh(WRITE_SYNC, bh);
}
} else {
- ret = submit_bh(WRITE, bh);
+ ret = submit_bh(WRITE_SYNC, bh);
}
if (!ret && wait) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index eb2bee8b7fbf..05a1c42e25bf 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -50,7 +50,10 @@ struct extent_page_data {
/* tells writepage not to lock the state bits for this range
* it still does the unlocking
*/
- int extent_locked;
+ unsigned int extent_locked:1;
+
+ /* tells the submit_bio code to use a WRITE_SYNC */
+ unsigned int sync_io:1;
};
int __init extent_io_init(void)
@@ -2101,6 +2104,16 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
return ret;
}
+static noinline void update_nr_written(struct page *page,
+ struct writeback_control *wbc,
+ unsigned long nr_written)
+{
+ wbc->nr_to_write -= nr_written;
+ if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
+ wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
+ page->mapping->writeback_index = page->index + nr_written;
+}
+
/*
* the writepage semantics are similar to regular writepage. extent
* records are inserted to lock ranges in the tree, and as dirty areas
@@ -2136,8 +2149,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
u64 delalloc_end;
int page_started;
int compressed;
+ int write_flags;
unsigned long nr_written = 0;
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ write_flags = WRITE_SYNC_PLUG;
+ else
+ write_flags = WRITE;
+
WARN_ON(!PageLocked(page));
pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
if (page->index > end_index ||
@@ -2164,6 +2183,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
delalloc_end = 0;
page_started = 0;
if (!epd->extent_locked) {
+ /*
+ * make sure the wbc mapping index is at least updated
+ * to this page.
+ */
+ update_nr_written(page, wbc, 0);
+
while (delalloc_end < page_end) {
nr_delalloc = find_lock_delalloc_range(inode, tree,
page,
@@ -2185,7 +2210,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
*/
if (page_started) {
ret = 0;
- goto update_nr_written;
+ /*
+ * we've unlocked the page, so we can't update
+ * the mapping's writeback index, just update
+ * nr_to_write.
+ */
+ wbc->nr_to_write -= nr_written;
+ goto done_unlocked;
}
}
lock_extent(tree, start, page_end, GFP_NOFS);
@@ -2198,13 +2229,18 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
if (ret == -EAGAIN) {
unlock_extent(tree, start, page_end, GFP_NOFS);
redirty_page_for_writepage(wbc, page);
+ update_nr_written(page, wbc, nr_written);
unlock_page(page);
ret = 0;
- goto update_nr_written;
+ goto done_unlocked;
}
}
- nr_written++;
+ /*
+ * we don't want to touch the inode after unlocking the page,
+ * so we update the mapping writeback index now
+ */
+ update_nr_written(page, wbc, nr_written + 1);
end = page_end;
if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
@@ -2314,9 +2350,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
(unsigned long long)end);
}
- ret = submit_extent_page(WRITE, tree, page, sector,
- iosize, pg_offset, bdev,
- &epd->bio, max_nr,
+ ret = submit_extent_page(write_flags, tree, page,
+ sector, iosize, pg_offset,
+ bdev, &epd->bio, max_nr,
end_bio_extent_writepage,
0, 0, 0);
if (ret)
@@ -2336,11 +2372,8 @@ done:
unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
unlock_page(page);
-update_nr_written:
- wbc->nr_to_write -= nr_written;
- if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
- wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
- page->mapping->writeback_index = page->index + nr_written;
+done_unlocked:
+
return 0;
}
@@ -2460,15 +2493,23 @@ retry:
return ret;
}
-static noinline void flush_write_bio(void *data)
+static void flush_epd_write_bio(struct extent_page_data *epd)
{
- struct extent_page_data *epd = data;
if (epd->bio) {
- submit_one_bio(WRITE, epd->bio, 0, 0);
+ if (epd->sync_io)
+ submit_one_bio(WRITE_SYNC, epd->bio, 0, 0);
+ else
+ submit_one_bio(WRITE, epd->bio, 0, 0);
epd->bio = NULL;
}
}
+static noinline void flush_write_bio(void *data)
+{
+ struct extent_page_data *epd = data;
+ flush_epd_write_bio(epd);
+}
+
int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent,
struct writeback_control *wbc)
@@ -2480,23 +2521,22 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
.tree = tree,
.get_extent = get_extent,
.extent_locked = 0,
+ .sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
.bdi = wbc->bdi,
- .sync_mode = WB_SYNC_NONE,
+ .sync_mode = wbc->sync_mode,
.older_than_this = NULL,
.nr_to_write = 64,
.range_start = page_offset(page) + PAGE_CACHE_SIZE,
.range_end = (loff_t)-1,
};
-
ret = __extent_writepage(page, wbc, &epd);
extent_write_cache_pages(tree, mapping, &wbc_writepages,
__extent_writepage, &epd, flush_write_bio);
- if (epd.bio)
- submit_one_bio(WRITE, epd.bio, 0, 0);
+ flush_epd_write_bio(&epd);
return ret;
}
@@ -2515,6 +2555,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
.tree = tree,
.get_extent = get_extent,
.extent_locked = 1,
+ .sync_io = mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
.bdi = inode->i_mapping->backing_dev_info,
@@ -2540,8 +2581,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
start += PAGE_CACHE_SIZE;
}
- if (epd.bio)
- submit_one_bio(WRITE, epd.bio, 0, 0);
+ flush_epd_write_bio(&epd);
return ret;
}
@@ -2556,13 +2596,13 @@ int extent_writepages(struct extent_io_tree *tree,
.tree = tree,
.get_extent = get_extent,
.extent_locked = 0,
+ .sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
ret = extent_write_cache_pages(tree, mapping, wbc,
__extent_writepage, &epd,
flush_write_bio);
- if (epd.bio)
- submit_one_bio(WRITE, epd.bio, 0, 0);
+ flush_epd_write_bio(&epd);
return ret;
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9c9fb46ccd08..482f8db2cfd0 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -830,7 +830,7 @@ again:
ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
BUG_ON(ret);
- goto done;
+ goto release;
} else if (split == start) {
if (locked_end < extent_end) {
ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
@@ -926,6 +926,8 @@ again:
}
done:
btrfs_mark_buffer_dirty(leaf);
+
+release:
btrfs_release_path(root, path);
if (split_end && split == start) {
split = end;
@@ -1131,7 +1133,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
if (will_write) {
btrfs_fdatawrite_range(inode->i_mapping, pos,
pos + write_bytes - 1,
- WB_SYNC_NONE);
+ WB_SYNC_ALL);
} else {
balance_dirty_pages_ratelimited_nr(inode->i_mapping,
num_pages);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a0d1dd492a58..65219f6a16a1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4970,10 +4970,10 @@ out_fail:
return err;
}
-static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
+static int prealloc_file_range(struct btrfs_trans_handle *trans,
+ struct inode *inode, u64 start, u64 end,
u64 alloc_hint, int mode)
{
- struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key ins;
u64 alloc_size;
@@ -4981,10 +4981,6 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
u64 num_bytes = end - start;
int ret = 0;
- trans = btrfs_join_transaction(root, 1);
- BUG_ON(!trans);
- btrfs_set_trans_block_group(trans, inode);
-
while (num_bytes > 0) {
alloc_size = min(num_bytes, root->fs_info->max_extent);
ret = btrfs_reserve_extent(trans, root, alloc_size,
@@ -5015,7 +5011,6 @@ out:
BUG_ON(ret);
}
- btrfs_end_transaction(trans, root);
return ret;
}
@@ -5029,11 +5024,18 @@ static long btrfs_fallocate(struct inode *inode, int mode,
u64 alloc_hint = 0;
u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
struct extent_map *em;
+ struct btrfs_trans_handle *trans;
int ret;
alloc_start = offset & ~mask;
alloc_end = (offset + len + mask) & ~mask;
+ /*
+ * wait for ordered IO before we have any locks. We'll loop again
+ * below with the locks held.
+ */
+ btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
+
mutex_lock(&inode->i_mutex);
if (alloc_start > inode->i_size) {
ret = btrfs_cont_expand(inode, alloc_start);
@@ -5043,6 +5045,16 @@ static long btrfs_fallocate(struct inode *inode, int mode,
while (1) {
struct btrfs_ordered_extent *ordered;
+
+ trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
+ if (!trans) {
+ ret = -EIO;
+ goto out;
+ }
+
+ /* the extent lock is ordered inside the running
+ * transaction
+ */
lock_extent(&BTRFS_I(inode)->io_tree, alloc_start,
alloc_end - 1, GFP_NOFS);
ordered = btrfs_lookup_first_ordered_extent(inode,
@@ -5053,6 +5065,12 @@ static long btrfs_fallocate(struct inode *inode, int mode,
btrfs_put_ordered_extent(ordered);
unlock_extent(&BTRFS_I(inode)->io_tree,
alloc_start, alloc_end - 1, GFP_NOFS);
+ btrfs_end_transaction(trans, BTRFS_I(inode)->root);
+
+ /*
+ * we can't wait on the range with the transaction
+ * running or with the extent lock held
+ */
btrfs_wait_ordered_range(inode, alloc_start,
alloc_end - alloc_start);
} else {
@@ -5070,7 +5088,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
last_byte = min(extent_map_end(em), alloc_end);
last_byte = (last_byte + mask) & ~mask;
if (em->block_start == EXTENT_MAP_HOLE) {
- ret = prealloc_file_range(inode, cur_offset,
+ ret = prealloc_file_range(trans, inode, cur_offset,
last_byte, alloc_hint, mode);
if (ret < 0) {
free_extent_map(em);
@@ -5089,6 +5107,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
}
unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1,
GFP_NOFS);
+
+ btrfs_end_transaction(trans, BTRFS_I(inode)->root);
out:
mutex_unlock(&inode->i_mutex);
return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7594bec1be10..9f135e878507 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -461,15 +461,9 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
-
- if (!vol_args)
- return -ENOMEM;
-
- if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
- ret = -EFAULT;
- goto out;
- }
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
namelen = strlen(vol_args->name);
@@ -545,7 +539,6 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
out_unlock:
mutex_unlock(&root->fs_info->volume_mutex);
-out:
kfree(vol_args);
return ret;
}
@@ -565,15 +558,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
- vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
-
- if (!vol_args)
- return -ENOMEM;
-
- if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
- ret = -EFAULT;
- goto out;
- }
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
namelen = strlen(vol_args->name);
@@ -675,19 +662,13 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
- if (!vol_args)
- return -ENOMEM;
-
- if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
- ret = -EFAULT;
- goto out;
- }
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
ret = btrfs_init_new_device(root, vol_args->name);
-out:
kfree(vol_args);
return ret;
}
@@ -703,19 +684,13 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
- vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
- if (!vol_args)
- return -ENOMEM;
-
- if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
- ret = -EFAULT;
- goto out;
- }
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
ret = btrfs_rm_device(root, vol_args->name);
-out:
kfree(vol_args);
return ret;
}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 53c87b197d70..d6f0806c682f 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -489,7 +489,7 @@ again:
/* start IO across the range first to instantiate any delalloc
* extents
*/
- btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE);
+ btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL);
/* The compression code will leave pages locked but return from
* writepage without setting the page writeback. Starting again
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9744af9d71e9..a7acfe639a44 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -635,14 +635,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- vol = kmalloc(sizeof(*vol), GFP_KERNEL);
- if (!vol)
- return -ENOMEM;
-
- if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) {
- ret = -EFAULT;
- goto out;
- }
+ vol = memdup_user((void __user *)arg, sizeof(*vol));
+ if (IS_ERR(vol))
+ return PTR_ERR(vol);
switch (cmd) {
case BTRFS_IOC_SCAN_DEV:
@@ -650,7 +645,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
&btrfs_fs_type, &fs_devices);
break;
}
-out:
+
kfree(vol);
return ret;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e0913e469728..e53835b88594 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -125,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
return NULL;
}
+static void requeue_list(struct btrfs_pending_bios *pending_bios,
+ struct bio *head, struct bio *tail)
+{
+
+ struct bio *old_head;
+
+ old_head = pending_bios->head;
+ pending_bios->head = head;
+ if (pending_bios->tail)
+ tail->bi_next = old_head;
+ else
+ pending_bios->tail = tail;
+}
+
/*
* we try to collect pending bios for a device so we don't get a large
* number of procs sending bios down to the same device. This greatly
@@ -141,10 +155,12 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
struct bio *pending;
struct backing_dev_info *bdi;
struct btrfs_fs_info *fs_info;
+ struct btrfs_pending_bios *pending_bios;
struct bio *tail;
struct bio *cur;
int again = 0;
- unsigned long num_run = 0;
+ unsigned long num_run;
+ unsigned long num_sync_run;
unsigned long limit;
unsigned long last_waited = 0;
@@ -153,20 +169,30 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
limit = btrfs_async_submit_limit(fs_info);
limit = limit * 2 / 3;
+ /* we want to make sure that every time we switch from the sync
+ * list to the normal list, we unplug
+ */
+ num_sync_run = 0;
+
loop:
spin_lock(&device->io_lock);
+ num_run = 0;
loop_lock:
+
/* take all the bios off the list at once and process them
* later on (without the lock held). But, remember the
* tail and other pointers so the bios can be properly reinserted
* into the list if we hit congestion
*/
- pending = device->pending_bios;
- tail = device->pending_bio_tail;
+ if (device->pending_sync_bios.head)
+ pending_bios = &device->pending_sync_bios;
+ else
+ pending_bios = &device->pending_bios;
+
+ pending = pending_bios->head;
+ tail = pending_bios->tail;
WARN_ON(pending && !tail);
- device->pending_bios = NULL;
- device->pending_bio_tail = NULL;
/*
* if pending was null this time around, no bios need processing
@@ -176,16 +202,41 @@ loop_lock:
* device->running_pending is used to synchronize with the
* schedule_bio code.
*/
- if (pending) {
- again = 1;
- device->running_pending = 1;
- } else {
+ if (device->pending_sync_bios.head == NULL &&
+ device->pending_bios.head == NULL) {
again = 0;
device->running_pending = 0;
+ } else {
+ again = 1;
+ device->running_pending = 1;
}
+
+ pending_bios->head = NULL;
+ pending_bios->tail = NULL;
+
spin_unlock(&device->io_lock);
+ /*
+ * if we're doing the regular priority list, make sure we unplug
+ * for any high prio bios we've sent down
+ */
+ if (pending_bios == &device->pending_bios && num_sync_run > 0) {
+ num_sync_run = 0;
+ blk_run_backing_dev(bdi, NULL);
+ }
+
while (pending) {
+
+ rmb();
+ if (pending_bios != &device->pending_sync_bios &&
+ device->pending_sync_bios.head &&
+ num_run > 16) {
+ cond_resched();
+ spin_lock(&device->io_lock);
+ requeue_list(pending_bios, pending, tail);
+ goto loop_lock;
+ }
+
cur = pending;
pending = pending->bi_next;
cur->bi_next = NULL;
@@ -196,10 +247,18 @@ loop_lock:
wake_up(&fs_info->async_submit_wait);
BUG_ON(atomic_read(&cur->bi_cnt) == 0);
- bio_get(cur);
submit_bio(cur->bi_rw, cur);
- bio_put(cur);
num_run++;
+ if (bio_sync(cur))
+ num_sync_run++;
+
+ if (need_resched()) {
+ if (num_sync_run) {
+ blk_run_backing_dev(bdi, NULL);
+ num_sync_run = 0;
+ }
+ cond_resched();
+ }
/*
* we made progress, there is more work to do and the bdi
@@ -208,7 +267,6 @@ loop_lock:
*/
if (pending && bdi_write_congested(bdi) && num_run > 16 &&
fs_info->fs_devices->open_devices > 1) {
- struct bio *old_head;
struct io_context *ioc;
ioc = current->io_context;
@@ -233,17 +291,17 @@ loop_lock:
* against it before looping
*/
last_waited = ioc->last_waited;
+ if (need_resched()) {
+ if (num_sync_run) {
+ blk_run_backing_dev(bdi, NULL);
+ num_sync_run = 0;
+ }
+ cond_resched();
+ }
continue;
}
spin_lock(&device->io_lock);
-
- old_head = device->pending_bios;
- device->pending_bios = pending;
- if (device->pending_bio_tail)
- tail->bi_next = old_head;
- else
- device->pending_bio_tail = tail;
-
+ requeue_list(pending_bios, pending, tail);
device->running_pending = 1;
spin_unlock(&device->io_lock);
@@ -251,11 +309,18 @@ loop_lock:
goto done;
}
}
+
+ if (num_sync_run) {
+ num_sync_run = 0;
+ blk_run_backing_dev(bdi, NULL);
+ }
+
+ cond_resched();
if (again)
goto loop;
spin_lock(&device->io_lock);
- if (device->pending_bios)
+ if (device->pending_bios.head || device->pending_sync_bios.head)
goto loop_lock;
spin_unlock(&device->io_lock);
@@ -2497,7 +2562,7 @@ again:
max_errors = 1;
}
}
- if (multi_ret && rw == WRITE &&
+ if (multi_ret && (rw & (1 << BIO_RW)) &&
stripes_allocated < stripes_required) {
stripes_allocated = map->num_stripes;
free_extent_map(em);
@@ -2762,6 +2827,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
int rw, struct bio *bio)
{
int should_queue = 1;
+ struct btrfs_pending_bios *pending_bios;
/* don't bother with additional async steps for reads, right now */
if (!(rw & (1 << BIO_RW))) {
@@ -2783,13 +2849,17 @@ static noinline int schedule_bio(struct btrfs_root *root,
bio->bi_rw |= rw;
spin_lock(&device->io_lock);
+ if (bio_sync(bio))
+ pending_bios = &device->pending_sync_bios;
+ else
+ pending_bios = &device->pending_bios;
- if (device->pending_bio_tail)
- device->pending_bio_tail->bi_next = bio;
+ if (pending_bios->tail)
+ pending_bios->tail->bi_next = bio;
- device->pending_bio_tail = bio;
- if (!device->pending_bios)
- device->pending_bios = bio;
+ pending_bios->tail = bio;
+ if (!pending_bios->head)
+ pending_bios->head = bio;
if (device->running_pending)
should_queue = 0;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2185de72ff7d..5836327ba5dd 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -23,13 +23,22 @@
#include "async-thread.h"
struct buffer_head;
+struct btrfs_pending_bios {
+ struct bio *head;
+ struct bio *tail;
+};
+
struct btrfs_device {
struct list_head dev_list;
struct list_head dev_alloc_list;
struct btrfs_fs_devices *fs_devices;
struct btrfs_root *dev_root;
- struct bio *pending_bios;
- struct bio *pending_bio_tail;
+
+ /* regular prio bios */
+ struct btrfs_pending_bios pending_bios;
+ /* WRITE_SYNC bios */
+ struct btrfs_pending_bios pending_sync_bios;
+
int running_pending;
u64 generation;
diff --git a/fs/compat.c b/fs/compat.c
index 3f84d5f15889..379a399bf5c3 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -181,22 +181,24 @@ asmlinkage long compat_sys_newstat(char __user * filename,
struct compat_stat __user *statbuf)
{
struct kstat stat;
- int error = vfs_stat_fd(AT_FDCWD, filename, &stat);
+ int error;
- if (!error)
- error = cp_compat_stat(&stat, statbuf);
- return error;
+ error = vfs_stat(filename, &stat);
+ if (error)
+ return error;
+ return cp_compat_stat(&stat, statbuf);
}
asmlinkage long compat_sys_newlstat(char __user * filename,
struct compat_stat __user *statbuf)
{
struct kstat stat;
- int error = vfs_lstat_fd(AT_FDCWD, filename, &stat);
+ int error;
- if (!error)
- error = cp_compat_stat(&stat, statbuf);
- return error;
+ error = vfs_lstat(filename, &stat);
+ if (error)
+ return error;
+ return cp_compat_stat(&stat, statbuf);
}
#ifndef __ARCH_WANT_STAT64
@@ -204,21 +206,12 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename,
struct compat_stat __user *statbuf, int flag)
{
struct kstat stat;
- int error = -EINVAL;
-
- if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
- goto out;
-
- if (flag & AT_SYMLINK_NOFOLLOW)
- error = vfs_lstat_fd(dfd, filename, &stat);
- else
- error = vfs_stat_fd(dfd, filename, &stat);
-
- if (!error)
- error = cp_compat_stat(&stat, statbuf);
+ int error;
-out:
- return error;
+ error = vfs_fstatat(dfd, filename, &stat, flag);
+ if (error)
+ return error;
+ return cp_compat_stat(&stat, statbuf);
}
#endif
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 3e87ce443ea2..b83f6bcfa51a 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -58,7 +58,6 @@
#include <linux/i2c.h>
#include <linux/i2c-dev.h>
#include <linux/atalk.h>
-#include <linux/loop.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci.h>
@@ -68,6 +67,7 @@
#include <linux/gigaset_dev.h>
#ifdef CONFIG_BLOCK
+#include <linux/loop.h>
#include <scsi/scsi.h>
#include <scsi/scsi_ioctl.h>
#include <scsi/sg.h>
@@ -2660,6 +2660,8 @@ HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl)
HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl)
/* block stuff */
#ifdef CONFIG_BLOCK
+/* loop */
+IGNORE_IOCTL(LOOP_CLR_FD)
/* Raw devices */
HANDLE_IOCTL(RAW_SETBIND, raw_ioctl)
HANDLE_IOCTL(RAW_GETBIND, raw_ioctl)
@@ -2728,9 +2730,6 @@ HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans)
IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32)
IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32)
-/* loop */
-IGNORE_IOCTL(LOOP_CLR_FD)
-
#ifdef CONFIG_SPARC
/* Sparc framebuffers, handled in sbusfb_compat_ioctl() */
IGNORE_IOCTL(FBIOGTYPE)
diff --git a/fs/dcache.c b/fs/dcache.c
index 761d30be2683..1fcffebfb44f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2149,7 +2149,6 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
int result;
unsigned long seq;
- /* FIXME: This is old behavior, needed? Please check callers. */
if (new_dentry == old_dentry)
return 1;
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index a67fea655f49..dda3c58eefc0 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -418,18 +418,13 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
if (count == 0)
goto out;
- data = kmalloc(count, GFP_KERNEL);
- if (!data) {
- printk(KERN_ERR "%s: Out of memory whilst attempting to "
- "kmalloc([%zd], GFP_KERNEL)\n", __func__, count);
+
+ data = memdup_user(buf, count);
+ if (IS_ERR(data)) {
+ printk(KERN_ERR "%s: memdup_user returned error [%ld]\n",
+ __func__, PTR_ERR(data));
goto out;
}
- rc = copy_from_user(data, buf, count);
- if (rc) {
- printk(KERN_ERR "%s: copy_from_user returned error [%d]\n",
- __func__, rc);
- goto out_free;
- }
sz = count;
i = 0;
switch (data[i++]) {
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 1aa70260e6d1..a24c58e181db 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -199,7 +199,7 @@ SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2)
return retval;
}
-int get_filesystem_list(char * buf)
+int __init get_filesystem_list(char *buf)
{
int len = 0;
struct file_system_type * tmp;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index bf23a62aa925..70f87f43afa2 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -156,6 +156,12 @@ static void inode_go_sync(struct gfs2_glock *gl)
error = filemap_fdatawait(metamapping);
mapping_set_error(metamapping, error);
gfs2_ail_empty_gl(gl);
+ /*
+ * Writeback of the data mapping may cause the dirty flag to be set
+ * so we have to clear it again here.
+ */
+ smp_mb__before_clear_bit();
+ clear_bit(GLF_DIRTY, &gl->gl_flags);
}
/**
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 101caf3ee861..5d82e91887e3 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -413,7 +413,9 @@ out_unlock:
gfs2_glock_dq(&gh);
out:
gfs2_holder_uninit(&gh);
- if (ret)
+ if (ret == -ENOMEM)
+ ret = VM_FAULT_OOM;
+ else if (ret)
ret = VM_FAULT_SIGBUS;
return ret;
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 23a3c76711e0..153d9681192b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -26,7 +26,6 @@
#include <linux/pagevec.h>
#include <linux/parser.h>
#include <linux/mman.h>
-#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/dnotify.h>
#include <linux/statfs.h>
@@ -842,7 +841,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
bad_val:
printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n",
args[0].from, p);
- return 1;
+ return -EINVAL;
}
static int
diff --git a/fs/namei.c b/fs/namei.c
index b8433ebfae05..78f253cd2d4f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1248,6 +1248,8 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
int err;
struct qstr this;
+ WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
+
err = __lookup_one_len(name, &this, base, len);
if (err)
return ERR_PTR(err);
diff --git a/fs/namespace.c b/fs/namespace.c
index d9138f81ec10..41196209a906 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1377,7 +1377,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
if (parent_path) {
detach_mnt(source_mnt, parent_path);
attach_mnt(source_mnt, path);
- touch_mnt_namespace(current->nsproxy->mnt_ns);
+ touch_mnt_namespace(parent_path->mnt->mnt_ns);
} else {
mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
commit_tree(source_mnt);
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index f54360f50a9c..fa038df63ac8 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -660,13 +660,10 @@ outrel:
if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN)
return -ENOMEM;
if (user.object_name_len) {
- newname = kmalloc(user.object_name_len, GFP_USER);
- if (!newname)
- return -ENOMEM;
- if (copy_from_user(newname, user.object_name, user.object_name_len)) {
- kfree(newname);
- return -EFAULT;
- }
+ newname = memdup_user(user.object_name,
+ user.object_name_len);
+ if (IS_ERR(newname))
+ return PTR_ERR(newname);
} else {
newname = NULL;
}
@@ -760,13 +757,9 @@ outrel:
if (user.len > NCP_PRIVATE_DATA_MAX_LEN)
return -ENOMEM;
if (user.len) {
- new = kmalloc(user.len, GFP_USER);
- if (!new)
- return -ENOMEM;
- if (copy_from_user(new, user.data, user.len)) {
- kfree(new);
- return -EFAULT;
- }
+ new = memdup_user(user.data, user.len);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
} else {
new = NULL;
}
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index e6a1932c7110..35869a4921f1 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -713,7 +713,8 @@ nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p,
if (args->npages != 0)
xdr_encode_pages(buf, args->pages, 0, args->len);
else
- req->rq_slen += args->len;
+ req->rq_slen = xdr_adjust_iovec(req->rq_svec,
+ p + XDR_QUADLEN(args->len));
err = nfsacl_encode(buf, base, args->inode,
(args->mask & NFS_ACL) ?
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 3444c0052a87..5275097a7565 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -229,21 +229,23 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
goto out;
status = vfs_readdir(filp, nfsd4_build_namelist, &names);
fput(filp);
+ mutex_lock(&dir->d_inode->i_mutex);
while (!list_empty(&names)) {
entry = list_entry(names.next, struct name_list, list);
dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
- goto out;
+ break;
}
status = f(dir, dentry);
dput(dentry);
if (status)
- goto out;
+ break;
list_del(&entry->list);
kfree(entry);
}
+ mutex_unlock(&dir->d_inode->i_mutex);
out:
while (!list_empty(&names)) {
entry = list_entry(names.next, struct name_list, list);
@@ -255,36 +257,6 @@ out:
}
static int
-nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry)
-{
- int status;
-
- if (!S_ISREG(dir->d_inode->i_mode)) {
- printk("nfsd4: non-file found in client recovery directory\n");
- return -EINVAL;
- }
- mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
- status = vfs_unlink(dir->d_inode, dentry);
- mutex_unlock(&dir->d_inode->i_mutex);
- return status;
-}
-
-static int
-nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
-{
- int status;
-
- /* For now this directory should already be empty, but we empty it of
- * any regular files anyway, just in case the directory was created by
- * a kernel from the future.... */
- nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
- mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
- status = vfs_rmdir(dir->d_inode, dentry);
- mutex_unlock(&dir->d_inode->i_mutex);
- return status;
-}
-
-static int
nfsd4_unlink_clid_dir(char *name, int namlen)
{
struct dentry *dentry;
@@ -294,18 +266,18 @@ nfsd4_unlink_clid_dir(char *name, int namlen)
mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
dentry = lookup_one_len(name, rec_dir.dentry, namlen);
- mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
- return status;
+ goto out_unlock;
}
status = -ENOENT;
if (!dentry->d_inode)
goto out;
-
- status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry);
+ status = vfs_rmdir(rec_dir.dentry->d_inode, dentry);
out:
dput(dentry);
+out_unlock:
+ mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
return status;
}
@@ -348,7 +320,7 @@ purge_old(struct dentry *parent, struct dentry *child)
if (nfs4_has_reclaimed_state(child->d_name.name, false))
return 0;
- status = nfsd4_clear_clid_dir(parent, child);
+ status = vfs_rmdir(parent->d_inode, child);
if (status)
printk("failed to remove client recovery directory %s\n",
child->d_name.name);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ab93fcfef254..6c68ffd6b4bb 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -116,10 +116,15 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
}
if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
/* successfully crossed mount point */
- exp_put(exp);
- *expp = exp2;
+ /*
+ * This is subtle: dentry is *not* under mnt at this point.
+ * The only reason we are safe is that original mnt is pinned
+ * down by exp, so we should dput before putting exp.
+ */
dput(dentry);
*dpp = mounts;
+ exp_put(exp);
+ *expp = exp2;
} else {
exp_put(exp2);
dput(mounts);
@@ -1885,8 +1890,8 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen,
return 0;
}
-static int nfsd_buffered_readdir(struct file *file, filldir_t func,
- struct readdir_cd *cdp, loff_t *offsetp)
+static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func,
+ struct readdir_cd *cdp, loff_t *offsetp)
{
struct readdir_data buf;
struct buffered_dirent *de;
@@ -1896,11 +1901,12 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func,
buf.dirent = (void *)__get_free_page(GFP_KERNEL);
if (!buf.dirent)
- return -ENOMEM;
+ return nfserrno(-ENOMEM);
offset = *offsetp;
while (1) {
+ struct inode *dir_inode = file->f_path.dentry->d_inode;
unsigned int reclen;
cdp->err = nfserr_eof; /* will be cleared on successful read */
@@ -1919,26 +1925,38 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func,
if (!size)
break;
+ /*
+ * Various filldir functions may end up calling back into
+ * lookup_one_len() and the file system's ->lookup() method.
+ * These expect i_mutex to be held, as it would within readdir.
+ */
+ host_err = mutex_lock_killable(&dir_inode->i_mutex);
+ if (host_err)
+ break;
+
de = (struct buffered_dirent *)buf.dirent;
while (size > 0) {
offset = de->offset;
if (func(cdp, de->name, de->namlen, de->offset,
de->ino, de->d_type))
- goto done;
+ break;
if (cdp->err != nfs_ok)
- goto done;
+ break;
reclen = ALIGN(sizeof(*de) + de->namlen,
sizeof(u64));
size -= reclen;
de = (struct buffered_dirent *)((char *)de + reclen);
}
+ mutex_unlock(&dir_inode->i_mutex);
+ if (size > 0) /* We bailed out early */
+ break;
+
offset = vfs_llseek(file, 0, SEEK_CUR);
}
- done:
free_page((unsigned long)(buf.dirent));
if (host_err)
diff --git a/fs/stat.c b/fs/stat.c
index 2db740a0cfb5..075694e31d8b 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -55,59 +55,54 @@ int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
EXPORT_SYMBOL(vfs_getattr);
-int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat)
+int vfs_fstat(unsigned int fd, struct kstat *stat)
{
- struct path path;
- int error;
+ struct file *f = fget(fd);
+ int error = -EBADF;
- error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path);
- if (!error) {
- error = vfs_getattr(path.mnt, path.dentry, stat);
- path_put(&path);
+ if (f) {
+ error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat);
+ fput(f);
}
return error;
}
+EXPORT_SYMBOL(vfs_fstat);
-int vfs_stat(char __user *name, struct kstat *stat)
+int vfs_fstatat(int dfd, char __user *filename, struct kstat *stat, int flag)
{
- return vfs_stat_fd(AT_FDCWD, name, stat);
-}
+ struct path path;
+ int error = -EINVAL;
+ int lookup_flags = 0;
-EXPORT_SYMBOL(vfs_stat);
+ if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
+ goto out;
-int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat)
-{
- struct path path;
- int error;
+ if (!(flag & AT_SYMLINK_NOFOLLOW))
+ lookup_flags |= LOOKUP_FOLLOW;
- error = user_path_at(dfd, name, 0, &path);
- if (!error) {
- error = vfs_getattr(path.mnt, path.dentry, stat);
- path_put(&path);
- }
+ error = user_path_at(dfd, filename, lookup_flags, &path);
+ if (error)
+ goto out;
+
+ error = vfs_getattr(path.mnt, path.dentry, stat);
+ path_put(&path);
+out:
return error;
}
+EXPORT_SYMBOL(vfs_fstatat);
-int vfs_lstat(char __user *name, struct kstat *stat)
+int vfs_stat(char __user *name, struct kstat *stat)
{
- return vfs_lstat_fd(AT_FDCWD, name, stat);
+ return vfs_fstatat(AT_FDCWD, name, stat, 0);
}
+EXPORT_SYMBOL(vfs_stat);
-EXPORT_SYMBOL(vfs_lstat);
-
-int vfs_fstat(unsigned int fd, struct kstat *stat)
+int vfs_lstat(char __user *name, struct kstat *stat)
{
- struct file *f = fget(fd);
- int error = -EBADF;
-
- if (f) {
- error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat);
- fput(f);
- }
- return error;
+ return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW);
}
+EXPORT_SYMBOL(vfs_lstat);
-EXPORT_SYMBOL(vfs_fstat);
#ifdef __ARCH_WANT_OLD_STAT
@@ -155,23 +150,25 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta
SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf)
{
struct kstat stat;
- int error = vfs_stat_fd(AT_FDCWD, filename, &stat);
+ int error;
- if (!error)
- error = cp_old_stat(&stat, statbuf);
+ error = vfs_stat(filename, &stat);
+ if (error)
+ return error;
- return error;
+ return cp_old_stat(&stat, statbuf);
}
SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf)
{
struct kstat stat;
- int error = vfs_lstat_fd(AT_FDCWD, filename, &stat);
+ int error;
- if (!error)
- error = cp_old_stat(&stat, statbuf);
+ error = vfs_lstat(filename, &stat);
+ if (error)
+ return error;
- return error;
+ return cp_old_stat(&stat, statbuf);
}
SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf)
@@ -240,23 +237,23 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf)
{
struct kstat stat;
- int error = vfs_stat_fd(AT_FDCWD, filename, &stat);
-
- if (!error)
- error = cp_new_stat(&stat, statbuf);
+ int error = vfs_stat(filename, &stat);
- return error;
+ if (error)
+ return error;
+ return cp_new_stat(&stat, statbuf);
}
SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf)
{
struct kstat stat;
- int error = vfs_lstat_fd(AT_FDCWD, filename, &stat);
+ int error;
- if (!error)
- error = cp_new_stat(&stat, statbuf);
+ error = vfs_lstat(filename, &stat);
+ if (error)
+ return error;
- return error;
+ return cp_new_stat(&stat, statbuf);
}
#if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT)
@@ -264,21 +261,12 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, char __user *, filename,
struct stat __user *, statbuf, int, flag)
{
struct kstat stat;
- int error = -EINVAL;
-
- if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
- goto out;
-
- if (flag & AT_SYMLINK_NOFOLLOW)
- error = vfs_lstat_fd(dfd, filename, &stat);
- else
- error = vfs_stat_fd(dfd, filename, &stat);
-
- if (!error)
- error = cp_new_stat(&stat, statbuf);
+ int error;
-out:
- return error;
+ error = vfs_fstatat(dfd, filename, &stat, flag);
+ if (error)
+ return error;
+ return cp_new_stat(&stat, statbuf);
}
#endif
@@ -404,21 +392,12 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename,
struct stat64 __user *, statbuf, int, flag)
{
struct kstat stat;
- int error = -EINVAL;
-
- if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
- goto out;
-
- if (flag & AT_SYMLINK_NOFOLLOW)
- error = vfs_lstat_fd(dfd, filename, &stat);
- else
- error = vfs_stat_fd(dfd, filename, &stat);
-
- if (!error)
- error = cp_new_stat64(&stat, statbuf);
+ int error;
-out:
- return error;
+ error = vfs_fstatat(dfd, filename, &stat, flag);
+ if (error)
+ return error;
+ return cp_new_stat64(&stat, statbuf);
}
#endif /* __ARCH_WANT_STAT64 */
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 93e0c0281d45..9345806c8853 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -157,14 +157,9 @@ static ssize_t write(struct file *file, const char __user *userbuf,
count = size - offs;
}
- temp = kmalloc(count, GFP_KERNEL);
- if (!temp)
- return -ENOMEM;
-
- if (copy_from_user(temp, userbuf, count)) {
- count = -EFAULT;
- goto out_free;
- }
+ temp = memdup_user(userbuf, count);
+ if (IS_ERR(temp))
+ return PTR_ERR(temp);
mutex_lock(&bb->mutex);
@@ -176,8 +171,6 @@ static ssize_t write(struct file *file, const char __user *userbuf,
if (count > 0)
*off = offs + count;
-out_free:
- kfree(temp);
return count;
}
diff --git a/fs/xattr.c b/fs/xattr.c
index 197c4fcac032..d51b8f9db921 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -237,13 +237,9 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
if (size) {
if (size > XATTR_SIZE_MAX)
return -E2BIG;
- kvalue = kmalloc(size, GFP_KERNEL);
- if (!kvalue)
- return -ENOMEM;
- if (copy_from_user(kvalue, value, size)) {
- kfree(kvalue);
- return -EFAULT;
- }
+ kvalue = memdup_user(value, size);
+ if (IS_ERR(kvalue))
+ return PTR_ERR(kvalue);
}
error = vfs_setxattr(d, kname, kvalue, size, flags);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index d0b499418a7d..34eaab608e6e 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -489,17 +489,12 @@ xfs_attrmulti_attr_set(
if (len > XATTR_SIZE_MAX)
return EINVAL;
- kbuf = kmalloc(len, GFP_KERNEL);
- if (!kbuf)
- return ENOMEM;
-
- if (copy_from_user(kbuf, ubuf, len))
- goto out_kfree;
+ kbuf = memdup_user(ubuf, len);
+ if (IS_ERR(kbuf))
+ return PTR_ERR(kbuf);
error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
- out_kfree:
- kfree(kbuf);
return error;
}
@@ -540,20 +535,16 @@ xfs_attrmulti_by_handle(
if (!size || size > 16 * PAGE_SIZE)
goto out_dput;
- error = ENOMEM;
- ops = kmalloc(size, GFP_KERNEL);
- if (!ops)
+ ops = memdup_user(am_hreq.ops, size);
+ if (IS_ERR(ops)) {
+ error = PTR_ERR(ops);
goto out_dput;
-
- error = EFAULT;
- if (copy_from_user(ops, am_hreq.ops, size))
- goto out_kfree_ops;
+ }
attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
if (!attr_name)
goto out_kfree_ops;
-
error = 0;
for (i = 0; i < am_hreq.opcount; i++) {
ops[i].am_error = strncpy_from_user(attr_name,
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index c70c4e3db790..0882d166239a 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -427,20 +427,16 @@ xfs_compat_attrmulti_by_handle(
if (!size || size > 16 * PAGE_SIZE)
goto out_dput;
- error = ENOMEM;
- ops = kmalloc(size, GFP_KERNEL);
- if (!ops)
+ ops = memdup_user(compat_ptr(am_hreq.ops), size);
+ if (IS_ERR(ops)) {
+ error = PTR_ERR(ops);
goto out_dput;
-
- error = EFAULT;
- if (copy_from_user(ops, compat_ptr(am_hreq.ops), size))
- goto out_kfree_ops;
+ }
attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
if (!attr_name)
goto out_kfree_ops;
-
error = 0;
for (i = 0; i < am_hreq.opcount; i++) {
ops[i].am_error = strncpy_from_user(attr_name,