diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/autofs/dirhash.c | 34 | ||||
-rw-r--r-- | fs/autofs4/dev-ioctl.c | 12 | ||||
-rw-r--r-- | fs/btrfs/async-thread.c | 60 | ||||
-rw-r--r-- | fs/btrfs/async-thread.h | 2 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 17 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 9 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 86 | ||||
-rw-r--r-- | fs/btrfs/file.c | 6 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 36 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 49 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 2 | ||||
-rw-r--r-- | fs/btrfs/super.c | 13 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 124 | ||||
-rw-r--r-- | fs/btrfs/volumes.h | 13 | ||||
-rw-r--r-- | fs/compat.c | 37 | ||||
-rw-r--r-- | fs/compat_ioctl.c | 7 | ||||
-rw-r--r-- | fs/dcache.c | 1 | ||||
-rw-r--r-- | fs/ecryptfs/miscdev.c | 15 | ||||
-rw-r--r-- | fs/filesystems.c | 2 | ||||
-rw-r--r-- | fs/gfs2/glops.c | 6 | ||||
-rw-r--r-- | fs/gfs2/ops_file.c | 4 | ||||
-rw-r--r-- | fs/hugetlbfs/inode.c | 3 | ||||
-rw-r--r-- | fs/namei.c | 2 | ||||
-rw-r--r-- | fs/namespace.c | 2 | ||||
-rw-r--r-- | fs/ncpfs/ioctl.c | 21 | ||||
-rw-r--r-- | fs/nfs/nfs3xdr.c | 3 | ||||
-rw-r--r-- | fs/nfsd/nfs4recover.c | 46 | ||||
-rw-r--r-- | fs/nfsd/vfs.c | 34 | ||||
-rw-r--r-- | fs/stat.c | 137 | ||||
-rw-r--r-- | fs/sysfs/bin.c | 13 | ||||
-rw-r--r-- | fs/xattr.c | 10 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_ioctl.c | 23 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_ioctl32.c | 12 |
33 files changed, 467 insertions, 374 deletions
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c index bf8c8af98004..4eb4d8dfb2f1 100644 --- a/fs/autofs/dirhash.c +++ b/fs/autofs/dirhash.c @@ -39,10 +39,12 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, { struct autofs_dirhash *dh = &sbi->dirhash; struct autofs_dir_ent *ent; - struct dentry *dentry; unsigned long timeout = sbi->exp_timeout; while (1) { + struct path path; + int umount_ok; + if ( list_empty(&dh->expiry_head) || sbi->catatonic ) return NULL; /* No entries */ /* We keep the list sorted by last_usage and want old stuff */ @@ -57,17 +59,17 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, return ent; /* Symlinks are always expirable */ /* Get the dentry for the autofs subdirectory */ - dentry = ent->dentry; + path.dentry = ent->dentry; - if ( !dentry ) { + if (!path.dentry) { /* Should only happen in catatonic mode */ printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name); autofs_delete_usage(ent); continue; } - if ( !dentry->d_inode ) { - dput(dentry); + if (!path.dentry->d_inode) { + dput(path.dentry); printk("autofs: negative dentry on expiry queue: %s\n", ent->name); autofs_delete_usage(ent); @@ -76,29 +78,29 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, /* Make sure entry is mounted and unused; note that dentry will point to the mounted-on-top root. */ - if (!S_ISDIR(dentry->d_inode->i_mode)||!d_mountpoint(dentry)) { + if (!S_ISDIR(path.dentry->d_inode->i_mode) || + !d_mountpoint(path.dentry)) { DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); continue; } - mntget(mnt); - dget(dentry); - if (!follow_down(&mnt, &dentry)) { - dput(dentry); - mntput(mnt); + path.mnt = mnt; + path_get(&path); + if (!follow_down(&path.mnt, &path.dentry)) { + path_put(&path); DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); continue; } - while (d_mountpoint(dentry) && follow_down(&mnt, &dentry)) + while (d_mountpoint(path.dentry) && + follow_down(&path.mnt, &path.dentry)) ; - dput(dentry); + umount_ok = may_umount(path.mnt); + path_put(&path); - if ( may_umount(mnt) ) { - mntput(mnt); + if (umount_ok) { DPRINTK(("autofs: signaling expire on %s\n", ent->name)); return ent; /* Expirable! */ } DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name)); - mntput(mnt); } return NULL; /* No expirable entries */ } diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 9e5ae8a4f5c8..84168c0dcc2d 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -54,11 +54,10 @@ static int check_name(const char *name) * Check a string doesn't overrun the chunk of * memory we copied from user land. */ -static int invalid_str(char *str, void *end) +static int invalid_str(char *str, size_t size) { - while ((void *) str <= end) - if (!*str++) - return 0; + if (memchr(str, 0, size)) + return 0; return -EINVAL; } @@ -138,8 +137,7 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) } if (param->size > sizeof(*param)) { - err = invalid_str(param->path, - (void *) ((size_t) param + param->size)); + err = invalid_str(param->path, param->size - sizeof(*param)); if (err) { AUTOFS_WARN( "path string terminator missing for cmd(0x%08x)", @@ -488,7 +486,7 @@ static int autofs_dev_ioctl_requester(struct file *fp, } path = param->path; - devid = sbi->sb->s_dev; + devid = new_encode_dev(sbi->sb->s_dev); param->requester.uid = param->requester.gid = -1; diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 51bfdfc8fcda..502c3d61de62 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -25,6 +25,7 @@ #define WORK_QUEUED_BIT 0 #define WORK_DONE_BIT 1 #define WORK_ORDER_DONE_BIT 2 +#define WORK_HIGH_PRIO_BIT 3 /* * container for the kthread task pointer and the list of pending work @@ -36,6 +37,7 @@ struct btrfs_worker_thread { /* list of struct btrfs_work that are waiting for service */ struct list_head pending; + struct list_head prio_pending; /* list of worker threads from struct btrfs_workers */ struct list_head worker_list; @@ -103,10 +105,16 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers, spin_lock_irqsave(&workers->lock, flags); - while (!list_empty(&workers->order_list)) { - work = list_entry(workers->order_list.next, - struct btrfs_work, order_list); - + while (1) { + if (!list_empty(&workers->prio_order_list)) { + work = list_entry(workers->prio_order_list.next, + struct btrfs_work, order_list); + } else if (!list_empty(&workers->order_list)) { + work = list_entry(workers->order_list.next, + struct btrfs_work, order_list); + } else { + break; + } if (!test_bit(WORK_DONE_BIT, &work->flags)) break; @@ -143,8 +151,14 @@ static int worker_loop(void *arg) do { spin_lock_irq(&worker->lock); again_locked: - while (!list_empty(&worker->pending)) { - cur = worker->pending.next; + while (1) { + if (!list_empty(&worker->prio_pending)) + cur = worker->prio_pending.next; + else if (!list_empty(&worker->pending)) + cur = worker->pending.next; + else + break; + work = list_entry(cur, struct btrfs_work, list); list_del(&work->list); clear_bit(WORK_QUEUED_BIT, &work->flags); @@ -163,7 +177,6 @@ again_locked: spin_lock_irq(&worker->lock); check_idle_worker(worker); - } if (freezing(current)) { worker->working = 0; @@ -178,7 +191,8 @@ again_locked: * jump_in? */ smp_mb(); - if (!list_empty(&worker->pending)) + if (!list_empty(&worker->pending) || + !list_empty(&worker->prio_pending)) continue; /* @@ -191,7 +205,8 @@ again_locked: */ schedule_timeout(1); smp_mb(); - if (!list_empty(&worker->pending)) + if (!list_empty(&worker->pending) || + !list_empty(&worker->prio_pending)) continue; if (kthread_should_stop()) @@ -200,7 +215,8 @@ again_locked: /* still no more work?, sleep for real */ spin_lock_irq(&worker->lock); set_current_state(TASK_INTERRUPTIBLE); - if (!list_empty(&worker->pending)) + if (!list_empty(&worker->pending) || + !list_empty(&worker->prio_pending)) goto again_locked; /* @@ -248,6 +264,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max) INIT_LIST_HEAD(&workers->worker_list); INIT_LIST_HEAD(&workers->idle_list); INIT_LIST_HEAD(&workers->order_list); + INIT_LIST_HEAD(&workers->prio_order_list); spin_lock_init(&workers->lock); workers->max_workers = max; workers->idle_thresh = 32; @@ -273,6 +290,7 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) } INIT_LIST_HEAD(&worker->pending); + INIT_LIST_HEAD(&worker->prio_pending); INIT_LIST_HEAD(&worker->worker_list); spin_lock_init(&worker->lock); atomic_set(&worker->num_pending, 0); @@ -396,7 +414,10 @@ int btrfs_requeue_work(struct btrfs_work *work) goto out; spin_lock_irqsave(&worker->lock, flags); - list_add_tail(&work->list, &worker->pending); + if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) + list_add_tail(&work->list, &worker->prio_pending); + else + list_add_tail(&work->list, &worker->pending); atomic_inc(&worker->num_pending); /* by definition we're busy, take ourselves off the idle @@ -422,6 +443,11 @@ out: return 0; } +void btrfs_set_work_high_prio(struct btrfs_work *work) +{ + set_bit(WORK_HIGH_PRIO_BIT, &work->flags); +} + /* * places a struct btrfs_work into the pending queue of one of the kthreads */ @@ -438,7 +464,12 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) worker = find_worker(workers); if (workers->ordered) { spin_lock_irqsave(&workers->lock, flags); - list_add_tail(&work->order_list, &workers->order_list); + if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { + list_add_tail(&work->order_list, + &workers->prio_order_list); + } else { + list_add_tail(&work->order_list, &workers->order_list); + } spin_unlock_irqrestore(&workers->lock, flags); } else { INIT_LIST_HEAD(&work->order_list); @@ -446,7 +477,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) spin_lock_irqsave(&worker->lock, flags); - list_add_tail(&work->list, &worker->pending); + if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) + list_add_tail(&work->list, &worker->prio_pending); + else + list_add_tail(&work->list, &worker->pending); atomic_inc(&worker->num_pending); check_busy_worker(worker); diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 31be4ed8b63e..1b511c109db6 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -85,6 +85,7 @@ struct btrfs_workers { * of work items waiting for completion */ struct list_head order_list; + struct list_head prio_order_list; /* lock for finding the next worker thread to queue on */ spinlock_t lock; @@ -98,4 +99,5 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); int btrfs_stop_workers(struct btrfs_workers *workers); void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max); int btrfs_requeue_work(struct btrfs_work *work); +void btrfs_set_work_high_prio(struct btrfs_work *work); #endif diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e5b2533b691a..a99f1c2a710d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1325,12 +1325,12 @@ static noinline int reada_for_balance(struct btrfs_root *root, int ret = 0; int blocksize; - parent = path->nodes[level - 1]; + parent = path->nodes[level + 1]; if (!parent) return 0; nritems = btrfs_header_nritems(parent); - slot = path->slots[level]; + slot = path->slots[level + 1]; blocksize = btrfs_level_size(root, level); if (slot > 0) { @@ -1341,7 +1341,7 @@ static noinline int reada_for_balance(struct btrfs_root *root, block1 = 0; free_extent_buffer(eb); } - if (slot < nritems) { + if (slot + 1 < nritems) { block2 = btrfs_node_blockptr(parent, slot + 1); gen = btrfs_node_ptr_generation(parent, slot + 1); eb = btrfs_find_tree_block(root, block2, blocksize); @@ -1351,7 +1351,11 @@ static noinline int reada_for_balance(struct btrfs_root *root, } if (block1 || block2) { ret = -EAGAIN; + + /* release the whole path */ btrfs_release_path(root, path); + + /* read the blocks */ if (block1) readahead_tree_block(root, block1, blocksize, 0); if (block2) @@ -1361,7 +1365,7 @@ static noinline int reada_for_balance(struct btrfs_root *root, eb = read_tree_block(root, block1, blocksize, 0); free_extent_buffer(eb); } - if (block1) { + if (block2) { eb = read_tree_block(root, block2, blocksize, 0); free_extent_buffer(eb); } @@ -1481,12 +1485,15 @@ read_block_for_search(struct btrfs_trans_handle *trans, * of the btree by dropping locks before * we read. */ - btrfs_release_path(NULL, p); + btrfs_unlock_up_safe(p, level + 1); + btrfs_set_path_blocking(p); + if (tmp) free_extent_buffer(tmp); if (p->reada) reada_for_search(root, p, level, slot, key->objectid); + btrfs_release_path(NULL, p); tmp = read_tree_block(root, blocknr, blocksize, gen); if (tmp) free_extent_buffer(tmp); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 92caa8035f36..a6b83744b05d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -579,6 +579,10 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, async->bio_flags = bio_flags; atomic_inc(&fs_info->nr_async_submits); + + if (rw & (1 << BIO_RW_SYNCIO)) + btrfs_set_work_high_prio(&async->work); + btrfs_queue_worker(&fs_info->workers, &async->work); #if 0 int limit = btrfs_async_submit_limit(fs_info); @@ -656,6 +660,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 0); } + /* * kthread helpers are used to submit writes so that checksumming * can happen in parallel across all CPUs @@ -2095,10 +2100,10 @@ static int write_dev_supers(struct btrfs_device *device, device->barriers = 0; get_bh(bh); lock_buffer(bh); - ret = submit_bh(WRITE, bh); + ret = submit_bh(WRITE_SYNC, bh); } } else { - ret = submit_bh(WRITE, bh); + ret = submit_bh(WRITE_SYNC, bh); } if (!ret && wait) { diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index eb2bee8b7fbf..05a1c42e25bf 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -50,7 +50,10 @@ struct extent_page_data { /* tells writepage not to lock the state bits for this range * it still does the unlocking */ - int extent_locked; + unsigned int extent_locked:1; + + /* tells the submit_bio code to use a WRITE_SYNC */ + unsigned int sync_io:1; }; int __init extent_io_init(void) @@ -2101,6 +2104,16 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, return ret; } +static noinline void update_nr_written(struct page *page, + struct writeback_control *wbc, + unsigned long nr_written) +{ + wbc->nr_to_write -= nr_written; + if (wbc->range_cyclic || (wbc->nr_to_write > 0 && + wbc->range_start == 0 && wbc->range_end == LLONG_MAX)) + page->mapping->writeback_index = page->index + nr_written; +} + /* * the writepage semantics are similar to regular writepage. extent * records are inserted to lock ranges in the tree, and as dirty areas @@ -2136,8 +2149,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, u64 delalloc_end; int page_started; int compressed; + int write_flags; unsigned long nr_written = 0; + if (wbc->sync_mode == WB_SYNC_ALL) + write_flags = WRITE_SYNC_PLUG; + else + write_flags = WRITE; + WARN_ON(!PageLocked(page)); pg_offset = i_size & (PAGE_CACHE_SIZE - 1); if (page->index > end_index || @@ -2164,6 +2183,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, delalloc_end = 0; page_started = 0; if (!epd->extent_locked) { + /* + * make sure the wbc mapping index is at least updated + * to this page. + */ + update_nr_written(page, wbc, 0); + while (delalloc_end < page_end) { nr_delalloc = find_lock_delalloc_range(inode, tree, page, @@ -2185,7 +2210,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, */ if (page_started) { ret = 0; - goto update_nr_written; + /* + * we've unlocked the page, so we can't update + * the mapping's writeback index, just update + * nr_to_write. + */ + wbc->nr_to_write -= nr_written; + goto done_unlocked; } } lock_extent(tree, start, page_end, GFP_NOFS); @@ -2198,13 +2229,18 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (ret == -EAGAIN) { unlock_extent(tree, start, page_end, GFP_NOFS); redirty_page_for_writepage(wbc, page); + update_nr_written(page, wbc, nr_written); unlock_page(page); ret = 0; - goto update_nr_written; + goto done_unlocked; } } - nr_written++; + /* + * we don't want to touch the inode after unlocking the page, + * so we update the mapping writeback index now + */ + update_nr_written(page, wbc, nr_written + 1); end = page_end; if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) @@ -2314,9 +2350,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, (unsigned long long)end); } - ret = submit_extent_page(WRITE, tree, page, sector, - iosize, pg_offset, bdev, - &epd->bio, max_nr, + ret = submit_extent_page(write_flags, tree, page, + sector, iosize, pg_offset, + bdev, &epd->bio, max_nr, end_bio_extent_writepage, 0, 0, 0); if (ret) @@ -2336,11 +2372,8 @@ done: unlock_extent(tree, unlock_start, page_end, GFP_NOFS); unlock_page(page); -update_nr_written: - wbc->nr_to_write -= nr_written; - if (wbc->range_cyclic || (wbc->nr_to_write > 0 && - wbc->range_start == 0 && wbc->range_end == LLONG_MAX)) - page->mapping->writeback_index = page->index + nr_written; +done_unlocked: + return 0; } @@ -2460,15 +2493,23 @@ retry: return ret; } -static noinline void flush_write_bio(void *data) +static void flush_epd_write_bio(struct extent_page_data *epd) { - struct extent_page_data *epd = data; if (epd->bio) { - submit_one_bio(WRITE, epd->bio, 0, 0); + if (epd->sync_io) + submit_one_bio(WRITE_SYNC, epd->bio, 0, 0); + else + submit_one_bio(WRITE, epd->bio, 0, 0); epd->bio = NULL; } } +static noinline void flush_write_bio(void *data) +{ + struct extent_page_data *epd = data; + flush_epd_write_bio(epd); +} + int extent_write_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, struct writeback_control *wbc) @@ -2480,23 +2521,22 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, .tree = tree, .get_extent = get_extent, .extent_locked = 0, + .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; struct writeback_control wbc_writepages = { .bdi = wbc->bdi, - .sync_mode = WB_SYNC_NONE, + .sync_mode = wbc->sync_mode, .older_than_this = NULL, .nr_to_write = 64, .range_start = page_offset(page) + PAGE_CACHE_SIZE, .range_end = (loff_t)-1, }; - ret = __extent_writepage(page, wbc, &epd); extent_write_cache_pages(tree, mapping, &wbc_writepages, __extent_writepage, &epd, flush_write_bio); - if (epd.bio) - submit_one_bio(WRITE, epd.bio, 0, 0); + flush_epd_write_bio(&epd); return ret; } @@ -2515,6 +2555,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, .tree = tree, .get_extent = get_extent, .extent_locked = 1, + .sync_io = mode == WB_SYNC_ALL, }; struct writeback_control wbc_writepages = { .bdi = inode->i_mapping->backing_dev_info, @@ -2540,8 +2581,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, start += PAGE_CACHE_SIZE; } - if (epd.bio) - submit_one_bio(WRITE, epd.bio, 0, 0); + flush_epd_write_bio(&epd); return ret; } @@ -2556,13 +2596,13 @@ int extent_writepages(struct extent_io_tree *tree, .tree = tree, .get_extent = get_extent, .extent_locked = 0, + .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; ret = extent_write_cache_pages(tree, mapping, wbc, __extent_writepage, &epd, flush_write_bio); - if (epd.bio) - submit_one_bio(WRITE, epd.bio, 0, 0); + flush_epd_write_bio(&epd); return ret; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9c9fb46ccd08..482f8db2cfd0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -830,7 +830,7 @@ again: ret = btrfs_del_items(trans, root, path, del_slot, del_nr); BUG_ON(ret); - goto done; + goto release; } else if (split == start) { if (locked_end < extent_end) { ret = try_lock_extent(&BTRFS_I(inode)->io_tree, @@ -926,6 +926,8 @@ again: } done: btrfs_mark_buffer_dirty(leaf); + +release: btrfs_release_path(root, path); if (split_end && split == start) { split = end; @@ -1131,7 +1133,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (will_write) { btrfs_fdatawrite_range(inode->i_mapping, pos, pos + write_bytes - 1, - WB_SYNC_NONE); + WB_SYNC_ALL); } else { balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a0d1dd492a58..65219f6a16a1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4970,10 +4970,10 @@ out_fail: return err; } -static int prealloc_file_range(struct inode *inode, u64 start, u64 end, +static int prealloc_file_range(struct btrfs_trans_handle *trans, + struct inode *inode, u64 start, u64 end, u64 alloc_hint, int mode) { - struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; u64 alloc_size; @@ -4981,10 +4981,6 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, u64 num_bytes = end - start; int ret = 0; - trans = btrfs_join_transaction(root, 1); - BUG_ON(!trans); - btrfs_set_trans_block_group(trans, inode); - while (num_bytes > 0) { alloc_size = min(num_bytes, root->fs_info->max_extent); ret = btrfs_reserve_extent(trans, root, alloc_size, @@ -5015,7 +5011,6 @@ out: BUG_ON(ret); } - btrfs_end_transaction(trans, root); return ret; } @@ -5029,11 +5024,18 @@ static long btrfs_fallocate(struct inode *inode, int mode, u64 alloc_hint = 0; u64 mask = BTRFS_I(inode)->root->sectorsize - 1; struct extent_map *em; + struct btrfs_trans_handle *trans; int ret; alloc_start = offset & ~mask; alloc_end = (offset + len + mask) & ~mask; + /* + * wait for ordered IO before we have any locks. We'll loop again + * below with the locks held. + */ + btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); + mutex_lock(&inode->i_mutex); if (alloc_start > inode->i_size) { ret = btrfs_cont_expand(inode, alloc_start); @@ -5043,6 +5045,16 @@ static long btrfs_fallocate(struct inode *inode, int mode, while (1) { struct btrfs_ordered_extent *ordered; + + trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); + if (!trans) { + ret = -EIO; + goto out; + } + + /* the extent lock is ordered inside the running + * transaction + */ lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, GFP_NOFS); ordered = btrfs_lookup_first_ordered_extent(inode, @@ -5053,6 +5065,12 @@ static long btrfs_fallocate(struct inode *inode, int mode, btrfs_put_ordered_extent(ordered); unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, GFP_NOFS); + btrfs_end_transaction(trans, BTRFS_I(inode)->root); + + /* + * we can't wait on the range with the transaction + * running or with the extent lock held + */ btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); } else { @@ -5070,7 +5088,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, last_byte = min(extent_map_end(em), alloc_end); last_byte = (last_byte + mask) & ~mask; if (em->block_start == EXTENT_MAP_HOLE) { - ret = prealloc_file_range(inode, cur_offset, + ret = prealloc_file_range(trans, inode, cur_offset, last_byte, alloc_hint, mode); if (ret < 0) { free_extent_map(em); @@ -5089,6 +5107,8 @@ static long btrfs_fallocate(struct inode *inode, int mode, } unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, GFP_NOFS); + + btrfs_end_transaction(trans, BTRFS_I(inode)->root); out: mutex_unlock(&inode->i_mutex); return ret; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7594bec1be10..9f135e878507 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -461,15 +461,9 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); - - if (!vol_args) - return -ENOMEM; - - if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { - ret = -EFAULT; - goto out; - } + vol_args = memdup_user(arg, sizeof(*vol_args)); + if (IS_ERR(vol_args)) + return PTR_ERR(vol_args); vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; namelen = strlen(vol_args->name); @@ -545,7 +539,6 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) out_unlock: mutex_unlock(&root->fs_info->volume_mutex); -out: kfree(vol_args); return ret; } @@ -565,15 +558,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, if (root->fs_info->sb->s_flags & MS_RDONLY) return -EROFS; - vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); - - if (!vol_args) - return -ENOMEM; - - if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { - ret = -EFAULT; - goto out; - } + vol_args = memdup_user(arg, sizeof(*vol_args)); + if (IS_ERR(vol_args)) + return PTR_ERR(vol_args); vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; namelen = strlen(vol_args->name); @@ -675,19 +662,13 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); + vol_args = memdup_user(arg, sizeof(*vol_args)); + if (IS_ERR(vol_args)) + return PTR_ERR(vol_args); - if (!vol_args) - return -ENOMEM; - - if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { - ret = -EFAULT; - goto out; - } vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; ret = btrfs_init_new_device(root, vol_args->name); -out: kfree(vol_args); return ret; } @@ -703,19 +684,13 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) if (root->fs_info->sb->s_flags & MS_RDONLY) return -EROFS; - vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); + vol_args = memdup_user(arg, sizeof(*vol_args)); + if (IS_ERR(vol_args)) + return PTR_ERR(vol_args); - if (!vol_args) - return -ENOMEM; - - if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { - ret = -EFAULT; - goto out; - } vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; ret = btrfs_rm_device(root, vol_args->name); -out: kfree(vol_args); return ret; } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 53c87b197d70..d6f0806c682f 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -489,7 +489,7 @@ again: /* start IO across the range first to instantiate any delalloc * extents */ - btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE); + btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); /* The compression code will leave pages locked but return from * writepage without setting the page writeback. Starting again diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9744af9d71e9..a7acfe639a44 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -635,14 +635,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - vol = kmalloc(sizeof(*vol), GFP_KERNEL); - if (!vol) - return -ENOMEM; - - if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) { - ret = -EFAULT; - goto out; - } + vol = memdup_user((void __user *)arg, sizeof(*vol)); + if (IS_ERR(vol)) + return PTR_ERR(vol); switch (cmd) { case BTRFS_IOC_SCAN_DEV: @@ -650,7 +645,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, &btrfs_fs_type, &fs_devices); break; } -out: + kfree(vol); return ret; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e0913e469728..e53835b88594 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -125,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid) return NULL; } +static void requeue_list(struct btrfs_pending_bios *pending_bios, + struct bio *head, struct bio *tail) +{ + + struct bio *old_head; + + old_head = pending_bios->head; + pending_bios->head = head; + if (pending_bios->tail) + tail->bi_next = old_head; + else + pending_bios->tail = tail; +} + /* * we try to collect pending bios for a device so we don't get a large * number of procs sending bios down to the same device. This greatly @@ -141,10 +155,12 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) struct bio *pending; struct backing_dev_info *bdi; struct btrfs_fs_info *fs_info; + struct btrfs_pending_bios *pending_bios; struct bio *tail; struct bio *cur; int again = 0; - unsigned long num_run = 0; + unsigned long num_run; + unsigned long num_sync_run; unsigned long limit; unsigned long last_waited = 0; @@ -153,20 +169,30 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) limit = btrfs_async_submit_limit(fs_info); limit = limit * 2 / 3; + /* we want to make sure that every time we switch from the sync + * list to the normal list, we unplug + */ + num_sync_run = 0; + loop: spin_lock(&device->io_lock); + num_run = 0; loop_lock: + /* take all the bios off the list at once and process them * later on (without the lock held). But, remember the * tail and other pointers so the bios can be properly reinserted * into the list if we hit congestion */ - pending = device->pending_bios; - tail = device->pending_bio_tail; + if (device->pending_sync_bios.head) + pending_bios = &device->pending_sync_bios; + else + pending_bios = &device->pending_bios; + + pending = pending_bios->head; + tail = pending_bios->tail; WARN_ON(pending && !tail); - device->pending_bios = NULL; - device->pending_bio_tail = NULL; /* * if pending was null this time around, no bios need processing @@ -176,16 +202,41 @@ loop_lock: * device->running_pending is used to synchronize with the * schedule_bio code. */ - if (pending) { - again = 1; - device->running_pending = 1; - } else { + if (device->pending_sync_bios.head == NULL && + device->pending_bios.head == NULL) { again = 0; device->running_pending = 0; + } else { + again = 1; + device->running_pending = 1; } + + pending_bios->head = NULL; + pending_bios->tail = NULL; + spin_unlock(&device->io_lock); + /* + * if we're doing the regular priority list, make sure we unplug + * for any high prio bios we've sent down + */ + if (pending_bios == &device->pending_bios && num_sync_run > 0) { + num_sync_run = 0; + blk_run_backing_dev(bdi, NULL); + } + while (pending) { + + rmb(); + if (pending_bios != &device->pending_sync_bios && + device->pending_sync_bios.head && + num_run > 16) { + cond_resched(); + spin_lock(&device->io_lock); + requeue_list(pending_bios, pending, tail); + goto loop_lock; + } + cur = pending; pending = pending->bi_next; cur->bi_next = NULL; @@ -196,10 +247,18 @@ loop_lock: wake_up(&fs_info->async_submit_wait); BUG_ON(atomic_read(&cur->bi_cnt) == 0); - bio_get(cur); submit_bio(cur->bi_rw, cur); - bio_put(cur); num_run++; + if (bio_sync(cur)) + num_sync_run++; + + if (need_resched()) { + if (num_sync_run) { + blk_run_backing_dev(bdi, NULL); + num_sync_run = 0; + } + cond_resched(); + } /* * we made progress, there is more work to do and the bdi @@ -208,7 +267,6 @@ loop_lock: */ if (pending && bdi_write_congested(bdi) && num_run > 16 && fs_info->fs_devices->open_devices > 1) { - struct bio *old_head; struct io_context *ioc; ioc = current->io_context; @@ -233,17 +291,17 @@ loop_lock: * against it before looping */ last_waited = ioc->last_waited; + if (need_resched()) { + if (num_sync_run) { + blk_run_backing_dev(bdi, NULL); + num_sync_run = 0; + } + cond_resched(); + } continue; } spin_lock(&device->io_lock); - - old_head = device->pending_bios; - device->pending_bios = pending; - if (device->pending_bio_tail) - tail->bi_next = old_head; - else - device->pending_bio_tail = tail; - + requeue_list(pending_bios, pending, tail); device->running_pending = 1; spin_unlock(&device->io_lock); @@ -251,11 +309,18 @@ loop_lock: goto done; } } + + if (num_sync_run) { + num_sync_run = 0; + blk_run_backing_dev(bdi, NULL); + } + + cond_resched(); if (again) goto loop; spin_lock(&device->io_lock); - if (device->pending_bios) + if (device->pending_bios.head || device->pending_sync_bios.head) goto loop_lock; spin_unlock(&device->io_lock); @@ -2497,7 +2562,7 @@ again: max_errors = 1; } } - if (multi_ret && rw == WRITE && + if (multi_ret && (rw & (1 << BIO_RW)) && stripes_allocated < stripes_required) { stripes_allocated = map->num_stripes; free_extent_map(em); @@ -2762,6 +2827,7 @@ static noinline int schedule_bio(struct btrfs_root *root, int rw, struct bio *bio) { int should_queue = 1; + struct btrfs_pending_bios *pending_bios; /* don't bother with additional async steps for reads, right now */ if (!(rw & (1 << BIO_RW))) { @@ -2783,13 +2849,17 @@ static noinline int schedule_bio(struct btrfs_root *root, bio->bi_rw |= rw; spin_lock(&device->io_lock); + if (bio_sync(bio)) + pending_bios = &device->pending_sync_bios; + else + pending_bios = &device->pending_bios; - if (device->pending_bio_tail) - device->pending_bio_tail->bi_next = bio; + if (pending_bios->tail) + pending_bios->tail->bi_next = bio; - device->pending_bio_tail = bio; - if (!device->pending_bios) - device->pending_bios = bio; + pending_bios->tail = bio; + if (!pending_bios->head) + pending_bios->head = bio; if (device->running_pending) should_queue = 0; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 2185de72ff7d..5836327ba5dd 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -23,13 +23,22 @@ #include "async-thread.h" struct buffer_head; +struct btrfs_pending_bios { + struct bio *head; + struct bio *tail; +}; + struct btrfs_device { struct list_head dev_list; struct list_head dev_alloc_list; struct btrfs_fs_devices *fs_devices; struct btrfs_root *dev_root; - struct bio *pending_bios; - struct bio *pending_bio_tail; + + /* regular prio bios */ + struct btrfs_pending_bios pending_bios; + /* WRITE_SYNC bios */ + struct btrfs_pending_bios pending_sync_bios; + int running_pending; u64 generation; diff --git a/fs/compat.c b/fs/compat.c index 3f84d5f15889..379a399bf5c3 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -181,22 +181,24 @@ asmlinkage long compat_sys_newstat(char __user * filename, struct compat_stat __user *statbuf) { struct kstat stat; - int error = vfs_stat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = cp_compat_stat(&stat, statbuf); - return error; + error = vfs_stat(filename, &stat); + if (error) + return error; + return cp_compat_stat(&stat, statbuf); } asmlinkage long compat_sys_newlstat(char __user * filename, struct compat_stat __user *statbuf) { struct kstat stat; - int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = cp_compat_stat(&stat, statbuf); - return error; + error = vfs_lstat(filename, &stat); + if (error) + return error; + return cp_compat_stat(&stat, statbuf); } #ifndef __ARCH_WANT_STAT64 @@ -204,21 +206,12 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename, struct compat_stat __user *statbuf, int flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_compat_stat(&stat, statbuf); + int error; -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_compat_stat(&stat, statbuf); } #endif diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 3e87ce443ea2..b83f6bcfa51a 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -58,7 +58,6 @@ #include <linux/i2c.h> #include <linux/i2c-dev.h> #include <linux/atalk.h> -#include <linux/loop.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci.h> @@ -68,6 +67,7 @@ #include <linux/gigaset_dev.h> #ifdef CONFIG_BLOCK +#include <linux/loop.h> #include <scsi/scsi.h> #include <scsi/scsi_ioctl.h> #include <scsi/sg.h> @@ -2660,6 +2660,8 @@ HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl) HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl) /* block stuff */ #ifdef CONFIG_BLOCK +/* loop */ +IGNORE_IOCTL(LOOP_CLR_FD) /* Raw devices */ HANDLE_IOCTL(RAW_SETBIND, raw_ioctl) HANDLE_IOCTL(RAW_GETBIND, raw_ioctl) @@ -2728,9 +2730,6 @@ HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans) IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32) IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32) -/* loop */ -IGNORE_IOCTL(LOOP_CLR_FD) - #ifdef CONFIG_SPARC /* Sparc framebuffers, handled in sbusfb_compat_ioctl() */ IGNORE_IOCTL(FBIOGTYPE) diff --git a/fs/dcache.c b/fs/dcache.c index 761d30be2683..1fcffebfb44f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2149,7 +2149,6 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) int result; unsigned long seq; - /* FIXME: This is old behavior, needed? Please check callers. */ if (new_dentry == old_dentry) return 1; diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index a67fea655f49..dda3c58eefc0 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -418,18 +418,13 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, if (count == 0) goto out; - data = kmalloc(count, GFP_KERNEL); - if (!data) { - printk(KERN_ERR "%s: Out of memory whilst attempting to " - "kmalloc([%zd], GFP_KERNEL)\n", __func__, count); + + data = memdup_user(buf, count); + if (IS_ERR(data)) { + printk(KERN_ERR "%s: memdup_user returned error [%ld]\n", + __func__, PTR_ERR(data)); goto out; } - rc = copy_from_user(data, buf, count); - if (rc) { - printk(KERN_ERR "%s: copy_from_user returned error [%d]\n", - __func__, rc); - goto out_free; - } sz = count; i = 0; switch (data[i++]) { diff --git a/fs/filesystems.c b/fs/filesystems.c index 1aa70260e6d1..a24c58e181db 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -199,7 +199,7 @@ SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2) return retval; } -int get_filesystem_list(char * buf) +int __init get_filesystem_list(char *buf) { int len = 0; struct file_system_type * tmp; diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index bf23a62aa925..70f87f43afa2 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -156,6 +156,12 @@ static void inode_go_sync(struct gfs2_glock *gl) error = filemap_fdatawait(metamapping); mapping_set_error(metamapping, error); gfs2_ail_empty_gl(gl); + /* + * Writeback of the data mapping may cause the dirty flag to be set + * so we have to clear it again here. + */ + smp_mb__before_clear_bit(); + clear_bit(GLF_DIRTY, &gl->gl_flags); } /** diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 101caf3ee861..5d82e91887e3 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -413,7 +413,9 @@ out_unlock: gfs2_glock_dq(&gh); out: gfs2_holder_uninit(&gh); - if (ret) + if (ret == -ENOMEM) + ret = VM_FAULT_OOM; + else if (ret) ret = VM_FAULT_SIGBUS; return ret; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 23a3c76711e0..153d9681192b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -26,7 +26,6 @@ #include <linux/pagevec.h> #include <linux/parser.h> #include <linux/mman.h> -#include <linux/quotaops.h> #include <linux/slab.h> #include <linux/dnotify.h> #include <linux/statfs.h> @@ -842,7 +841,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) bad_val: printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n", args[0].from, p); - return 1; + return -EINVAL; } static int diff --git a/fs/namei.c b/fs/namei.c index b8433ebfae05..78f253cd2d4f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1248,6 +1248,8 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) int err; struct qstr this; + WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); + err = __lookup_one_len(name, &this, base, len); if (err) return ERR_PTR(err); diff --git a/fs/namespace.c b/fs/namespace.c index d9138f81ec10..41196209a906 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1377,7 +1377,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, if (parent_path) { detach_mnt(source_mnt, parent_path); attach_mnt(source_mnt, path); - touch_mnt_namespace(current->nsproxy->mnt_ns); + touch_mnt_namespace(parent_path->mnt->mnt_ns); } else { mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); commit_tree(source_mnt); diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index f54360f50a9c..fa038df63ac8 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -660,13 +660,10 @@ outrel: if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN) return -ENOMEM; if (user.object_name_len) { - newname = kmalloc(user.object_name_len, GFP_USER); - if (!newname) - return -ENOMEM; - if (copy_from_user(newname, user.object_name, user.object_name_len)) { - kfree(newname); - return -EFAULT; - } + newname = memdup_user(user.object_name, + user.object_name_len); + if (IS_ERR(newname)) + return PTR_ERR(newname); } else { newname = NULL; } @@ -760,13 +757,9 @@ outrel: if (user.len > NCP_PRIVATE_DATA_MAX_LEN) return -ENOMEM; if (user.len) { - new = kmalloc(user.len, GFP_USER); - if (!new) - return -ENOMEM; - if (copy_from_user(new, user.data, user.len)) { - kfree(new); - return -EFAULT; - } + new = memdup_user(user.data, user.len); + if (IS_ERR(new)) + return PTR_ERR(new); } else { new = NULL; } diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index e6a1932c7110..35869a4921f1 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -713,7 +713,8 @@ nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p, if (args->npages != 0) xdr_encode_pages(buf, args->pages, 0, args->len); else - req->rq_slen += args->len; + req->rq_slen = xdr_adjust_iovec(req->rq_svec, + p + XDR_QUADLEN(args->len)); err = nfsacl_encode(buf, base, args->inode, (args->mask & NFS_ACL) ? diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 3444c0052a87..5275097a7565 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -229,21 +229,23 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) goto out; status = vfs_readdir(filp, nfsd4_build_namelist, &names); fput(filp); + mutex_lock(&dir->d_inode->i_mutex); while (!list_empty(&names)) { entry = list_entry(names.next, struct name_list, list); dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); - goto out; + break; } status = f(dir, dentry); dput(dentry); if (status) - goto out; + break; list_del(&entry->list); kfree(entry); } + mutex_unlock(&dir->d_inode->i_mutex); out: while (!list_empty(&names)) { entry = list_entry(names.next, struct name_list, list); @@ -255,36 +257,6 @@ out: } static int -nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry) -{ - int status; - - if (!S_ISREG(dir->d_inode->i_mode)) { - printk("nfsd4: non-file found in client recovery directory\n"); - return -EINVAL; - } - mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - status = vfs_unlink(dir->d_inode, dentry); - mutex_unlock(&dir->d_inode->i_mutex); - return status; -} - -static int -nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry) -{ - int status; - - /* For now this directory should already be empty, but we empty it of - * any regular files anyway, just in case the directory was created by - * a kernel from the future.... */ - nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file); - mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - status = vfs_rmdir(dir->d_inode, dentry); - mutex_unlock(&dir->d_inode->i_mutex); - return status; -} - -static int nfsd4_unlink_clid_dir(char *name, int namlen) { struct dentry *dentry; @@ -294,18 +266,18 @@ nfsd4_unlink_clid_dir(char *name, int namlen) mutex_lock(&rec_dir.dentry->d_inode->i_mutex); dentry = lookup_one_len(name, rec_dir.dentry, namlen); - mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); - return status; + goto out_unlock; } status = -ENOENT; if (!dentry->d_inode) goto out; - - status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry); + status = vfs_rmdir(rec_dir.dentry->d_inode, dentry); out: dput(dentry); +out_unlock: + mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); return status; } @@ -348,7 +320,7 @@ purge_old(struct dentry *parent, struct dentry *child) if (nfs4_has_reclaimed_state(child->d_name.name, false)) return 0; - status = nfsd4_clear_clid_dir(parent, child); + status = vfs_rmdir(parent->d_inode, child); if (status) printk("failed to remove client recovery directory %s\n", child->d_name.name); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ab93fcfef254..6c68ffd6b4bb 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -116,10 +116,15 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, } if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { /* successfully crossed mount point */ - exp_put(exp); - *expp = exp2; + /* + * This is subtle: dentry is *not* under mnt at this point. + * The only reason we are safe is that original mnt is pinned + * down by exp, so we should dput before putting exp. + */ dput(dentry); *dpp = mounts; + exp_put(exp); + *expp = exp2; } else { exp_put(exp2); dput(mounts); @@ -1885,8 +1890,8 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, return 0; } -static int nfsd_buffered_readdir(struct file *file, filldir_t func, - struct readdir_cd *cdp, loff_t *offsetp) +static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, + struct readdir_cd *cdp, loff_t *offsetp) { struct readdir_data buf; struct buffered_dirent *de; @@ -1896,11 +1901,12 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func, buf.dirent = (void *)__get_free_page(GFP_KERNEL); if (!buf.dirent) - return -ENOMEM; + return nfserrno(-ENOMEM); offset = *offsetp; while (1) { + struct inode *dir_inode = file->f_path.dentry->d_inode; unsigned int reclen; cdp->err = nfserr_eof; /* will be cleared on successful read */ @@ -1919,26 +1925,38 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func, if (!size) break; + /* + * Various filldir functions may end up calling back into + * lookup_one_len() and the file system's ->lookup() method. + * These expect i_mutex to be held, as it would within readdir. + */ + host_err = mutex_lock_killable(&dir_inode->i_mutex); + if (host_err) + break; + de = (struct buffered_dirent *)buf.dirent; while (size > 0) { offset = de->offset; if (func(cdp, de->name, de->namlen, de->offset, de->ino, de->d_type)) - goto done; + break; if (cdp->err != nfs_ok) - goto done; + break; reclen = ALIGN(sizeof(*de) + de->namlen, sizeof(u64)); size -= reclen; de = (struct buffered_dirent *)((char *)de + reclen); } + mutex_unlock(&dir_inode->i_mutex); + if (size > 0) /* We bailed out early */ + break; + offset = vfs_llseek(file, 0, SEEK_CUR); } - done: free_page((unsigned long)(buf.dirent)); if (host_err) diff --git a/fs/stat.c b/fs/stat.c index 2db740a0cfb5..075694e31d8b 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -55,59 +55,54 @@ int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) EXPORT_SYMBOL(vfs_getattr); -int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat) +int vfs_fstat(unsigned int fd, struct kstat *stat) { - struct path path; - int error; + struct file *f = fget(fd); + int error = -EBADF; - error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path); - if (!error) { - error = vfs_getattr(path.mnt, path.dentry, stat); - path_put(&path); + if (f) { + error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat); + fput(f); } return error; } +EXPORT_SYMBOL(vfs_fstat); -int vfs_stat(char __user *name, struct kstat *stat) +int vfs_fstatat(int dfd, char __user *filename, struct kstat *stat, int flag) { - return vfs_stat_fd(AT_FDCWD, name, stat); -} + struct path path; + int error = -EINVAL; + int lookup_flags = 0; -EXPORT_SYMBOL(vfs_stat); + if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) + goto out; -int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat) -{ - struct path path; - int error; + if (!(flag & AT_SYMLINK_NOFOLLOW)) + lookup_flags |= LOOKUP_FOLLOW; - error = user_path_at(dfd, name, 0, &path); - if (!error) { - error = vfs_getattr(path.mnt, path.dentry, stat); - path_put(&path); - } + error = user_path_at(dfd, filename, lookup_flags, &path); + if (error) + goto out; + + error = vfs_getattr(path.mnt, path.dentry, stat); + path_put(&path); +out: return error; } +EXPORT_SYMBOL(vfs_fstatat); -int vfs_lstat(char __user *name, struct kstat *stat) +int vfs_stat(char __user *name, struct kstat *stat) { - return vfs_lstat_fd(AT_FDCWD, name, stat); + return vfs_fstatat(AT_FDCWD, name, stat, 0); } +EXPORT_SYMBOL(vfs_stat); -EXPORT_SYMBOL(vfs_lstat); - -int vfs_fstat(unsigned int fd, struct kstat *stat) +int vfs_lstat(char __user *name, struct kstat *stat) { - struct file *f = fget(fd); - int error = -EBADF; - - if (f) { - error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat); - fput(f); - } - return error; + return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW); } +EXPORT_SYMBOL(vfs_lstat); -EXPORT_SYMBOL(vfs_fstat); #ifdef __ARCH_WANT_OLD_STAT @@ -155,23 +150,25 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; - int error = vfs_stat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = cp_old_stat(&stat, statbuf); + error = vfs_stat(filename, &stat); + if (error) + return error; - return error; + return cp_old_stat(&stat, statbuf); } SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; - int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = cp_old_stat(&stat, statbuf); + error = vfs_lstat(filename, &stat); + if (error) + return error; - return error; + return cp_old_stat(&stat, statbuf); } SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf) @@ -240,23 +237,23 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf) { struct kstat stat; - int error = vfs_stat_fd(AT_FDCWD, filename, &stat); - - if (!error) - error = cp_new_stat(&stat, statbuf); + int error = vfs_stat(filename, &stat); - return error; + if (error) + return error; + return cp_new_stat(&stat, statbuf); } SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf) { struct kstat stat; - int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = cp_new_stat(&stat, statbuf); + error = vfs_lstat(filename, &stat); + if (error) + return error; - return error; + return cp_new_stat(&stat, statbuf); } #if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) @@ -264,21 +261,12 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, char __user *, filename, struct stat __user *, statbuf, int, flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_new_stat(&stat, statbuf); + int error; -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_new_stat(&stat, statbuf); } #endif @@ -404,21 +392,12 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename, struct stat64 __user *, statbuf, int, flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_new_stat64(&stat, statbuf); + int error; -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_new_stat64(&stat, statbuf); } #endif /* __ARCH_WANT_STAT64 */ diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 93e0c0281d45..9345806c8853 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -157,14 +157,9 @@ static ssize_t write(struct file *file, const char __user *userbuf, count = size - offs; } - temp = kmalloc(count, GFP_KERNEL); - if (!temp) - return -ENOMEM; - - if (copy_from_user(temp, userbuf, count)) { - count = -EFAULT; - goto out_free; - } + temp = memdup_user(userbuf, count); + if (IS_ERR(temp)) + return PTR_ERR(temp); mutex_lock(&bb->mutex); @@ -176,8 +171,6 @@ static ssize_t write(struct file *file, const char __user *userbuf, if (count > 0) *off = offs + count; -out_free: - kfree(temp); return count; } diff --git a/fs/xattr.c b/fs/xattr.c index 197c4fcac032..d51b8f9db921 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -237,13 +237,9 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value, if (size) { if (size > XATTR_SIZE_MAX) return -E2BIG; - kvalue = kmalloc(size, GFP_KERNEL); - if (!kvalue) - return -ENOMEM; - if (copy_from_user(kvalue, value, size)) { - kfree(kvalue); - return -EFAULT; - } + kvalue = memdup_user(value, size); + if (IS_ERR(kvalue)) + return PTR_ERR(kvalue); } error = vfs_setxattr(d, kname, kvalue, size, flags); diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index d0b499418a7d..34eaab608e6e 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -489,17 +489,12 @@ xfs_attrmulti_attr_set( if (len > XATTR_SIZE_MAX) return EINVAL; - kbuf = kmalloc(len, GFP_KERNEL); - if (!kbuf) - return ENOMEM; - - if (copy_from_user(kbuf, ubuf, len)) - goto out_kfree; + kbuf = memdup_user(ubuf, len); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); - out_kfree: - kfree(kbuf); return error; } @@ -540,20 +535,16 @@ xfs_attrmulti_by_handle( if (!size || size > 16 * PAGE_SIZE) goto out_dput; - error = ENOMEM; - ops = kmalloc(size, GFP_KERNEL); - if (!ops) + ops = memdup_user(am_hreq.ops, size); + if (IS_ERR(ops)) { + error = PTR_ERR(ops); goto out_dput; - - error = EFAULT; - if (copy_from_user(ops, am_hreq.ops, size)) - goto out_kfree_ops; + } attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); if (!attr_name) goto out_kfree_ops; - error = 0; for (i = 0; i < am_hreq.opcount; i++) { ops[i].am_error = strncpy_from_user(attr_name, diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index c70c4e3db790..0882d166239a 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -427,20 +427,16 @@ xfs_compat_attrmulti_by_handle( if (!size || size > 16 * PAGE_SIZE) goto out_dput; - error = ENOMEM; - ops = kmalloc(size, GFP_KERNEL); - if (!ops) + ops = memdup_user(compat_ptr(am_hreq.ops), size); + if (IS_ERR(ops)) { + error = PTR_ERR(ops); goto out_dput; - - error = EFAULT; - if (copy_from_user(ops, compat_ptr(am_hreq.ops), size)) - goto out_kfree_ops; + } attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); if (!attr_name) goto out_kfree_ops; - error = 0; for (i = 0; i < am_hreq.opcount; i++) { ops[i].am_error = strncpy_from_user(attr_name, |