Diffstat (limited to 'fs')
-rw-r--r--  fs/block_dev.c                        |   7
-rw-r--r--  fs/buffer.c                           |  28
-rw-r--r--  fs/internal.h                         |   5
-rw-r--r--  fs/mpage.c                            |   2
-rw-r--r--  fs/notify/fanotify/fanotify_user.c    |   2
-rw-r--r--  fs/notify/fsnotify.h                  |   3
-rw-r--r--  fs/notify/group.c                     |   2
-rw-r--r--  fs/notify/inotify/inotify_fsnotify.c  |   6
-rw-r--r--  fs/ntfs/debug.c                       |   2
-rw-r--r--  fs/ntfs/file.c                        |   5
-rw-r--r--  fs/ntfs/super.c                       |   2
-rw-r--r--  fs/ocfs2/aops.c                       |  15
-rw-r--r--  fs/ocfs2/cluster/heartbeat.c          |  19
-rw-r--r--  fs/ocfs2/cluster/heartbeat.h          |   1
-rw-r--r--  fs/ocfs2/cluster/netdebug.c           |  78
-rw-r--r--  fs/ocfs2/cluster/tcp.c                |  43
-rw-r--r--  fs/ocfs2/dlm/dlmdebug.c               |  39
-rw-r--r--  fs/ocfs2/dlm/dlmdomain.c              |  44
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c              |   3
-rw-r--r--  fs/ocfs2/dlm/dlmrecovery.c            |   7
-rw-r--r--  fs/ocfs2/dlmglue.c                    |  23
-rw-r--r--  fs/ocfs2/file.c                       |  47
-rw-r--r--  fs/ocfs2/inode.h                      |   2
-rw-r--r--  fs/ocfs2/move_extents.c               |   2
-rw-r--r--  fs/ocfs2/stack_user.c                 |   2
-rw-r--r--  fs/proc/base.c                        |  36
-rw-r--r--  fs/proc/internal.h                    |   5
-rw-r--r--  fs/proc/kcore.c                       |   4
-rw-r--r--  fs/proc/page.c                        |   3
-rw-r--r--  fs/proc/task_mmu.c                    | 332
-rw-r--r--  fs/proc/task_nommu.c                  |  88
31 files changed, 453 insertions(+), 404 deletions(-)
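
One change deserves a note before the diff proper: fs/buffer.c renames guard_bh_eod() to guard_bio_eod() and rewrites it to clamp an entire bio, rather than a single buffer head, against the end of the device, so that fs/mpage.c can reuse it. The clamping arithmetic is easy to get wrong, so here is a minimal userspace sketch of it; every name (io_req, clamp_to_device) is hypothetical, and the real code additionally zeroes the truncated tail of a read via zero_user():

/*
 * Illustrative sketch of the end-of-device clamping performed by
 * guard_bio_eod() in the fs/buffer.c hunk below. Not kernel code.
 */
#include <stdio.h>

#define SECTOR_SHIFT 9  /* 512-byte sectors, matching the kernel's >> 9 */

struct io_req {
	unsigned long long start_sector; /* first sector to transfer */
	unsigned int size_bytes;         /* total transfer length */
};

/* Returns the number of bytes cut off the tail of the request. */
static unsigned int clamp_to_device(struct io_req *req,
				    unsigned long long dev_sectors)
{
	unsigned long long room;
	unsigned int truncated;

	/* Starts at or past the end: leave it alone so it fails cleanly. */
	if (dev_sectors == 0 || req->start_sector >= dev_sectors)
		return 0;
	room = dev_sectors - req->start_sector;
	if ((req->size_bytes >> SECTOR_SHIFT) <= room)
		return 0; /* fits entirely on the device */
	/* Straddles the end: keep only the sectors that exist. */
	truncated = req->size_bytes - (unsigned int)(room << SECTOR_SHIFT);
	req->size_bytes -= truncated;
	return truncated;
}

int main(void)
{
	/* 8 sectors requested, starting 2 sectors before a 1024-sector end. */
	struct io_req r = { .start_sector = 1022, .size_bytes = 4096 };
	unsigned int cut = clamp_to_device(&r, 1024);

	printf("truncated %u bytes, %u bytes remain\n", cut, r.size_bytes);
	return 0;
}

Truncating rather than rejecting matters because, as the surrounding comment notes, the odd last sectors of a device may not fill a whole block; reads there must still succeed, with the nonexistent tail zeroed.
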
diff --git a/fs/block_dev.c b/fs/block_dev.c index 6d7274619bf9..e2f3ad0879ce 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -304,6 +304,12 @@ static int blkdev_readpage(struct file * file, struct page * page) return block_read_full_page(page, blkdev_get_block); } +static int blkdev_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block); +} + static int blkdev_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -1622,6 +1628,7 @@ static int blkdev_releasepage(struct page *page, gfp_t wait) static const struct address_space_operations def_blk_aops = { .readpage = blkdev_readpage, + .readpages = blkdev_readpages, .writepage = blkdev_writepage, .write_begin = blkdev_write_begin, .write_end = blkdev_write_end, diff --git a/fs/buffer.c b/fs/buffer.c index 3588a80854b2..44c14a87750e 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1253,7 +1253,7 @@ static struct buffer_head *__bread_slow(struct buffer_head *bh) * a local interrupt disable for that. */ -#define BH_LRU_SIZE 8 +#define BH_LRU_SIZE 16 struct bh_lru { struct buffer_head *bhs[BH_LRU_SIZE]; @@ -2956,7 +2956,7 @@ static void end_bio_bh_io_sync(struct bio *bio, int err) /* * This allows us to do IO even on the odd last sectors - * of a device, even if the bh block size is some multiple + * of a device, even if the block size is some multiple * of the physical sector size. * * We'll just truncate the bio to the size of the device, @@ -2966,10 +2966,11 @@ static void end_bio_bh_io_sync(struct bio *bio, int err) * errors, this only handles the "we need to be able to * do IO at the final sector" case. */ -static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh) +void guard_bio_eod(int rw, struct bio *bio) { sector_t maxsector; - unsigned bytes; + struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1]; + unsigned truncated_bytes; maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; if (!maxsector) @@ -2984,23 +2985,20 @@ static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh) return; maxsector -= bio->bi_iter.bi_sector; - bytes = bio->bi_iter.bi_size; - if (likely((bytes >> 9) <= maxsector)) + if (likely((bio->bi_iter.bi_size >> 9) <= maxsector)) return; - /* Uhhuh. We've got a bh that straddles the device size! */ - bytes = maxsector << 9; + /* Uhhuh. We've got a bio that straddles the device size! */ + truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9); /* Truncate the bio.. 
*/ - bio->bi_iter.bi_size = bytes; - bio->bi_io_vec[0].bv_len = bytes; + bio->bi_iter.bi_size -= truncated_bytes; + bvec->bv_len -= truncated_bytes; /* ..and clear the end of the buffer for reads */ if ((rw & RW_MASK) == READ) { - void *kaddr = kmap_atomic(bh->b_page); - memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes); - kunmap_atomic(kaddr); - flush_dcache_page(bh->b_page); + zero_user(bvec->bv_page, bvec->bv_offset + bvec->bv_len, + truncated_bytes); } } @@ -3041,7 +3039,7 @@ int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags) bio->bi_flags |= bio_flags; /* Take care of bh's that straddle the end of the device */ - guard_bh_eod(rw, bio, bh); + guard_bio_eod(rw, bio); if (buffer_meta(bh)) rw |= REQ_META; diff --git a/fs/internal.h b/fs/internal.h index e325b4f9c799..b2623200107b 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -35,6 +35,11 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait) #endif /* + * buffer.c + */ +extern void guard_bio_eod(int rw, struct bio *bio); + +/* * char_dev.c */ extern void __init chrdev_init(void); diff --git a/fs/mpage.c b/fs/mpage.c index 5f9ed622274f..3e79220babac 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -28,6 +28,7 @@ #include <linux/backing-dev.h> #include <linux/pagevec.h> #include <linux/cleancache.h> +#include "internal.h" /* * I/O completion handler for multipage BIOs. @@ -57,6 +58,7 @@ static void mpage_end_io(struct bio *bio, int err) static struct bio *mpage_bio_submit(int rw, struct bio *bio) { bio->bi_end_io = mpage_end_io; + guard_bio_eod(rw, bio); submit_bio(rw, bio); return NULL; } diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index b13992a41bd9..c991616acca9 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -78,7 +78,7 @@ static int create_fd(struct fsnotify_group *group, pr_debug("%s: group=%p event=%p\n", __func__, group, event); - client_fd = get_unused_fd(); + client_fd = get_unused_fd_flags(group->fanotify_data.f_flags); if (client_fd < 0) return client_fd; diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 85e7d2b431d9..9c0898c4cfe1 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -23,9 +23,6 @@ extern int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, struct vfsmount *mnt, int allow_dups); -/* final kfree of a group */ -extern void fsnotify_final_destroy_group(struct fsnotify_group *group); - /* vfsmount specific destruction of a mark */ extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark); /* inode specific destruction of a mark */ diff --git a/fs/notify/group.c b/fs/notify/group.c index ad1995980456..d16b62cb2854 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -31,7 +31,7 @@ /* * Final freeing of a group */ -void fsnotify_final_destroy_group(struct fsnotify_group *group) +static void fsnotify_final_destroy_group(struct fsnotify_group *group) { if (group->ops->free_group_priv) group->ops->free_group_priv(group); diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 0f88bc0b4e6c..7d888d77d59a 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -165,8 +165,10 @@ static void inotify_free_group_priv(struct fsnotify_group *group) /* ideally the idr is empty and we won't hit the BUG in the callback */ idr_for_each(&group->inotify_data.idr, idr_callback, group); idr_destroy(&group->inotify_data.idr); - 
atomic_dec(&group->inotify_data.user->inotify_devs); - free_uid(group->inotify_data.user); + if (group->inotify_data.user) { + atomic_dec(&group->inotify_data.user->inotify_devs); + free_uid(group->inotify_data.user); + } } static void inotify_free_event(struct fsnotify_event *fsn_event) diff --git a/fs/ntfs/debug.c b/fs/ntfs/debug.c index dd6103cc93c1..825a54e8f490 100644 --- a/fs/ntfs/debug.c +++ b/fs/ntfs/debug.c @@ -112,7 +112,7 @@ void __ntfs_error(const char *function, const struct super_block *sb, /* If 1, output debug messages, and if 0, don't. */ int debug_msgs = 0; -void __ntfs_debug (const char *file, int line, const char *function, +void __ntfs_debug(const char *file, int line, const char *function, const char *fmt, ...) { struct va_format vaf; diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index f5ec1ce7a532..643faa44f22b 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -1,7 +1,7 @@ /* * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc. + * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -410,7 +410,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping, BUG_ON(!nr_pages); err = nr = 0; do { - pages[nr] = find_lock_page(mapping, index); + pages[nr] = find_get_page_flags(mapping, index, FGP_LOCK | + FGP_ACCESSED); if (!pages[nr]) { if (!*cached_page) { *cached_page = page_cache_alloc(mapping); diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 6c3296e546c3..9e1e112074fb 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3208,7 +3208,7 @@ static void __exit exit_ntfs_fs(void) } MODULE_AUTHOR("Anton Altaparmakov <anton@tuxera.com>"); -MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc."); +MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc."); MODULE_VERSION(NTFS_VERSION); MODULE_LICENSE("GPL"); #ifdef DEBUG diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 4a231a166cf8..1ef547e49373 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1481,8 +1481,16 @@ static int ocfs2_write_begin_inline(struct address_space *mapping, handle_t *handle; struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto out; + } + page = find_or_create_page(mapping, 0, GFP_NOFS); if (!page) { + ocfs2_commit_trans(osb, handle); ret = -ENOMEM; mlog_errno(ret); goto out; @@ -1494,13 +1502,6 @@ static int ocfs2_write_begin_inline(struct address_space *mapping, wc->w_pages[0] = wc->w_target_page = page; wc->w_num_pages = 1; - handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto out; - } - ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 73039295d0d1..d13385448168 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -2572,6 +2572,25 @@ int o2hb_check_node_heartbeating(u8 node_num) } EXPORT_SYMBOL_GPL(o2hb_check_node_heartbeating); +int o2hb_check_node_heartbeating_no_sem(u8 node_num) +{ + unsigned long 
testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; + unsigned long flags; + + spin_lock_irqsave(&o2hb_live_lock, flags); + o2hb_fill_node_map_from_callback(testing_map, sizeof(testing_map)); + spin_unlock_irqrestore(&o2hb_live_lock, flags); + if (!test_bit(node_num, testing_map)) { + mlog(ML_HEARTBEAT, + "node (%u) does not have heartbeating enabled.\n", + node_num); + return 0; + } + + return 1; +} +EXPORT_SYMBOL_GPL(o2hb_check_node_heartbeating_no_sem); + int o2hb_check_node_heartbeating_from_callback(u8 node_num) { unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index 00ad8e8fea51..3ef5137dc362 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h @@ -80,6 +80,7 @@ void o2hb_fill_node_map(unsigned long *map, void o2hb_exit(void); int o2hb_init(void); int o2hb_check_node_heartbeating(u8 node_num); +int o2hb_check_node_heartbeating_no_sem(u8 node_num); int o2hb_check_node_heartbeating_from_callback(u8 node_num); int o2hb_check_local_node_heartbeating(void); void o2hb_stop_all_regions(void); diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index 73ba81928bce..27d1242c8383 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c @@ -185,29 +185,13 @@ static const struct seq_operations nst_seq_ops = { static int nst_fop_open(struct inode *inode, struct file *file) { struct o2net_send_tracking *dummy_nst; - struct seq_file *seq; - int ret; - dummy_nst = kmalloc(sizeof(struct o2net_send_tracking), GFP_KERNEL); - if (dummy_nst == NULL) { - ret = -ENOMEM; - goto out; - } - dummy_nst->st_task = NULL; - - ret = seq_open(file, &nst_seq_ops); - if (ret) - goto out; - - seq = file->private_data; - seq->private = dummy_nst; + dummy_nst = __seq_open_private(file, &nst_seq_ops, sizeof(*dummy_nst)); + if (!dummy_nst) + return -ENOMEM; o2net_debug_add_nst(dummy_nst); - dummy_nst = NULL; - -out: - kfree(dummy_nst); - return ret; + return 0; } static int nst_fop_release(struct inode *inode, struct file *file) @@ -412,33 +396,27 @@ static const struct seq_operations sc_seq_ops = { .show = sc_seq_show, }; -static int sc_common_open(struct file *file, struct o2net_sock_debug *sd) +static int sc_common_open(struct file *file, int ctxt) { + struct o2net_sock_debug *sd; struct o2net_sock_container *dummy_sc; - struct seq_file *seq; - int ret; - dummy_sc = kmalloc(sizeof(struct o2net_sock_container), GFP_KERNEL); - if (dummy_sc == NULL) { - ret = -ENOMEM; - goto out; - } - dummy_sc->sc_page = NULL; + dummy_sc = kzalloc(sizeof(*dummy_sc), GFP_KERNEL); + if (!dummy_sc) + return -ENOMEM; - ret = seq_open(file, &sc_seq_ops); - if (ret) - goto out; + sd = __seq_open_private(file, &sc_seq_ops, sizeof(*sd)); + if (!sd) { + kfree(dummy_sc); + return -ENOMEM; + } - seq = file->private_data; - seq->private = sd; + sd->dbg_ctxt = ctxt; sd->dbg_sock = dummy_sc; - o2net_debug_add_sc(dummy_sc); - dummy_sc = NULL; + o2net_debug_add_sc(dummy_sc); -out: - kfree(dummy_sc); - return ret; + return 0; } static int sc_fop_release(struct inode *inode, struct file *file) @@ -453,16 +431,7 @@ static int sc_fop_release(struct inode *inode, struct file *file) static int stats_fop_open(struct inode *inode, struct file *file) { - struct o2net_sock_debug *sd; - - sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL); - if (sd == NULL) - return -ENOMEM; - - sd->dbg_ctxt = SHOW_SOCK_STATS; - sd->dbg_sock = NULL; - - return sc_common_open(file, sd); + return sc_common_open(file, SHOW_SOCK_STATS); } static 
const struct file_operations stats_seq_fops = { @@ -474,16 +443,7 @@ static const struct file_operations stats_seq_fops = { static int sc_fop_open(struct inode *inode, struct file *file) { - struct o2net_sock_debug *sd; - - sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL); - if (sd == NULL) - return -ENOMEM; - - sd->dbg_ctxt = SHOW_SOCK_CONTAINERS; - sd->dbg_sock = NULL; - - return sc_common_open(file, sd); + return sc_common_open(file, SHOW_SOCK_CONTAINERS); } static const struct file_operations sc_seq_fops = { diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index ea34952f9496..97de0fbd9f78 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -536,7 +536,7 @@ static void o2net_set_nn_state(struct o2net_node *nn, if (nn->nn_persistent_error || nn->nn_sc_valid) wake_up(&nn->nn_sc_wq); - if (!was_err && nn->nn_persistent_error) { + if (was_valid && !was_err && nn->nn_persistent_error) { o2quo_conn_err(o2net_num_from_nn(nn)); queue_delayed_work(o2net_wq, &nn->nn_still_up, msecs_to_jiffies(O2NET_QUORUM_DELAY_MS)); @@ -1601,7 +1601,15 @@ static void o2net_start_connect(struct work_struct *work) struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; int ret = 0, stop; unsigned int timeout; + unsigned int noio_flag; + /* + * sock_create allocates the sock with GFP_KERNEL. We must set + * per-process flag PF_MEMALLOC_NOIO so that all allocations done + * by this process are done as if GFP_NOIO was specified. So we + * are not reentering filesystem while doing memory reclaim. + */ + noio_flag = memalloc_noio_save(); /* if we're greater we initiate tx, otherwise we accept */ if (o2nm_this_node() <= o2net_num_from_nn(nn)) goto out; @@ -1710,6 +1718,7 @@ out: if (mynode) o2nm_node_put(mynode); + memalloc_noio_restore(noio_flag); return; } @@ -1721,7 +1730,8 @@ static void o2net_connect_expired(struct work_struct *work) spin_lock(&nn->nn_lock); if (!nn->nn_sc_valid) { printk(KERN_NOTICE "o2net: No connection established with " - "node %u after %u.%u seconds, giving up.\n", + "node %u after %u.%u seconds, check network and" + " cluster configuration.\n", o2net_num_from_nn(nn), o2net_idle_timeout() / 1000, o2net_idle_timeout() % 1000); @@ -1835,6 +1845,15 @@ static int o2net_accept_one(struct socket *sock, int *more) struct o2nm_node *local_node = NULL; struct o2net_sock_container *sc = NULL; struct o2net_node *nn; + unsigned int noio_flag; + + /* + * sock_create_lite allocates the sock with GFP_KERNEL. We must set + * per-process flag PF_MEMALLOC_NOIO so that all allocations done + * by this process are done as if GFP_NOIO was specified. So we + * are not reentering filesystem while doing memory reclaim. 
+ */ + noio_flag = memalloc_noio_save(); BUG_ON(sock == NULL); *more = 0; @@ -1951,6 +1970,8 @@ out: o2nm_node_put(local_node); if (sc) sc_put(sc); + + memalloc_noio_restore(noio_flag); return ret; } @@ -2146,17 +2167,13 @@ int o2net_init(void) o2quo_init(); if (o2net_debugfs_init()) - return -ENOMEM; + goto out; o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL); o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); - if (!o2net_hand || !o2net_keep_req || !o2net_keep_resp) { - kfree(o2net_hand); - kfree(o2net_keep_req); - kfree(o2net_keep_resp); - return -ENOMEM; - } + if (!o2net_hand || !o2net_keep_req || !o2net_keep_resp) + goto out; o2net_hand->protocol_version = cpu_to_be64(O2NET_PROTOCOL_VERSION); o2net_hand->connector_id = cpu_to_be64(1); @@ -2181,6 +2198,14 @@ int o2net_init(void) } return 0; + +out: + kfree(o2net_hand); + kfree(o2net_keep_req); + kfree(o2net_keep_resp); + + o2quo_exit(); + return -ENOMEM; } void o2net_exit(void) diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 18f13c2e4a10..149eb556b8c6 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -647,41 +647,30 @@ static const struct seq_operations debug_lockres_ops = { static int debug_lockres_open(struct inode *inode, struct file *file) { struct dlm_ctxt *dlm = inode->i_private; - int ret = -ENOMEM; - struct seq_file *seq; - struct debug_lockres *dl = NULL; + struct debug_lockres *dl; + void *buf; - dl = kzalloc(sizeof(struct debug_lockres), GFP_KERNEL); - if (!dl) { - mlog_errno(ret); + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) goto bail; - } - dl->dl_len = PAGE_SIZE; - dl->dl_buf = kmalloc(dl->dl_len, GFP_KERNEL); - if (!dl->dl_buf) { - mlog_errno(ret); - goto bail; - } + dl = __seq_open_private(file, &debug_lockres_ops, sizeof(*dl)); + if (!dl) + goto bailfree; - ret = seq_open(file, &debug_lockres_ops); - if (ret) { - mlog_errno(ret); - goto bail; - } - - seq = file->private_data; - seq->private = dl; + dl->dl_len = PAGE_SIZE; + dl->dl_buf = buf; dlm_grab(dlm); dl->dl_ctxt = dlm; return 0; + +bailfree: + kfree(buf); bail: - if (dl) - kfree(dl->dl_buf); - kfree(dl); - return ret; + mlog_errno(-ENOMEM); + return -ENOMEM; } static int debug_lockres_release(struct inode *inode, struct file *file) diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 3fcf205ee900..02d315fef432 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -839,7 +839,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, * to back off and try again. This gives heartbeat a chance * to catch up. 
*/ - if (!o2hb_check_node_heartbeating(query->node_idx)) { + if (!o2hb_check_node_heartbeating_no_sem(query->node_idx)) { mlog(0, "node %u is not in our live map yet\n", query->node_idx); @@ -1975,24 +1975,22 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, dlm = kzalloc(sizeof(*dlm), GFP_KERNEL); if (!dlm) { - mlog_errno(-ENOMEM); + ret = -ENOMEM; + mlog_errno(ret); goto leave; } dlm->name = kstrdup(domain, GFP_KERNEL); if (dlm->name == NULL) { - mlog_errno(-ENOMEM); - kfree(dlm); - dlm = NULL; + ret = -ENOMEM; + mlog_errno(ret); goto leave; } dlm->lockres_hash = (struct hlist_head **)dlm_alloc_pagevec(DLM_HASH_PAGES); if (!dlm->lockres_hash) { - mlog_errno(-ENOMEM); - kfree(dlm->name); - kfree(dlm); - dlm = NULL; + ret = -ENOMEM; + mlog_errno(ret); goto leave; } @@ -2002,11 +2000,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, dlm->master_hash = (struct hlist_head **) dlm_alloc_pagevec(DLM_HASH_PAGES); if (!dlm->master_hash) { - mlog_errno(-ENOMEM); - dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); - kfree(dlm->name); - kfree(dlm); - dlm = NULL; + ret = -ENOMEM; + mlog_errno(ret); goto leave; } @@ -2017,14 +2012,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, dlm->node_num = o2nm_this_node(); ret = dlm_create_debugfs_subroot(dlm); - if (ret < 0) { - dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES); - dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); - kfree(dlm->name); - kfree(dlm); - dlm = NULL; + if (ret < 0) goto leave; - } spin_lock_init(&dlm->spinlock); spin_lock_init(&dlm->master_lock); @@ -2085,6 +2074,19 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, atomic_read(&dlm->dlm_refs.refcount)); leave: + if (ret < 0 && dlm) { + if (dlm->master_hash) + dlm_free_pagevec((void **)dlm->master_hash, + DLM_HASH_PAGES); + + if (dlm->lockres_hash) + dlm_free_pagevec((void **)dlm->lockres_hash, + DLM_HASH_PAGES); + + kfree(dlm->name); + kfree(dlm); + dlm = NULL; + } return dlm; } diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 12ba682fc53c..215e41abf101 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -625,9 +625,6 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, return res; error: - if (res && res->lockname.name) - kmem_cache_free(dlm_lockname_cache, (void *)res->lockname.name); - if (res) kmem_cache_free(dlm_lockres_cache, res); return NULL; diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 45067faf5695..3365839d2971 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1710,9 +1710,12 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, BUG(); } else __dlm_lockres_grab_inflight_worker(dlm, res); - } else /* put.. incase we are not the master */ + spin_unlock(&res->spinlock); + } else { + /* put.. 
incase we are not the master */ + spin_unlock(&res->spinlock); dlm_lockres_put(res); - spin_unlock(&res->spinlock); + } } spin_unlock(&dlm->spinlock); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 52cfe99ae056..21262f2b1654 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2892,37 +2892,24 @@ static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) { - int ret; struct ocfs2_dlm_seq_priv *priv; - struct seq_file *seq; struct ocfs2_super *osb; - priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL); + priv = __seq_open_private(file, &ocfs2_dlm_seq_ops, sizeof(*priv)); if (!priv) { - ret = -ENOMEM; - mlog_errno(ret); - goto out; + mlog_errno(-ENOMEM); + return -ENOMEM; } + osb = inode->i_private; ocfs2_get_dlm_debug(osb->osb_dlm_debug); priv->p_dlm_debug = osb->osb_dlm_debug; INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); - ret = seq_open(file, &ocfs2_dlm_seq_ops); - if (ret) { - kfree(priv); - mlog_errno(ret); - goto out; - } - - seq = file->private_data; - seq->private = priv; - ocfs2_add_lockres_tracking(&priv->p_iter_res, priv->p_dlm_debug); -out: - return ret; + return 0; } static const struct file_operations ocfs2_dlm_debug_fops = { diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 2930e231f3f9..682732f3f0d8 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -760,7 +760,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, struct address_space *mapping = inode->i_mapping; struct page *page; unsigned long index = abs_from >> PAGE_CACHE_SHIFT; - handle_t *handle = NULL; + handle_t *handle; int ret = 0; unsigned zero_from, zero_to, block_start, block_end; struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; @@ -769,11 +769,17 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); BUG_ON(abs_from & (inode->i_blkbits - 1)); + handle = ocfs2_zero_start_ordered_transaction(inode, di_bh); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } + page = find_or_create_page(mapping, index, GFP_NOFS); if (!page) { ret = -ENOMEM; mlog_errno(ret); - goto out; + goto out_commit_trans; } /* Get the offsets within the page that we want to zero */ @@ -805,15 +811,6 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, goto out_unlock; } - if (!handle) { - handle = ocfs2_zero_start_ordered_transaction(inode, - di_bh); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - handle = NULL; - break; - } - } /* must not update i_size! */ ret = block_commit_write(page, block_start + 1, @@ -824,27 +821,29 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, ret = 0; } + /* + * fs-writeback will release the dirty pages without page lock + * whose offset are over inode size, the release happens at + * block_write_full_page(). + */ + i_size_write(inode, abs_to); + inode->i_blocks = ocfs2_inode_sector_count(inode); + di->i_size = cpu_to_le64((u64)i_size_read(inode)); + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); + di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + di->i_mtime_nsec = di->i_ctime_nsec; if (handle) { - /* - * fs-writeback will release the dirty pages without page lock - * whose offset are over inode size, the release happens at - * block_write_full_page(). 
- */ - i_size_write(inode, abs_to); - inode->i_blocks = ocfs2_inode_sector_count(inode); - di->i_size = cpu_to_le64((u64)i_size_read(inode)); - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); - di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); - di->i_mtime_nsec = di->i_ctime_nsec; ocfs2_journal_dirty(handle, di_bh); ocfs2_update_inode_fsync_trans(handle, inode, 1); - ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); } out_unlock: unlock_page(page); page_cache_release(page); +out_commit_trans: + if (handle) + ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); out: return ret; } diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index a6c991c0fc98..a9b76de46047 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -162,7 +162,7 @@ static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode) { int c_to_s_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits - 9; - return (blkcnt_t)(OCFS2_I(inode)->ip_clusters << c_to_s_bits); + return (blkcnt_t)OCFS2_I(inode)->ip_clusters << c_to_s_bits; } /* Validate that a bh contains a valid inode */ diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 6219aaadeb08..74caffeeee1d 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -404,7 +404,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode, * 'vict_blkno' was out of the valid range. */ if ((vict_blkno < le64_to_cpu(rec->c_blkno)) || - (vict_blkno >= (le32_to_cpu(ac_dinode->id1.bitmap1.i_total) << + (vict_blkno >= ((u64)le32_to_cpu(ac_dinode->id1.bitmap1.i_total) << bits_per_unit))) { ret = -EINVAL; goto out; diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 13a8537d8e8b..720aa389e0ea 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -591,7 +591,7 @@ static int ocfs2_control_release(struct inode *inode, struct file *file) */ ocfs2_control_this_node = -1; running_proto.pv_major = 0; - running_proto.pv_major = 0; + running_proto.pv_minor = 0; } out: diff --git a/fs/proc/base.c b/fs/proc/base.c index baf852b648ad..4c542b907754 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -632,29 +632,35 @@ static const struct file_operations proc_single_file_operations = { .release = single_release, }; -static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) + +struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) { - struct task_struct *task = get_proc_task(file_inode(file)); - struct mm_struct *mm; + struct task_struct *task = get_proc_task(inode); + struct mm_struct *mm = ERR_PTR(-ESRCH); - if (!task) - return -ESRCH; + if (task) { + mm = mm_access(task, mode); + put_task_struct(task); - mm = mm_access(task, mode); - put_task_struct(task); + if (!IS_ERR_OR_NULL(mm)) { + /* ensure this mm_struct can't be freed */ + atomic_inc(&mm->mm_count); + /* but do not pin its memory */ + mmput(mm); + } + } + + return mm; +} + +static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) +{ + struct mm_struct *mm = proc_mem_open(inode, mode); if (IS_ERR(mm)) return PTR_ERR(mm); - if (mm) { - /* ensure this mm_struct can't be freed */ - atomic_inc(&mm->mm_count); - /* but do not pin its memory */ - mmput(mm); - } - file->private_data = mm; - return 0; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 7da13e49128a..aa7a0ee182e1 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -268,8 +268,9 @@ extern int proc_remount(struct super_block *, int *, char *); * task_[no]mmu.c */ struct 
proc_maps_private { - struct pid *pid; + struct inode *inode; struct task_struct *task; + struct mm_struct *mm; #ifdef CONFIG_MMU struct vm_area_struct *tail_vma; #endif @@ -278,6 +279,8 @@ struct proc_maps_private { #endif }; +struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode); + extern const struct file_operations proc_pid_maps_operations; extern const struct file_operations proc_tid_maps_operations; extern const struct file_operations proc_pid_numa_maps_operations; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 6df8d0722c97..91a4e6426321 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -610,8 +610,10 @@ static void __init proc_kcore_text_init(void) struct kcore_list kcore_modules; static void __init add_modules_range(void) { - kclist_add(&kcore_modules, (void *)MODULES_VADDR, + if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) { + kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_END - MODULES_VADDR, KCORE_VMALLOC); + } } #else static void __init add_modules_range(void) diff --git a/fs/proc/page.c b/fs/proc/page.c index e647c55275d9..1e3187da1fed 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -133,6 +133,9 @@ u64 stable_page_flags(struct page *page) if (PageBuddy(page)) u |= 1 << KPF_BUDDY; + if (PageBalloon(page)) + u |= 1 << KPF_BALLOON; + u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index c34156888d70..b7a7dc963a35 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -87,32 +87,14 @@ unsigned long task_statm(struct mm_struct *mm, #ifdef CONFIG_NUMA /* - * These functions are for numa_maps but called in generic **maps seq_file - * ->start(), ->stop() ops. - * - * numa_maps scans all vmas under mmap_sem and checks their mempolicy. - * Each mempolicy object is controlled by reference counting. The problem here - * is how to avoid accessing dead mempolicy object. - * - * Because we're holding mmap_sem while reading seq_file, it's safe to access - * each vma's mempolicy, no vma objects will never drop refs to mempolicy. - * - * A task's mempolicy (task->mempolicy) has different behavior. task->mempolicy - * is set and replaced under mmap_sem but unrefed and cleared under task_lock(). - * So, without task_lock(), we cannot trust get_vma_policy() because we cannot - * gurantee the task never exits under us. But taking task_lock() around - * get_vma_plicy() causes lock order problem. - * - * To access task->mempolicy without lock, we hold a reference count of an - * object pointed by task->mempolicy and remember it. This will guarantee - * that task->mempolicy points to an alive object or NULL in numa_maps accesses. + * Save get_task_policy() for show_numa_map(). 
*/ static void hold_task_mempolicy(struct proc_maps_private *priv) { struct task_struct *task = priv->task; task_lock(task); - priv->task_mempolicy = task->mempolicy; + priv->task_mempolicy = get_task_policy(task); mpol_get(priv->task_mempolicy); task_unlock(task); } @@ -129,124 +111,154 @@ static void release_task_mempolicy(struct proc_maps_private *priv) } #endif -static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) +static void vma_stop(struct proc_maps_private *priv) { - if (vma && vma != priv->tail_vma) { - struct mm_struct *mm = vma->vm_mm; - release_task_mempolicy(priv); - up_read(&mm->mmap_sem); - mmput(mm); - } + struct mm_struct *mm = priv->mm; + + release_task_mempolicy(priv); + up_read(&mm->mmap_sem); + mmput(mm); +} + +static struct vm_area_struct * +m_next_vma(struct proc_maps_private *priv, struct vm_area_struct *vma) +{ + if (vma == priv->tail_vma) + return NULL; + return vma->vm_next ?: priv->tail_vma; +} + +static void m_cache_vma(struct seq_file *m, struct vm_area_struct *vma) +{ + if (m->count < m->size) /* vma is copied successfully */ + m->version = m_next_vma(m->private, vma) ? vma->vm_start : -1UL; } -static void *m_start(struct seq_file *m, loff_t *pos) +static void *m_start(struct seq_file *m, loff_t *ppos) { struct proc_maps_private *priv = m->private; unsigned long last_addr = m->version; struct mm_struct *mm; - struct vm_area_struct *vma, *tail_vma = NULL; - loff_t l = *pos; - - /* Clear the per syscall fields in priv */ - priv->task = NULL; - priv->tail_vma = NULL; - - /* - * We remember last_addr rather than next_addr to hit with - * vmacache most of the time. We have zero last_addr at - * the beginning and also after lseek. We will have -1 last_addr - * after the end of the vmas. - */ + struct vm_area_struct *vma; + unsigned int pos = *ppos; + /* See m_cache_vma(). Zero at the start or after lseek. */ if (last_addr == -1UL) return NULL; - priv->task = get_pid_task(priv->pid, PIDTYPE_PID); + priv->task = get_proc_task(priv->inode); if (!priv->task) return ERR_PTR(-ESRCH); - mm = mm_access(priv->task, PTRACE_MODE_READ); - if (!mm || IS_ERR(mm)) - return mm; - down_read(&mm->mmap_sem); + mm = priv->mm; + if (!mm || !atomic_inc_not_zero(&mm->mm_users)) + return NULL; - tail_vma = get_gate_vma(priv->task->mm); - priv->tail_vma = tail_vma; + down_read(&mm->mmap_sem); hold_task_mempolicy(priv); - /* Start with last addr hint */ - vma = find_vma(mm, last_addr); - if (last_addr && vma) { - vma = vma->vm_next; - goto out; + priv->tail_vma = get_gate_vma(mm); + + if (last_addr) { + vma = find_vma(mm, last_addr); + if (vma && (vma = m_next_vma(priv, vma))) + return vma; } - /* - * Check the vma index is within the range and do - * sequential scan until m_index. - */ - vma = NULL; - if ((unsigned long)l < mm->map_count) { - vma = mm->mmap; - while (l-- && vma) + m->version = 0; + if (pos < mm->map_count) { + for (vma = mm->mmap; pos; pos--) { + m->version = vma->vm_start; vma = vma->vm_next; - goto out; + } + return vma; } - if (l != mm->map_count) - tail_vma = NULL; /* After gate vma */ - -out: - if (vma) - return vma; + /* we do not bother to update m->version in this case */ + if (pos == mm->map_count && priv->tail_vma) + return priv->tail_vma; - release_task_mempolicy(priv); - /* End of vmas has been reached */ - m->version = (tail_vma != NULL)? 
0: -1UL; - up_read(&mm->mmap_sem); - mmput(mm); - return tail_vma; + vma_stop(priv); + return NULL; } static void *m_next(struct seq_file *m, void *v, loff_t *pos) { struct proc_maps_private *priv = m->private; - struct vm_area_struct *vma = v; - struct vm_area_struct *tail_vma = priv->tail_vma; + struct vm_area_struct *next; (*pos)++; - if (vma && (vma != tail_vma) && vma->vm_next) - return vma->vm_next; - vma_stop(priv, vma); - return (vma != tail_vma)? tail_vma: NULL; + next = m_next_vma(priv, v); + if (!next) + vma_stop(priv); + return next; } static void m_stop(struct seq_file *m, void *v) { struct proc_maps_private *priv = m->private; - struct vm_area_struct *vma = v; - if (!IS_ERR(vma)) - vma_stop(priv, vma); - if (priv->task) + if (!IS_ERR_OR_NULL(v)) + vma_stop(priv); + if (priv->task) { put_task_struct(priv->task); + priv->task = NULL; + } +} + +static int proc_maps_open(struct inode *inode, struct file *file, + const struct seq_operations *ops, int psize) +{ + struct proc_maps_private *priv = __seq_open_private(file, ops, psize); + + if (!priv) + return -ENOMEM; + + priv->inode = inode; + priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); + if (IS_ERR(priv->mm)) { + int err = PTR_ERR(priv->mm); + + seq_release_private(inode, file); + return err; + } + + return 0; +} + +static int proc_map_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct proc_maps_private *priv = seq->private; + + if (priv->mm) + mmdrop(priv->mm); + + return seq_release_private(inode, file); } static int do_maps_open(struct inode *inode, struct file *file, const struct seq_operations *ops) { - struct proc_maps_private *priv; - int ret = -ENOMEM; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (priv) { - priv->pid = proc_pid(inode); - ret = seq_open(file, ops); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = priv; - } else { - kfree(priv); - } + return proc_maps_open(inode, file, ops, + sizeof(struct proc_maps_private)); +} + +static pid_t pid_of_stack(struct proc_maps_private *priv, + struct vm_area_struct *vma, bool is_pid) +{ + struct inode *inode = priv->inode; + struct task_struct *task; + pid_t ret = 0; + + rcu_read_lock(); + task = pid_task(proc_pid(inode), PIDTYPE_PID); + if (task) { + task = task_of_stack(task, vma, is_pid); + if (task) + ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info); } + rcu_read_unlock(); + return ret; } @@ -256,7 +268,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) struct mm_struct *mm = vma->vm_mm; struct file *file = vma->vm_file; struct proc_maps_private *priv = m->private; - struct task_struct *task = priv->task; vm_flags_t flags = vma->vm_flags; unsigned long ino = 0; unsigned long long pgoff = 0; @@ -321,8 +332,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) goto done; } - tid = vm_is_stack(task, vma, is_pid); - + tid = pid_of_stack(priv, vma, is_pid); if (tid != 0) { /* * Thread stack in /proc/PID/task/TID/maps or @@ -349,15 +359,8 @@ done: static int show_map(struct seq_file *m, void *v, int is_pid) { - struct vm_area_struct *vma = v; - struct proc_maps_private *priv = m->private; - struct task_struct *task = priv->task; - - show_map_vma(m, vma, is_pid); - - if (m->count < m->size) /* vma is copied successfully */ - m->version = (vma != get_gate_vma(task->mm)) - ? 
vma->vm_start : 0; + show_map_vma(m, v, is_pid); + m_cache_vma(m, v); return 0; } @@ -399,14 +402,14 @@ const struct file_operations proc_pid_maps_operations = { .open = pid_maps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = proc_map_release, }; const struct file_operations proc_tid_maps_operations = { .open = tid_maps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = proc_map_release, }; /* @@ -583,8 +586,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) static int show_smap(struct seq_file *m, void *v, int is_pid) { - struct proc_maps_private *priv = m->private; - struct task_struct *task = priv->task; struct vm_area_struct *vma = v; struct mem_size_stats mss; struct mm_walk smaps_walk = { @@ -637,10 +638,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) mss.nonlinear >> 10); show_smap_vma_flags(m, vma); - - if (m->count < m->size) /* vma is copied successfully */ - m->version = (vma != get_gate_vma(task->mm)) - ? vma->vm_start : 0; + m_cache_vma(m, vma); return 0; } @@ -682,14 +680,14 @@ const struct file_operations proc_pid_smaps_operations = { .open = pid_smaps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = proc_map_release, }; const struct file_operations proc_tid_smaps_operations = { .open = tid_smaps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = proc_map_release, }; /* @@ -1029,7 +1027,6 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, spinlock_t *ptl; pte_t *pte; int err = 0; - pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); /* find the first VMA at or above 'addr' */ vma = find_vma(walk->mm, addr); @@ -1043,6 +1040,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, for (; addr != end; addr += PAGE_SIZE) { unsigned long offset; + pagemap_entry_t pme; offset = (addr & ~PAGEMAP_WALK_MASK) >> PAGE_SHIFT; @@ -1057,32 +1055,51 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (pmd_trans_unstable(pmd)) return 0; - for (; addr != end; addr += PAGE_SIZE) { - int flags2; - - /* check to see if we've left 'vma' behind - * and need a new, higher one */ - if (vma && (addr >= vma->vm_end)) { - vma = find_vma(walk->mm, addr); - if (vma && (vma->vm_flags & VM_SOFTDIRTY)) - flags2 = __PM_SOFT_DIRTY; - else - flags2 = 0; - pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2)); + + while (1) { + /* End of address space hole, which we mark as non-present. */ + unsigned long hole_end; + + if (vma) + hole_end = min(end, vma->vm_start); + else + hole_end = end; + + for (; addr < hole_end; addr += PAGE_SIZE) { + pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); + + err = add_to_pagemap(addr, &pme, pm); + if (err) + return err; } - /* check that 'vma' actually covers this address, - * and that it isn't a huge page vma */ - if (vma && (vma->vm_start <= addr) && - !is_vm_hugetlb_page(vma)) { + if (!vma || vma->vm_start >= end) + break; + /* + * We can't possibly be in a hugetlb VMA. In general, + * for a mm_walk with a pmd_entry and a hugetlb_entry, + * the pmd_entry can only be called on addresses in a + * hugetlb if the walk starts in a non-hugetlb VMA and + * spans a hugepage VMA. Since pagemap_read walks are + * PMD-sized and PMD-aligned, this will never be true. + */ + BUG_ON(is_vm_hugetlb_page(vma)); + + /* Addresses in the VMA. 
*/ + for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) { + pagemap_entry_t pme; pte = pte_offset_map(pmd, addr); pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); - /* unmap before userspace copy */ pte_unmap(pte); + err = add_to_pagemap(addr, &pme, pm); + if (err) + return err; } - err = add_to_pagemap(addr, &pme, pm); - if (err) - return err; + + if (addr == end) + break; + + vma = find_vma(walk->mm, addr); } cond_resched(); @@ -1415,7 +1432,6 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) struct vm_area_struct *vma = v; struct numa_maps *md = &numa_priv->md; struct file *file = vma->vm_file; - struct task_struct *task = proc_priv->task; struct mm_struct *mm = vma->vm_mm; struct mm_walk walk = {}; struct mempolicy *pol; @@ -1435,9 +1451,13 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) walk.private = md; walk.mm = mm; - pol = get_vma_policy(task, vma, vma->vm_start); - mpol_to_str(buffer, sizeof(buffer), pol); - mpol_cond_put(pol); + pol = __get_vma_policy(vma, vma->vm_start); + if (pol) { + mpol_to_str(buffer, sizeof(buffer), pol); + mpol_cond_put(pol); + } else { + mpol_to_str(buffer, sizeof(buffer), proc_priv->task_mempolicy); + } seq_printf(m, "%08lx %s", vma->vm_start, buffer); @@ -1447,7 +1467,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { seq_puts(m, " heap"); } else { - pid_t tid = vm_is_stack(task, vma, is_pid); + pid_t tid = pid_of_stack(proc_priv, vma, is_pid); if (tid != 0) { /* * Thread stack in /proc/PID/task/TID/maps or @@ -1495,9 +1515,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) seq_printf(m, " N%d=%lu", nid, md->node[nid]); out: seq_putc(m, '\n'); - - if (m->count < m->size) - m->version = (vma != proc_priv->tail_vma) ? 
vma->vm_start : 0; + m_cache_vma(m, vma); return 0; } @@ -1528,20 +1546,8 @@ static const struct seq_operations proc_tid_numa_maps_op = { static int numa_maps_open(struct inode *inode, struct file *file, const struct seq_operations *ops) { - struct numa_maps_private *priv; - int ret = -ENOMEM; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (priv) { - priv->proc_maps.pid = proc_pid(inode); - ret = seq_open(file, ops); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = priv; - } else { - kfree(priv); - } - } - return ret; + return proc_maps_open(inode, file, ops, + sizeof(struct numa_maps_private)); } static int pid_numa_maps_open(struct inode *inode, struct file *file) @@ -1558,13 +1564,13 @@ const struct file_operations proc_pid_numa_maps_operations = { .open = pid_numa_maps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = proc_map_release, }; const struct file_operations proc_tid_numa_maps_operations = { .open = tid_numa_maps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = proc_map_release, }; #endif /* CONFIG_NUMA */ diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 678455d2d683..599ec2e20104 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -123,6 +123,25 @@ unsigned long task_statm(struct mm_struct *mm, return size; } +static pid_t pid_of_stack(struct proc_maps_private *priv, + struct vm_area_struct *vma, bool is_pid) +{ + struct inode *inode = priv->inode; + struct task_struct *task; + pid_t ret = 0; + + rcu_read_lock(); + task = pid_task(proc_pid(inode), PIDTYPE_PID); + if (task) { + task = task_of_stack(task, vma, is_pid); + if (task) + ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info); + } + rcu_read_unlock(); + + return ret; +} + /* * display a single VMA to a sequenced file */ @@ -163,7 +182,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, seq_pad(m, ' '); seq_path(m, &file->f_path, ""); } else if (mm) { - pid_t tid = vm_is_stack(priv->task, vma, is_pid); + pid_t tid = pid_of_stack(priv, vma, is_pid); if (tid != 0) { seq_pad(m, ' '); @@ -212,22 +231,22 @@ static void *m_start(struct seq_file *m, loff_t *pos) loff_t n = *pos; /* pin the task and mm whilst we play with them */ - priv->task = get_pid_task(priv->pid, PIDTYPE_PID); + priv->task = get_proc_task(priv->inode); if (!priv->task) return ERR_PTR(-ESRCH); - mm = mm_access(priv->task, PTRACE_MODE_READ); - if (!mm || IS_ERR(mm)) { - put_task_struct(priv->task); - priv->task = NULL; - return mm; - } - down_read(&mm->mmap_sem); + mm = priv->mm; + if (!mm || !atomic_inc_not_zero(&mm->mm_users)) + return NULL; + down_read(&mm->mmap_sem); /* start from the Nth VMA */ for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) if (n-- == 0) return p; + + up_read(&mm->mmap_sem); + mmput(mm); return NULL; } @@ -235,11 +254,13 @@ static void m_stop(struct seq_file *m, void *_vml) { struct proc_maps_private *priv = m->private; + if (!IS_ERR_OR_NULL(_vml)) { + up_read(&priv->mm->mmap_sem); + mmput(priv->mm); + } if (priv->task) { - struct mm_struct *mm = priv->task->mm; - up_read(&mm->mmap_sem); - mmput(mm); put_task_struct(priv->task); + priv->task = NULL; } } @@ -269,20 +290,33 @@ static int maps_open(struct inode *inode, struct file *file, const struct seq_operations *ops) { struct proc_maps_private *priv; - int ret = -ENOMEM; - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (priv) { - priv->pid = proc_pid(inode); - ret = seq_open(file, ops); - if (!ret) { - struct seq_file *m 
= file->private_data; - m->private = priv; } else { kfree(priv); } + + priv = __seq_open_private(file, ops, sizeof(*priv)); + if (!priv) + return -ENOMEM; + + priv->inode = inode; + priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); + if (IS_ERR(priv->mm)) { + int err = PTR_ERR(priv->mm); + + seq_release_private(inode, file); + return err; } - return ret; + + return 0; +} + + +static int map_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct proc_maps_private *priv = seq->private; + + if (priv->mm) + mmdrop(priv->mm); + + return seq_release_private(inode, file); } static int pid_maps_open(struct inode *inode, struct file *file) @@ -299,13 +333,13 @@ const struct file_operations proc_pid_maps_operations = { .open = pid_maps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = map_release, }; const struct file_operations proc_tid_maps_operations = { .open = tid_maps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = map_release, };
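
A pattern recurs throughout this series (nst_fop_open, sc_common_open, debug_lockres_open, ocfs2_dlm_debug_open, proc_maps_open, maps_open): the open-coded kmalloc + seq_open + seq->private wiring, with its hand-unwound failure paths, collapses into a single __seq_open_private() call, which allocates a zeroed private area of the given size, opens the seq_file, and installs the area as seq->private in one step. A schematic before/after, not lifted from the patch; my_priv, my_seq_ops, and the function names are hypothetical:

/* Before: allocate, open, wire up -- two failure paths to unwind. */
static int my_open_old(struct inode *inode, struct file *file)
{
	struct my_priv *priv;
	struct seq_file *seq;
	int ret;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;
	ret = seq_open(file, &my_seq_ops);
	if (ret) {
		kfree(priv);
		return ret;
	}
	seq = file->private_data;
	seq->private = priv;
	return 0;
}

/* After: one call does all three steps, or fails as a unit. */
static int my_open_new(struct inode *inode, struct file *file)
{
	struct my_priv *priv;

	priv = __seq_open_private(file, &my_seq_ops, sizeof(*priv));
	if (!priv)
		return -ENOMEM;
	/* priv is zeroed and already installed as seq->private. */
	return 0;
}

Both variants pair with seq_release_private() in the matching .release method; where extra state hangs off the private area, as with the cached mm_struct in proc_maps_private above, a wrapper such as proc_map_release() drops that state first and then calls seq_release_private().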