From b193d59a4863ea670872d76dc99231ddeb598625 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 4 Apr 2013 15:55:00 -0400 Subject: NFSv4: Fix a memory leak in nfs4_discover_server_trunking When we assign a new rpc_client to clp->cl_rpcclient, we need to destroy the old one. Signed-off-by: Trond Myklebust Cc: Chuck Lever Cc: stable@vger.kernel.org [>=3.7] --- fs/nfs/nfs4state.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 6ace365c6334..d41a3518509f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1886,7 +1886,13 @@ again: status = PTR_ERR(clnt); break; } - clp->cl_rpcclient = clnt; + /* Note: this is safe because we haven't yet marked the + * client as ready, so we are the only user of + * clp->cl_rpcclient + */ + clnt = xchg(&clp->cl_rpcclient, clnt); + rpc_shutdown_client(clnt); + clnt = clp->cl_rpcclient; goto again; case -NFS4ERR_MINOR_VERS_MISMATCH: -- cgit v1.2.3 From 7b1f1fd1842e6ede25183c267ae733a7f67f00bc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Apr 2013 16:11:11 -0400 Subject: NFSv4/4.1: Fix bugs in nfs4[01]_walk_client_list It is unsafe to use list_for_each_entry_safe() here, because when we drop the nn->nfs_client_lock, we pin the _current_ list entry and ensure that it stays in the list, but we don't do the same for the _next_ list entry. Use of list_for_each_entry() is therefore the correct thing to do. Also fix the refcounting in nfs41_walk_client_list(). Finally, ensure that the nfs_client has finished being initialised and, in the case of NFSv4.1, that the session is set up. Signed-off-by: Trond Myklebust Cc: Chuck Lever Cc: Bryan Schumaker Cc: stable@vger.kernel.org [>= 3.7] --- fs/nfs/nfs4client.c | 44 ++++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index ac4fc9a8fdbc..c7b346f8cc44 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -300,7 +300,7 @@ int nfs40_walk_client_list(struct nfs_client *new, struct rpc_cred *cred) { struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); - struct nfs_client *pos, *n, *prev = NULL; + struct nfs_client *pos, *prev = NULL; struct nfs4_setclientid_res clid = { .clientid = new->cl_clientid, .confirm = new->cl_confirm, @@ -308,10 +308,23 @@ int nfs40_walk_client_list(struct nfs_client *new, int status = -NFS4ERR_STALE_CLIENTID; spin_lock(&nn->nfs_client_lock); - list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { + list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { /* If "pos" isn't marked ready, we can't trust the * remaining fields in "pos" */ - if (pos->cl_cons_state < NFS_CS_READY) + if (pos->cl_cons_state > NFS_CS_READY) { + atomic_inc(&pos->cl_count); + spin_unlock(&nn->nfs_client_lock); + + if (prev) + nfs_put_client(prev); + prev = pos; + + status = nfs_wait_client_init_complete(pos); + spin_lock(&nn->nfs_client_lock); + if (status < 0) + continue; + } + if (pos->cl_cons_state != NFS_CS_READY) continue; if (pos->rpc_ops != new->rpc_ops) @@ -423,16 +436,16 @@ int nfs41_walk_client_list(struct nfs_client *new, struct rpc_cred *cred) { struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); - struct nfs_client *pos, *n, *prev = NULL; + struct nfs_client *pos, *prev = NULL; int status = -NFS4ERR_STALE_CLIENTID; spin_lock(&nn->nfs_client_lock); - list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { + list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { /* If "pos" isn't marked ready, we can't trust the * remaining fields in "pos", especially the client * ID and serverowner fields. Wait for CREATE_SESSION * to finish. */ - if (pos->cl_cons_state < NFS_CS_READY) { + if (pos->cl_cons_state > NFS_CS_READY) { atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); @@ -440,18 +453,17 @@ int nfs41_walk_client_list(struct nfs_client *new, nfs_put_client(prev); prev = pos; - nfs4_schedule_lease_recovery(pos); status = nfs_wait_client_init_complete(pos); - if (status < 0) { - nfs_put_client(pos); - spin_lock(&nn->nfs_client_lock); - continue; + if (status == 0) { + nfs4_schedule_lease_recovery(pos); + status = nfs4_wait_clnt_recover(pos); } - status = pos->cl_cons_state; spin_lock(&nn->nfs_client_lock); if (status < 0) continue; } + if (pos->cl_cons_state != NFS_CS_READY) + continue; if (pos->rpc_ops != new->rpc_ops) continue; @@ -469,17 +481,17 @@ int nfs41_walk_client_list(struct nfs_client *new, continue; atomic_inc(&pos->cl_count); - spin_unlock(&nn->nfs_client_lock); + *result = pos; dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", __func__, pos, atomic_read(&pos->cl_count)); - - *result = pos; - return 0; + break; } /* No matching nfs_client found. */ spin_unlock(&nn->nfs_client_lock); dprintk("NFS: <-- %s status = %d\n", __func__, status); + if (prev) + nfs_put_client(prev); return status; } #endif /* CONFIG_NFS_V4_1 */ -- cgit v1.2.3 From fa332941c0c7c00e3420078268b7558d0ef792b5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Apr 2013 12:56:52 -0400 Subject: NFSv4: Fix another potential state manager deadlock Don't hold the NFSv4 sequence id while we check for open permission. The call to ACCESS may block due to reboot recovery. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 26431cf62ddb..0ad025eb523b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1046,6 +1046,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) /* Save the delegation */ nfs4_stateid_copy(&stateid, &delegation->stateid); rcu_read_unlock(); + nfs_release_seqid(opendata->o_arg.seqid); ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); if (ret != 0) goto out; -- cgit v1.2.3 From 52f21999c7b921a0390708b66ed286282c2e4bee Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Mar 2013 13:30:23 -0400 Subject: ecryptfs: close rmmod race Signed-off-by: Al Viro --- fs/ecryptfs/miscdev.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 412e6eda25f8..e4141f257495 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -80,13 +80,6 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) int rc; mutex_lock(&ecryptfs_daemon_hash_mux); - rc = try_module_get(THIS_MODULE); - if (rc == 0) { - rc = -EIO; - printk(KERN_ERR "%s: Error attempting to increment module use " - "count; rc = [%d]\n", __func__, rc); - goto out_unlock_daemon_list; - } rc = ecryptfs_find_daemon_by_euid(&daemon); if (!rc) { rc = -EINVAL; @@ -96,7 +89,7 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) if (rc) { printk(KERN_ERR "%s: Error attempting to spawn daemon; " "rc = [%d]\n", __func__, rc); - goto out_module_put_unlock_daemon_list; + goto out_unlock_daemon_list; } mutex_lock(&daemon->mux); if (daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN) { @@ -108,9 +101,6 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) atomic_inc(&ecryptfs_num_miscdev_opens); out_unlock_daemon: mutex_unlock(&daemon->mux); -out_module_put_unlock_daemon_list: - if (rc) - module_put(THIS_MODULE); out_unlock_daemon_list: mutex_unlock(&ecryptfs_daemon_hash_mux); return rc; @@ -147,7 +137,6 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file) "bug.\n", __func__, rc); BUG(); } - module_put(THIS_MODULE); return rc; } @@ -471,6 +460,7 @@ out_free: static const struct file_operations ecryptfs_miscdev_fops = { + .owner = THIS_MODULE, .open = ecryptfs_miscdev_open, .poll = ecryptfs_miscdev_poll, .read = ecryptfs_miscdev_read, -- cgit v1.2.3 From 8ce584c7416d8a85a6f3edc17d1cddefe331e87e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Mar 2013 20:13:46 -0400 Subject: procfs: add proc_remove_subtree() just what it sounds like; do that only to procfs subtrees you've created - doing that to something shared with another driver is not only antisocial, but might cause interesting races with proc_create() and its ilk. Signed-off-by: Al Viro --- fs/proc/generic.c | 119 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 89 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 4b3b3ffb52f1..21e1a8f1659d 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -755,37 +755,8 @@ void pde_put(struct proc_dir_entry *pde) free_proc_entry(pde); } -/* - * Remove a /proc entry and free it if it's not currently in use. - */ -void remove_proc_entry(const char *name, struct proc_dir_entry *parent) +static void entry_rundown(struct proc_dir_entry *de) { - struct proc_dir_entry **p; - struct proc_dir_entry *de = NULL; - const char *fn = name; - unsigned int len; - - spin_lock(&proc_subdir_lock); - if (__xlate_proc_name(name, &parent, &fn) != 0) { - spin_unlock(&proc_subdir_lock); - return; - } - len = strlen(fn); - - for (p = &parent->subdir; *p; p=&(*p)->next ) { - if (proc_match(len, fn, *p)) { - de = *p; - *p = de->next; - de->next = NULL; - break; - } - } - spin_unlock(&proc_subdir_lock); - if (!de) { - WARN(1, "name '%s'\n", name); - return; - } - spin_lock(&de->pde_unload_lock); /* * Stop accepting new callers into module. If you're @@ -817,6 +788,40 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) spin_lock(&de->pde_unload_lock); } spin_unlock(&de->pde_unload_lock); +} + +/* + * Remove a /proc entry and free it if it's not currently in use. + */ +void remove_proc_entry(const char *name, struct proc_dir_entry *parent) +{ + struct proc_dir_entry **p; + struct proc_dir_entry *de = NULL; + const char *fn = name; + unsigned int len; + + spin_lock(&proc_subdir_lock); + if (__xlate_proc_name(name, &parent, &fn) != 0) { + spin_unlock(&proc_subdir_lock); + return; + } + len = strlen(fn); + + for (p = &parent->subdir; *p; p=&(*p)->next ) { + if (proc_match(len, fn, *p)) { + de = *p; + *p = de->next; + de->next = NULL; + break; + } + } + spin_unlock(&proc_subdir_lock); + if (!de) { + WARN(1, "name '%s'\n", name); + return; + } + + entry_rundown(de); if (S_ISDIR(de->mode)) parent->nlink--; @@ -827,3 +832,57 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) pde_put(de); } EXPORT_SYMBOL(remove_proc_entry); + +int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) +{ + struct proc_dir_entry **p; + struct proc_dir_entry *root = NULL, *de, *next; + const char *fn = name; + unsigned int len; + + spin_lock(&proc_subdir_lock); + if (__xlate_proc_name(name, &parent, &fn) != 0) { + spin_unlock(&proc_subdir_lock); + return -ENOENT; + } + len = strlen(fn); + + for (p = &parent->subdir; *p; p=&(*p)->next ) { + if (proc_match(len, fn, *p)) { + root = *p; + *p = root->next; + root->next = NULL; + break; + } + } + if (!root) { + spin_unlock(&proc_subdir_lock); + return -ENOENT; + } + de = root; + while (1) { + next = de->subdir; + if (next) { + de->subdir = next->next; + next->next = NULL; + de = next; + continue; + } + spin_unlock(&proc_subdir_lock); + + entry_rundown(de); + next = de->parent; + if (S_ISDIR(de->mode)) + next->nlink--; + de->nlink = 0; + if (de == root) + break; + pde_put(de); + + spin_lock(&proc_subdir_lock); + de = next; + } + pde_put(root); + return 0; +} +EXPORT_SYMBOL(remove_proc_subtree); -- cgit v1.2.3 From e9c5d8a562f01b211926d70443378eb14b29a676 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 9 Apr 2013 17:33:29 +0400 Subject: mnt: release locks on error path in do_loopback do_loopback calls lock_mount(path) and forget to unlock_mount if clone_mnt or copy_mnt fails. [ 77.661566] ================================================ [ 77.662939] [ BUG: lock held when returning to user space! ] [ 77.664104] 3.9.0-rc5+ #17 Not tainted [ 77.664982] ------------------------------------------------ [ 77.666488] mount/514 is leaving the kernel with locks still held! [ 77.668027] 2 locks held by mount/514: [ 77.668817] #0: (&sb->s_type->i_mutex_key#7){+.+.+.}, at: [] lock_mount+0x32/0xe0 [ 77.671755] #1: (&namespace_sem){+++++.}, at: [] lock_mount+0x4a/0xe0 Signed-off-by: Andrey Vagin Signed-off-by: Al Viro --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 50ca17d3cb45..6c7d31eebba4 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1686,7 +1686,7 @@ static int do_loopback(struct path *path, const char *old_name, if (IS_ERR(mnt)) { err = PTR_ERR(mnt); - goto out; + goto out2; } err = graft_tree(mnt, path); -- cgit v1.2.3 From eb04e0ac198cec3bab407ad220438dfa65c19c67 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 10 Apr 2013 12:44:18 -0400 Subject: NFSv4: Doh! Typo in the fix to nfs41_walk_client_list Make sure that we set the status to 0 on success. Missed in testing because it never appears when doing multiple mounts to _different_ servers. Signed-off-by: Trond Myklebust Cc: # 3.7.x: 7b1f1fd: NFSv4/4.1: Fix bugs in nfs4[01]_walk_client_list --- fs/nfs/nfs4client.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index c7b346f8cc44..66b6664dcd4c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -482,6 +482,7 @@ int nfs41_walk_client_list(struct nfs_client *new, atomic_inc(&pos->cl_count); *result = pos; + status = 0; dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", __func__, pos, atomic_read(&pos->cl_count)); break; -- cgit v1.2.3 From c369c9a4a7c82d33329d869cbaf93304cc7a0c40 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Tue, 9 Apr 2013 18:17:41 +0100 Subject: cifs: Allow passwords which begin with a delimitor Fixes a regression in cifs_parse_mount_options where a password which begins with a delimitor is parsed incorrectly as being a blank password. Signed-off-by: Sachin Prabhu Acked-by: Jeff Layton Cc: Signed-off-by: Steve French --- fs/cifs/connect.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 991c63c6bdd0..21b3a291c327 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1575,14 +1575,24 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, } break; case Opt_blank_pass: - vol->password = NULL; - break; - case Opt_pass: /* passwords have to be handled differently * to allow the character used for deliminator * to be passed within them */ + /* + * Check if this is a case where the password + * starts with a delimiter + */ + tmp_end = strchr(data, '='); + tmp_end++; + if (!(tmp_end < end && tmp_end[1] == delim)) { + /* No it is not. Set the password to NULL */ + vol->password = NULL; + break; + } + /* Yes it is. Drop down to Opt_pass below.*/ + case Opt_pass: /* Obtain the value string */ value = strchr(data, '='); value++; -- cgit v1.2.3 From f2530dc71cf0822f90bb63ea4600caaef33a66bb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Apr 2013 09:33:34 +0200 Subject: kthread: Prevent unpark race which puts threads on the wrong cpu The smpboot threads rely on the park/unpark mechanism which binds per cpu threads on a particular core. Though the functionality is racy: CPU0 CPU1 CPU2 unpark(T) wake_up_process(T) clear(SHOULD_PARK) T runs leave parkme() due to !SHOULD_PARK bind_to(CPU2) BUG_ON(wrong CPU) We cannot let the tasks move themself to the target CPU as one of those tasks is actually the migration thread itself, which requires that it starts running on the target cpu right away. The solution to this problem is to prevent wakeups in park mode which are not from unpark(). That way we can guarantee that the association of the task to the target cpu is working correctly. Add a new task state (TASK_PARKED) which prevents other wakeups and use this state explicitly for the unpark wakeup. Peter noticed: Also, since the task state is visible to userspace and all the parked tasks are still in the PID space, its a good hint in ps and friends that these tasks aren't really there for the moment. The migration thread has another related issue. CPU0 CPU1 Bring up CPU2 create_thread(T) park(T) wait_for_completion() parkme() complete() sched_set_stop_task() schedule(TASK_PARKED) The sched_set_stop_task() call is issued while the task is on the runqueue of CPU1 and that confuses the hell out of the stop_task class on that cpu. So we need the same synchronizaion before sched_set_stop_task(). Reported-by: Dave Jones Reported-and-tested-by: Dave Hansen Reported-and-tested-by: Borislav Petkov Acked-by: Peter Ziljstra Cc: Srivatsa S. Bhat Cc: dhillf@gmail.com Cc: Ingo Molnar Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1304091635430.21884@ionos Signed-off-by: Thomas Gleixner --- fs/proc/array.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/proc/array.c b/fs/proc/array.c index f7ed9ee46eb9..cbd0f1b324b9 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -143,6 +143,7 @@ static const char * const task_state_array[] = { "x (dead)", /* 64 */ "K (wakekill)", /* 128 */ "W (waking)", /* 256 */ + "P (parked)", /* 512 */ }; static inline const char *get_task_state(struct task_struct *tsk) -- cgit v1.2.3 From 4bc4bee4595662d8bff92180d5c32e3313a704b0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 5 Apr 2013 20:50:09 +0000 Subject: Btrfs: make sure nbytes are right after log replay While trying to track down a tree log replay bug I noticed that fsck was always complaining about nbytes not being right for our fsynced file. That is because the new fsync stuff doesn't wait for ordered extents to complete, so the inodes nbytes are not necessarily updated properly when we log it. So to fix this we need to set nbytes to whatever it is on the inode that is on disk, so when we replay the extents we can just add the bytes that are being added as we replay the extent. This makes it work for the case that we have the wrong nbytes or the case that we logged everything and nbytes is actually correct. With this I'm no longer getting nbytes errors out of btrfsck. Cc: stable@vger.kernel.org Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 48 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 451fad96ecd1..ef96381569a4 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -317,6 +317,7 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, unsigned long src_ptr; unsigned long dst_ptr; int overwrite_root = 0; + bool inode_item = key->type == BTRFS_INODE_ITEM_KEY; if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) overwrite_root = 1; @@ -326,6 +327,9 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, /* look for the key in the destination tree */ ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + if (ret < 0) + return ret; + if (ret == 0) { char *src_copy; char *dst_copy; @@ -367,6 +371,30 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, return 0; } + /* + * We need to load the old nbytes into the inode so when we + * replay the extents we've logged we get the right nbytes. + */ + if (inode_item) { + struct btrfs_inode_item *item; + u64 nbytes; + + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + nbytes = btrfs_inode_nbytes(path->nodes[0], item); + item = btrfs_item_ptr(eb, slot, + struct btrfs_inode_item); + btrfs_set_inode_nbytes(eb, item, nbytes); + } + } else if (inode_item) { + struct btrfs_inode_item *item; + + /* + * New inode, set nbytes to 0 so that the nbytes comes out + * properly when we replay the extents. + */ + item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); + btrfs_set_inode_nbytes(eb, item, 0); } insert: btrfs_release_path(path); @@ -486,7 +514,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, int found_type; u64 extent_end; u64 start = key->offset; - u64 saved_nbytes; + u64 nbytes = 0; struct btrfs_file_extent_item *item; struct inode *inode = NULL; unsigned long size; @@ -496,10 +524,19 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, found_type = btrfs_file_extent_type(eb, item); if (found_type == BTRFS_FILE_EXTENT_REG || - found_type == BTRFS_FILE_EXTENT_PREALLOC) - extent_end = start + btrfs_file_extent_num_bytes(eb, item); - else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + found_type == BTRFS_FILE_EXTENT_PREALLOC) { + nbytes = btrfs_file_extent_num_bytes(eb, item); + extent_end = start + nbytes; + + /* + * We don't add to the inodes nbytes if we are prealloc or a + * hole. + */ + if (btrfs_file_extent_disk_bytenr(eb, item) == 0) + nbytes = 0; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { size = btrfs_file_extent_inline_len(eb, item); + nbytes = btrfs_file_extent_ram_bytes(eb, item); extent_end = ALIGN(start + size, root->sectorsize); } else { ret = 0; @@ -548,7 +585,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, } btrfs_release_path(path); - saved_nbytes = inode_get_bytes(inode); /* drop any overlapping extents */ ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1); BUG_ON(ret); @@ -635,7 +671,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); } - inode_set_bytes(inode, saved_nbytes); + inode_add_bytes(inode, nbytes); ret = btrfs_update_inode(trans, root, inode); out: if (inode) -- cgit v1.2.3 From 5b55d708335a9e3e4f61f2dadf7511502205ccd1 Mon Sep 17 00:00:00 2001 From: Suleiman Souhlal Date: Sat, 13 Apr 2013 16:03:06 -0700 Subject: vfs: Revert spurious fix to spinning prevention in prune_icache_sb Revert commit 62a3ddef6181 ("vfs: fix spinning prevention in prune_icache_sb"). This commit doesn't look right: since we are looking at the tail of the list (sb->s_inode_lru.prev) if we want to skip an inode, we should put it back at the head of the list instead of the tail, otherwise we will keep spinning on it. Discovered when investigating why prune_icache_sb came top in perf reports of a swapping load. Signed-off-by: Suleiman Souhlal Signed-off-by: Hugh Dickins Cc: stable@vger.kernel.org # v3.2+ Signed-off-by: Linus Torvalds --- fs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index f5f7c06c36fb..a898b3d43ccf 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -725,7 +725,7 @@ void prune_icache_sb(struct super_block *sb, int nr_to_scan) * inode to the back of the list so we don't spin on it. */ if (!spin_trylock(&inode->i_lock)) { - list_move_tail(&inode->i_lru, &sb->s_inode_lru); + list_move(&inode->i_lru, &sb->s_inode_lru); continue; } -- cgit v1.2.3 From a2fce9143057f4eb7675a21cca1b6beabe585c8b Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 17 Apr 2013 15:58:27 -0700 Subject: hugetlbfs: stop setting VM_DONTDUMP in initializing vma(VM_HUGETLB) Currently we fail to include any data on hugepages into coredump, because VM_DONTDUMP is set on hugetlbfs's vma. This behavior was recently introduced by commit 314e51b9851b ("mm: kill vma flag VM_RESERVED and mm->reserved_vm counter"). This looks to me a serious regression, so let's fix it. Signed-off-by: Naoya Horiguchi Acked-by: Konstantin Khlebnikov Acked-by: Michal Hocko Reviewed-by: Rik van Riel Acked-by: KOSAKI Motohiro Acked-by: David Rientjes Cc: [3.7+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 84e3d856e91d..523464e62849 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -110,7 +110,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) * way when do_mmap_pgoff unwinds (may be important on powerpc * and ia64). */ - vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND; vma->vm_ops = &hugetlb_vm_ops; if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) -- cgit v1.2.3 From 23d9e482136e31c9d287633a6e473daa172767c4 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 17 Apr 2013 15:58:28 -0700 Subject: fs/binfmt_elf.c: fix hugetlb memory check in vma_dump_size() Documentation/filesystems/proc.txt says about coredump_filter bitmask, Note bit 0-4 doesn't effect any hugetlb memory. hugetlb memory are only effected by bit 5-6. However current code can go into the subsequent flag checks of bit 0-4 for vma(VM_HUGETLB). So this patch inserts 'return' and makes it work as written in the document. Signed-off-by: Naoya Horiguchi Reviewed-by: Rik van Riel Acked-by: Michal Hocko Reviewed-by: HATAYAMA Daisuke Acked-by: KOSAKI Motohiro Acked-by: David Rientjes Cc: [3.7+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 3939829f6c5c..86af964c2425 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1137,6 +1137,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, goto whole; if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE)) goto whole; + return 0; } /* Do not dump I/O mapped devices or special mappings */ -- cgit v1.2.3 From 12f267a20aecf8b84a2a9069b9011f1661c779b4 Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Wed, 17 Apr 2013 15:58:33 -0700 Subject: hfsplus: fix potential overflow in hfsplus_file_truncate() Change a u32 to loff_t hfsplus_file_truncate(). Signed-off-by: Vyacheslav Dubeyko Cc: Christoph Hellwig Cc: Al Viro Cc: Hin-Tak Leung Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index a94f0f779d5e..fe0a76213d9e 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -533,7 +533,7 @@ void hfsplus_file_truncate(struct inode *inode) struct address_space *mapping = inode->i_mapping; struct page *page; void *fsdata; - u32 size = inode->i_size; + loff_t size = inode->i_size; res = pagecache_write_begin(NULL, mapping, size, 0, AOP_FLAG_UNINTERRUPTIBLE, -- cgit v1.2.3 From 0a82a8d132b26d438eb90b3ab35a7016e7227a1d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 18 Apr 2013 09:00:26 -0700 Subject: Revert "block: add missing block_bio_complete() tracepoint" This reverts commit 3a366e614d0837d9fc23f78cdb1a1186ebc3387f. Wanlong Gao reports that it causes a kernel panic on his machine several minutes after boot. Reverting it removes the panic. Jens says: "It's not quite clear why that is yet, so I think we should just revert the commit for 3.9 final (which I'm assuming is pretty close). The wifi is crap at the LSF hotel, so sending this email instead of queueing up a revert and pull request." Reported-by: Wanlong Gao Requested-by: Jens Axboe Cc: Tejun Heo Cc: Steven Rostedt Signed-off-by: Linus Torvalds --- fs/bio.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index bb5768f59b32..b96fc6ce4855 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1428,8 +1428,6 @@ void bio_endio(struct bio *bio, int error) else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) error = -EIO; - trace_block_bio_complete(bio, error); - if (bio->bi_end_io) bio->bi_end_io(bio, error); } -- cgit v1.2.3