summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOmar Sandoval <osandov@fb.com>2016-05-20 13:50:33 -0700
committerChris Mason <clm@fb.com>2016-06-25 06:20:10 -0700
commit02dbfc99b424dde3cf0a492ed3bec4f222441754 (patch)
treed08f267bac15ea5939185424bbe73306b7f11cb6
parent33688abb2802ff3a230bd2441f765477b94cc89e (diff)
downloadlinux-02dbfc99b424dde3cf0a492ed3bec4f222441754.tar.gz
linux-02dbfc99b424dde3cf0a492ed3bec4f222441754.tar.bz2
linux-02dbfc99b424dde3cf0a492ed3bec4f222441754.zip
Btrfs: fix ->iterate_shared() by upgrading i_rwsem for delayed nodes
Commit fe742fd4f90f ("Revert "btrfs: switch to ->iterate_shared()"") backed out the conversion to ->iterate_shared() for Btrfs because the delayed inode handling in btrfs_real_readdir() is racy. However, we can still do readdir in parallel if there are no delayed nodes. This is a temporary fix which upgrades the shared inode lock to an exclusive lock only when we have delayed items until we come up with a more complete solution. While we're here, rename the btrfs_{get,put}_delayed_items functions to make it very clear that they're just for readdir. Tested with xfstests and by doing a parallel kernel build: while make tinyconfig && make -j4 && git clean dqfx; do : done along with a bunch of parallel finds in another shell: while true; do for ((i=0; i<4; i++)); do find . >/dev/null & done wait done Signed-off-by: Omar Sandoval <osandov@fb.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r--fs/btrfs/delayed-inode.c27
-rw-r--r--fs/btrfs/delayed-inode.h10
-rw-r--r--fs/btrfs/inode.c10
3 files changed, 34 insertions, 13 deletions
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 61561c2a3f96..d3aaabbfada0 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1606,15 +1606,23 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode)
return 0;
}
-void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
- struct list_head *del_list)
+bool btrfs_readdir_get_delayed_items(struct inode *inode,
+ struct list_head *ins_list,
+ struct list_head *del_list)
{
struct btrfs_delayed_node *delayed_node;
struct btrfs_delayed_item *item;
delayed_node = btrfs_get_delayed_node(inode);
if (!delayed_node)
- return;
+ return false;
+
+ /*
+ * We can only do one readdir with delayed items at a time because of
+ * item->readdir_list.
+ */
+ inode_unlock_shared(inode);
+ inode_lock(inode);
mutex_lock(&delayed_node->mutex);
item = __btrfs_first_delayed_insertion_item(delayed_node);
@@ -1641,10 +1649,13 @@ void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
* requeue or dequeue this delayed node.
*/
atomic_dec(&delayed_node->refs);
+
+ return true;
}
-void btrfs_put_delayed_items(struct list_head *ins_list,
- struct list_head *del_list)
+void btrfs_readdir_put_delayed_items(struct inode *inode,
+ struct list_head *ins_list,
+ struct list_head *del_list)
{
struct btrfs_delayed_item *curr, *next;
@@ -1659,6 +1670,12 @@ void btrfs_put_delayed_items(struct list_head *ins_list,
if (atomic_dec_and_test(&curr->refs))
kfree(curr);
}
+
+ /*
+ * The VFS is going to do up_read(), so we need to downgrade back to a
+ * read lock.
+ */
+ downgrade_write(&inode->i_rwsem);
}
int btrfs_should_delete_dir_index(struct list_head *del_list,
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 0167853c84ae..2495b3d4075f 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -137,10 +137,12 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
void btrfs_destroy_delayed_inodes(struct btrfs_root *root);
/* Used for readdir() */
-void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
- struct list_head *del_list);
-void btrfs_put_delayed_items(struct list_head *ins_list,
- struct list_head *del_list);
+bool btrfs_readdir_get_delayed_items(struct inode *inode,
+ struct list_head *ins_list,
+ struct list_head *del_list);
+void btrfs_readdir_put_delayed_items(struct inode *inode,
+ struct list_head *ins_list,
+ struct list_head *del_list);
int btrfs_should_delete_dir_index(struct list_head *del_list,
u64 index);
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d2be95cfb6d1..969a25c5abcb 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5757,6 +5757,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
int name_len;
int is_curr = 0; /* ctx->pos points to the current index? */
bool emitted;
+ bool put = false;
/* FIXME, use a real flag for deciding about the key type */
if (root->fs_info->tree_root == root)
@@ -5774,7 +5775,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
if (key_type == BTRFS_DIR_INDEX_KEY) {
INIT_LIST_HEAD(&ins_list);
INIT_LIST_HEAD(&del_list);
- btrfs_get_delayed_items(inode, &ins_list, &del_list);
+ put = btrfs_readdir_get_delayed_items(inode, &ins_list,
+ &del_list);
}
key.type = key_type;
@@ -5921,8 +5923,8 @@ next:
nopos:
ret = 0;
err:
- if (key_type == BTRFS_DIR_INDEX_KEY)
- btrfs_put_delayed_items(&ins_list, &del_list);
+ if (put)
+ btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list);
btrfs_free_path(path);
return ret;
}
@@ -10534,7 +10536,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = {
static const struct file_operations btrfs_dir_file_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
- .iterate = btrfs_real_readdir,
+ .iterate_shared = btrfs_real_readdir,
.unlocked_ioctl = btrfs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = btrfs_compat_ioctl,