summaryrefslogtreecommitdiffstats
path: root/fs/kernfs
diff options
context:
space:
mode:
authorShaohua Li <shli@fb.com>2017-07-12 11:49:48 -0700
committerJens Axboe <axboe@kernel.dk>2017-07-29 09:00:03 -0600
commitba16b2846a8c6965d0d35be3968bc10f6277812d (patch)
tree332d1796149f4e5eb713fcf95b44cc6177965158 /fs/kernfs
parent4a3ef68acacf31570066e69593de5cc49cc91638 (diff)
downloadlinux-ba16b2846a8c6965d0d35be3968bc10f6277812d.tar.gz
linux-ba16b2846a8c6965d0d35be3968bc10f6277812d.tar.bz2
linux-ba16b2846a8c6965d0d35be3968bc10f6277812d.zip
kernfs: add an API to get kernfs node from inode number
Add an API to get a kernfs node from an inode number. We will need this to implement exportfs operations. This API will also be used in blktrace later, so it should be as fast as possible. To make the API lock free, kernfs nodes are freed in RCU context, and we depend on the kernfs_node count/ino number to filter out stale kernfs nodes.
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'fs/kernfs')
-rw-r--r--fs/kernfs/dir.c57
-rw-r--r--fs/kernfs/kernfs-internal.h2
-rw-r--r--fs/kernfs/mount.c11
3 files changed, 69 insertions, 1 deletions
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 33f711f6b86e..7be37c838007 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -508,6 +508,10 @@ void kernfs_put(struct kernfs_node *kn)
struct kernfs_node *parent;
struct kernfs_root *root;
+ /*
+ * A kernfs_node is freed with ->count at 0;
+ * kernfs_find_and_get_node_by_ino depends on this to filter out reused
+ * stale nodes.
+ */
if (!kn || !atomic_dec_and_test(&kn->count))
return;
root = kernfs_root(kn);
@@ -649,6 +653,11 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
kn->ino = ret;
kn->generation = gen;
+ /*
+ * set ino first. This barrier is paired with atomic_inc_not_zero in
+ * kernfs_find_and_get_node_by_ino
+ */
+ smp_mb__before_atomic();
atomic_set(&kn->count, 1);
atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
RB_CLEAR_NODE(&kn->rb);
@@ -680,6 +689,54 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
return kn;
}
+/**
+ * kernfs_find_and_get_node_by_ino - get kernfs_node from inode number
+ * @root: the kernfs root whose ino_idr is searched
+ * @ino: inode number to look up
+ *
+ * RETURNS:
+ * The kernfs node with its reference count incremented, or NULL on failure.
+ */
+struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root,
+ unsigned int ino)
+{
+ struct kernfs_node *kn;
+
+ rcu_read_lock();
+ kn = idr_find(&root->ino_idr, ino);
+ if (!kn)
+ goto out;
+
+ /*
+ * Since kernfs_node is freed in RCU, it's possible that an old node
+ * for ino is freed, but reused before the RCU grace period. But a
+ * freed node (see kernfs_put) or an incompletely initialized node (see
+ * __kernfs_new_node) should have 'count' 0. We can use this fact to
+ * filter out such nodes.
+ */
+ if (!atomic_inc_not_zero(&kn->count)) {
+ kn = NULL;
+ goto out;
+ }
+
+ /*
+ * The node could be a new node or a reused one. A new node is fine.
+ * If it was reused (possible because of SLAB_TYPESAFE_BY_RCU),
+ * __kernfs_new_node always sets its 'ino' before 'count', so once
+ * 'count' is up to date 'ino' is too and a mismatch marks a stale
+ * node. On mismatch, kernfs_put() at 'out' drops the reference.
+ */
+ if (kn->ino != ino)
+ goto out;
+ rcu_read_unlock();
+
+ return kn;
+out:
+ rcu_read_unlock();
+ kernfs_put(kn);
+ return NULL;
+}
+
/**
* kernfs_add_one - add kernfs_node to parent without warning
* @kn: kernfs_node to be added
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 2d5144ab4251..e9c226f29828 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -98,6 +98,8 @@ int kernfs_add_one(struct kernfs_node *kn);
struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
const char *name, umode_t mode,
unsigned flags);
+struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root,
+ unsigned int ino);
/*
* file.c
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index d5b149a45be1..69c48bec8a63 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -330,7 +330,16 @@ struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns)
void __init kernfs_init(void)
{
+
+ /*
+ * The slab is freed in RCU context, so kernfs_find_and_get_node_by_ino
+ * can access it without taking a lock. This can expose stale nodes;
+ * see kernfs_find_and_get_node_by_ino for how stale nodes are
+ * filtered out.
+ */
kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
sizeof(struct kernfs_node),
- 0, SLAB_PANIC, NULL);
+ 0,
+ SLAB_PANIC | SLAB_TYPESAFE_BY_RCU,
+ NULL);
}