summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/maple_tree.c306
1 files changed, 197 insertions, 109 deletions
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index ae37a167e25d..5577e6da6fe3 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -185,7 +185,7 @@ static void mt_free_rcu(struct rcu_head *head)
*/
static void ma_free_rcu(struct maple_node *node)
{
- node->parent = ma_parent_ptr(node);
+ WARN_ON(node->parent != ma_parent_ptr(node));
call_rcu(&node->rcu, mt_free_rcu);
}
@@ -539,11 +539,14 @@ static inline struct maple_node *mte_parent(const struct maple_enode *enode)
*/
static inline bool ma_dead_node(const struct maple_node *node)
{
- struct maple_node *parent = (void *)((unsigned long)
- node->parent & ~MAPLE_NODE_MASK);
+ struct maple_node *parent;
+ /* Do not reorder reads from the node prior to the parent check */
+ smp_rmb();
+ parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK);
return (parent == node);
}
+
/*
* mte_dead_node() - check if the @enode is dead.
* @enode: The encoded maple node
@@ -555,6 +558,8 @@ static inline bool mte_dead_node(const struct maple_enode *enode)
struct maple_node *parent, *node;
node = mte_to_node(enode);
+ /* Do not reorder reads from the node prior to the parent check */
+ smp_rmb();
parent = mte_parent(enode);
return (parent == node);
}
@@ -625,6 +630,8 @@ static inline unsigned int mas_alloc_req(const struct ma_state *mas)
* @node - the maple node
* @type - the node type
*
+ * In the event of a dead node, this array may be %NULL
+ *
* Return: A pointer to the maple node pivots
*/
static inline unsigned long *ma_pivots(struct maple_node *node,
@@ -817,6 +824,11 @@ static inline void *mt_slot(const struct maple_tree *mt,
return rcu_dereference_check(slots[offset], mt_locked(mt));
}
+static inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slots,
+ unsigned char offset)
+{
+ return rcu_dereference_protected(slots[offset], mt_locked(mt));
+}
/*
* mas_slot_locked() - Get the slot value when holding the maple tree lock.
* @mas: The maple state
@@ -828,7 +840,7 @@ static inline void *mt_slot(const struct maple_tree *mt,
static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots,
unsigned char offset)
{
- return rcu_dereference_protected(slots[offset], mt_locked(mas->tree));
+ return mt_slot_locked(mas->tree, slots, offset);
}
/*
@@ -900,6 +912,45 @@ static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt,
}
/*
+ * mt_clear_meta() - clear the metadata information of a node, if it exists
+ * @mt: The maple tree
+ * @mn: The maple node
+ * @type: The maple node type
+ * @offset: The offset of the highest sub-gap in this node.
+ * @end: The end of the data in this node.
+ */
+static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn,
+ enum maple_type type)
+{
+ struct maple_metadata *meta;
+ unsigned long *pivots;
+ void __rcu **slots;
+ void *next;
+
+ switch (type) {
+ case maple_range_64:
+ pivots = mn->mr64.pivot;
+ if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) {
+ slots = mn->mr64.slot;
+ next = mt_slot_locked(mt, slots,
+ MAPLE_RANGE64_SLOTS - 1);
+ if (unlikely((mte_to_node(next) &&
+ mte_node_type(next))))
+ return; /* no metadata, could be node */
+ }
+ fallthrough;
+ case maple_arange_64:
+ meta = ma_meta(mn, type);
+ break;
+ default:
+ return;
+ }
+
+ meta->gap = 0;
+ meta->end = 0;
+}
+
+/*
* ma_meta_end() - Get the data end of a node from the metadata
* @mn: The maple node
* @mt: The maple node type
@@ -1096,8 +1147,11 @@ static int mas_ascend(struct ma_state *mas)
a_type = mas_parent_enum(mas, p_enode);
a_node = mte_parent(p_enode);
a_slot = mte_parent_slot(p_enode);
- pivots = ma_pivots(a_node, a_type);
a_enode = mt_mk_node(a_node, a_type);
+ pivots = ma_pivots(a_node, a_type);
+
+ if (unlikely(ma_dead_node(a_node)))
+ return 1;
if (!set_min && a_slot) {
set_min = true;
@@ -1249,26 +1303,21 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
node = mas->alloc;
node->request_count = 0;
while (requested) {
- max_req = MAPLE_ALLOC_SLOTS;
- if (node->node_count) {
- unsigned int offset = node->node_count;
-
- slots = (void **)&node->slot[offset];
- max_req -= offset;
- } else {
- slots = (void **)&node->slot;
- }
-
+ max_req = MAPLE_ALLOC_SLOTS - node->node_count;
+ slots = (void **)&node->slot[node->node_count];
max_req = min(requested, max_req);
count = mt_alloc_bulk(gfp, max_req, slots);
if (!count)
goto nomem_bulk;
+ if (node->node_count == 0) {
+ node->slot[0]->node_count = 0;
+ node->slot[0]->request_count = 0;
+ }
+
node->node_count += count;
allocated += count;
node = node->slot[0];
- node->node_count = 0;
- node->request_count = 0;
requested -= count;
}
mas->alloc->total = allocated;
@@ -1354,12 +1403,16 @@ static inline struct maple_enode *mas_start(struct ma_state *mas)
mas->max = ULONG_MAX;
mas->depth = 0;
+retry:
root = mas_root(mas);
/* Tree with nodes */
if (likely(xa_is_node(root))) {
mas->depth = 1;
mas->node = mte_safe_root(root);
mas->offset = 0;
+ if (mte_dead_node(mas->node))
+ goto retry;
+
return NULL;
}
@@ -1401,6 +1454,9 @@ static inline unsigned char ma_data_end(struct maple_node *node,
{
unsigned char offset;
+ if (!pivots)
+ return 0;
+
if (type == maple_arange_64)
return ma_meta_end(node, type);
@@ -1436,6 +1492,9 @@ static inline unsigned char mas_data_end(struct ma_state *mas)
return ma_meta_end(node, type);
pivots = ma_pivots(node, type);
+ if (unlikely(ma_dead_node(node)))
+ return 0;
+
offset = mt_pivots[type] - 1;
if (likely(!pivots[offset]))
return ma_meta_end(node, type);
@@ -1724,8 +1783,10 @@ static inline void mas_replace(struct ma_state *mas, bool advanced)
rcu_assign_pointer(slots[offset], mas->node);
}
- if (!advanced)
+ if (!advanced) {
+ mte_set_node_dead(old_enode);
mas_free(mas, old_enode);
+ }
}
/*
@@ -3659,10 +3720,9 @@ static inline int mas_root_expand(struct ma_state *mas, void *entry)
slot++;
mas->depth = 1;
mas_set_height(mas);
-
+ ma_set_meta(node, maple_leaf_64, 0, slot);
/* swap the new root into the tree */
rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
- ma_set_meta(node, maple_leaf_64, 0, slot);
return slot;
}
@@ -3875,18 +3935,13 @@ static inline void *mtree_lookup_walk(struct ma_state *mas)
end = ma_data_end(node, type, pivots, max);
if (unlikely(ma_dead_node(node)))
goto dead_node;
-
- if (pivots[offset] >= mas->index)
- goto next;
-
do {
- offset++;
- } while ((offset < end) && (pivots[offset] < mas->index));
-
- if (likely(offset > end))
- max = pivots[offset];
+ if (pivots[offset] >= mas->index) {
+ max = pivots[offset];
+ break;
+ }
+ } while (++offset < end);
-next:
slots = ma_slots(node, type);
next = mt_slot(mas->tree, slots, offset);
if (unlikely(ma_dead_node(node)))
@@ -4164,6 +4219,7 @@ static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas)
done:
mas_leaf_set_meta(mas, newnode, dst_pivots, maple_leaf_64, new_end);
if (in_rcu) {
+ mte_set_node_dead(mas->node);
mas->node = mt_mk_node(newnode, wr_mas->type);
mas_replace(mas, false);
} else {
@@ -4505,6 +4561,9 @@ static inline int mas_prev_node(struct ma_state *mas, unsigned long min)
node = mas_mn(mas);
slots = ma_slots(node, mt);
pivots = ma_pivots(node, mt);
+ if (unlikely(ma_dead_node(node)))
+ return 1;
+
mas->max = pivots[offset];
if (offset)
mas->min = pivots[offset - 1] + 1;
@@ -4526,6 +4585,9 @@ static inline int mas_prev_node(struct ma_state *mas, unsigned long min)
slots = ma_slots(node, mt);
pivots = ma_pivots(node, mt);
offset = ma_data_end(node, mt, pivots, mas->max);
+ if (unlikely(ma_dead_node(node)))
+ return 1;
+
if (offset)
mas->min = pivots[offset - 1] + 1;
@@ -4574,6 +4636,7 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
struct maple_enode *enode;
int level = 0;
unsigned char offset;
+ unsigned char node_end;
enum maple_type mt;
void __rcu **slots;
@@ -4597,7 +4660,11 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
node = mas_mn(mas);
mt = mte_node_type(mas->node);
pivots = ma_pivots(node, mt);
- } while (unlikely(offset == ma_data_end(node, mt, pivots, mas->max)));
+ node_end = ma_data_end(node, mt, pivots, mas->max);
+ if (unlikely(ma_dead_node(node)))
+ return 1;
+
+ } while (unlikely(offset == node_end));
slots = ma_slots(node, mt);
pivot = mas_safe_pivot(mas, pivots, ++offset, mt);
@@ -4613,6 +4680,9 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
mt = mte_node_type(mas->node);
slots = ma_slots(node, mt);
pivots = ma_pivots(node, mt);
+ if (unlikely(ma_dead_node(node)))
+ return 1;
+
offset = 0;
pivot = pivots[0];
}
@@ -4659,11 +4729,14 @@ static inline void *mas_next_nentry(struct ma_state *mas,
return NULL;
}
- pivots = ma_pivots(node, type);
slots = ma_slots(node, type);
- mas->index = mas_safe_min(mas, pivots, mas->offset);
+ pivots = ma_pivots(node, type);
count = ma_data_end(node, type, pivots, mas->max);
- if (ma_dead_node(node))
+ if (unlikely(ma_dead_node(node)))
+ return NULL;
+
+ mas->index = mas_safe_min(mas, pivots, mas->offset);
+ if (unlikely(ma_dead_node(node)))
return NULL;
if (mas->index > max)
@@ -4817,6 +4890,11 @@ retry:
slots = ma_slots(mn, mt);
pivots = ma_pivots(mn, mt);
+ if (unlikely(ma_dead_node(mn))) {
+ mas_rewalk(mas, index);
+ goto retry;
+ }
+
if (offset == mt_pivots[mt])
pivot = mas->max;
else
@@ -5400,24 +5478,26 @@ no_gap:
}
/*
- * mas_dead_leaves() - Mark all leaves of a node as dead.
+ * mte_dead_leaves() - Mark all leaves of a node as dead.
* @mas: The maple state
* @slots: Pointer to the slot array
+ * @type: The maple node type
*
* Must hold the write lock.
*
* Return: The number of leaves marked as dead.
*/
static inline
-unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots)
+unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt,
+ void __rcu **slots)
{
struct maple_node *node;
enum maple_type type;
void *entry;
int offset;
- for (offset = 0; offset < mt_slot_count(mas->node); offset++) {
- entry = mas_slot_locked(mas, slots, offset);
+ for (offset = 0; offset < mt_slot_count(enode); offset++) {
+ entry = mt_slot(mt, slots, offset);
type = mte_node_type(entry);
node = mte_to_node(entry);
/* Use both node and type to catch LE & BE metadata */
@@ -5425,7 +5505,6 @@ unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots)
break;
mte_set_node_dead(entry);
- smp_wmb(); /* Needed for RCU */
node->type = type;
rcu_assign_pointer(slots[offset], node);
}
@@ -5433,151 +5512,160 @@ unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots)
return offset;
}
-static void __rcu **mas_dead_walk(struct ma_state *mas, unsigned char offset)
+/**
+ * mte_dead_walk() - Walk down a dead tree to just before the leaves
+ * @enode: The maple encoded node
+ * @offset: The starting offset
+ *
+ * Note: This can only be used from the RCU callback context.
+ */
+static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset)
{
struct maple_node *node, *next;
void __rcu **slots = NULL;
- next = mas_mn(mas);
+ next = mte_to_node(*enode);
do {
- mas->node = ma_enode_ptr(next);
- node = mas_mn(mas);
+ *enode = ma_enode_ptr(next);
+ node = mte_to_node(*enode);
slots = ma_slots(node, node->type);
- next = mas_slot_locked(mas, slots, offset);
+ next = rcu_dereference_protected(slots[offset],
+ lock_is_held(&rcu_callback_map));
offset = 0;
} while (!ma_is_leaf(next->type));
return slots;
}
+/**
+ * mt_free_walk() - Walk & free a tree in the RCU callback context
+ * @head: The RCU head that's within the node.
+ *
+ * Note: This can only be used from the RCU callback context.
+ */
static void mt_free_walk(struct rcu_head *head)
{
void __rcu **slots;
struct maple_node *node, *start;
- struct maple_tree mt;
+ struct maple_enode *enode;
unsigned char offset;
enum maple_type type;
- MA_STATE(mas, &mt, 0, 0);
node = container_of(head, struct maple_node, rcu);
if (ma_is_leaf(node->type))
goto free_leaf;
- mt_init_flags(&mt, node->ma_flags);
- mas_lock(&mas);
start = node;
- mas.node = mt_mk_node(node, node->type);
- slots = mas_dead_walk(&mas, 0);
- node = mas_mn(&mas);
+ enode = mt_mk_node(node, node->type);
+ slots = mte_dead_walk(&enode, 0);
+ node = mte_to_node(enode);
do {
mt_free_bulk(node->slot_len, slots);
offset = node->parent_slot + 1;
- mas.node = node->piv_parent;
- if (mas_mn(&mas) == node)
- goto start_slots_free;
-
- type = mte_node_type(mas.node);
- slots = ma_slots(mte_to_node(mas.node), type);
- if ((offset < mt_slots[type]) && (slots[offset]))
- slots = mas_dead_walk(&mas, offset);
-
- node = mas_mn(&mas);
+ enode = node->piv_parent;
+ if (mte_to_node(enode) == node)
+ goto free_leaf;
+
+ type = mte_node_type(enode);
+ slots = ma_slots(mte_to_node(enode), type);
+ if ((offset < mt_slots[type]) &&
+ rcu_dereference_protected(slots[offset],
+ lock_is_held(&rcu_callback_map)))
+ slots = mte_dead_walk(&enode, offset);
+ node = mte_to_node(enode);
} while ((node != start) || (node->slot_len < offset));
slots = ma_slots(node, node->type);
mt_free_bulk(node->slot_len, slots);
-start_slots_free:
- mas_unlock(&mas);
free_leaf:
mt_free_rcu(&node->rcu);
}
-static inline void __rcu **mas_destroy_descend(struct ma_state *mas,
- struct maple_enode *prev, unsigned char offset)
+static inline void __rcu **mte_destroy_descend(struct maple_enode **enode,
+ struct maple_tree *mt, struct maple_enode *prev, unsigned char offset)
{
struct maple_node *node;
- struct maple_enode *next = mas->node;
+ struct maple_enode *next = *enode;
void __rcu **slots = NULL;
+ enum maple_type type;
+ unsigned char next_offset = 0;
do {
- mas->node = next;
- node = mas_mn(mas);
- slots = ma_slots(node, mte_node_type(mas->node));
- next = mas_slot_locked(mas, slots, 0);
+ *enode = next;
+ node = mte_to_node(*enode);
+ type = mte_node_type(*enode);
+ slots = ma_slots(node, type);
+ next = mt_slot_locked(mt, slots, next_offset);
if ((mte_dead_node(next)))
- next = mas_slot_locked(mas, slots, 1);
+ next = mt_slot_locked(mt, slots, ++next_offset);
- mte_set_node_dead(mas->node);
- node->type = mte_node_type(mas->node);
+ mte_set_node_dead(*enode);
+ node->type = type;
node->piv_parent = prev;
node->parent_slot = offset;
- offset = 0;
- prev = mas->node;
+ offset = next_offset;
+ next_offset = 0;
+ prev = *enode;
} while (!mte_is_leaf(next));
return slots;
}
-static void mt_destroy_walk(struct maple_enode *enode, unsigned char ma_flags,
+static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
bool free)
{
void __rcu **slots;
struct maple_node *node = mte_to_node(enode);
struct maple_enode *start;
- struct maple_tree mt;
-
- MA_STATE(mas, &mt, 0, 0);
- if (mte_is_leaf(enode))
+ if (mte_is_leaf(enode)) {
+ node->type = mte_node_type(enode);
goto free_leaf;
+ }
- mt_init_flags(&mt, ma_flags);
- mas_lock(&mas);
-
- mas.node = start = enode;
- slots = mas_destroy_descend(&mas, start, 0);
- node = mas_mn(&mas);
+ start = enode;
+ slots = mte_destroy_descend(&enode, mt, start, 0);
+ node = mte_to_node(enode); // Updated in the above call.
do {
enum maple_type type;
unsigned char offset;
struct maple_enode *parent, *tmp;
- node->slot_len = mas_dead_leaves(&mas, slots);
+ node->slot_len = mte_dead_leaves(enode, mt, slots);
if (free)
mt_free_bulk(node->slot_len, slots);
offset = node->parent_slot + 1;
- mas.node = node->piv_parent;
- if (mas_mn(&mas) == node)
- goto start_slots_free;
+ enode = node->piv_parent;
+ if (mte_to_node(enode) == node)
+ goto free_leaf;
- type = mte_node_type(mas.node);
- slots = ma_slots(mte_to_node(mas.node), type);
+ type = mte_node_type(enode);
+ slots = ma_slots(mte_to_node(enode), type);
if (offset >= mt_slots[type])
goto next;
- tmp = mas_slot_locked(&mas, slots, offset);
+ tmp = mt_slot_locked(mt, slots, offset);
if (mte_node_type(tmp) && mte_to_node(tmp)) {
- parent = mas.node;
- mas.node = tmp;
- slots = mas_destroy_descend(&mas, parent, offset);
+ parent = enode;
+ enode = tmp;
+ slots = mte_destroy_descend(&enode, mt, parent, offset);
}
next:
- node = mas_mn(&mas);
- } while (start != mas.node);
+ node = mte_to_node(enode);
+ } while (start != enode);
- node = mas_mn(&mas);
- node->slot_len = mas_dead_leaves(&mas, slots);
+ node = mte_to_node(enode);
+ node->slot_len = mte_dead_leaves(enode, mt, slots);
if (free)
mt_free_bulk(node->slot_len, slots);
-start_slots_free:
- mas_unlock(&mas);
-
free_leaf:
if (free)
mt_free_rcu(&node->rcu);
+ else
+ mt_clear_meta(mt, node, node->type);
}
/*
@@ -5593,10 +5681,10 @@ static inline void mte_destroy_walk(struct maple_enode *enode,
struct maple_node *node = mte_to_node(enode);
if (mt_in_rcu(mt)) {
- mt_destroy_walk(enode, mt->ma_flags, false);
+ mt_destroy_walk(enode, mt, false);
call_rcu(&node->rcu, mt_free_walk);
} else {
- mt_destroy_walk(enode, mt->ma_flags, true);
+ mt_destroy_walk(enode, mt, true);
}
}
@@ -6618,11 +6706,11 @@ static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn,
while (likely(!ma_is_leaf(mt))) {
MT_BUG_ON(mas->tree, mte_dead_node(mas->node));
slots = ma_slots(mn, mt);
- pivots = ma_pivots(mn, mt);
- max = pivots[0];
entry = mas_slot(mas, slots, 0);
+ pivots = ma_pivots(mn, mt);
if (unlikely(ma_dead_node(mn)))
return NULL;
+ max = pivots[0];
mas->node = entry;
mn = mas_mn(mas);
mt = mte_node_type(mas->node);
@@ -6642,13 +6730,13 @@ static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn,
if (likely(entry))
return entry;
- pivots = ma_pivots(mn, mt);
- mas->index = pivots[0] + 1;
mas->offset = 1;
entry = mas_slot(mas, slots, 1);
+ pivots = ma_pivots(mn, mt);
if (unlikely(ma_dead_node(mn)))
return NULL;
+ mas->index = pivots[0] + 1;
if (mas->index > limit)
goto none;