diff options
Diffstat (limited to 'include/linux/rhashtable.h')
-rw-r--r-- | include/linux/rhashtable.h | 261 |
1 files changed, 165 insertions, 96 deletions
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 0c9175aeab8a..ccbbafdf5547 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -24,12 +24,27 @@ #include <linux/list_nulls.h> #include <linux/workqueue.h> #include <linux/rculist.h> +#include <linux/bit_spinlock.h> #include <linux/rhashtable-types.h> /* + * Objects in an rhashtable have an embedded struct rhash_head + * which is linked into as hash chain from the hash table - or one + * of two or more hash tables when the rhashtable is being resized. * The end of the chain is marked with a special nulls marks which has - * the least significant bit set. + * the least significant bit set but otherwise stores the address of + * the hash bucket. This allows us to be be sure we've found the end + * of the right list. + * The value stored in the hash bucket has BIT(2) used as a lock bit. + * This bit must be atomically set before any changes are made to + * the chain. To avoid dereferencing this pointer without clearing + * the bit first, we use an opaque 'struct rhash_lock_head *' for the + * pointer stored in the bucket. This struct needs to be defined so + * that rcu_derefernce() works on it, but it has no content so a + * cast is needed for it to be useful. This ensures it isn't + * used by mistake with clearing the lock bit first. */ +struct rhash_lock_head {}; /* Maximum chain length before rehash * @@ -52,8 +67,6 @@ * @nest: Number of bits of first-level nested table. * @rehash: Current bucket being rehashed * @hash_rnd: Random seed to fold into hash - * @locks_mask: Mask to apply before accessing locks[] - * @locks: Array of spinlocks protecting individual buckets * @walkers: List of active walkers * @rcu: RCU structure for freeing the table * @future_tbl: Table under construction during rehashing @@ -64,17 +77,71 @@ struct bucket_table { unsigned int size; unsigned int nest; u32 hash_rnd; - unsigned int locks_mask; - spinlock_t *locks; struct list_head walkers; struct rcu_head rcu; struct bucket_table __rcu *future_tbl; - struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; + struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; /* + * We lock a bucket by setting BIT(1) in the pointer - this is always + * zero in real pointers and in the nulls marker. + * bit_spin_locks do not handle contention well, but the whole point + * of the hashtable design is to achieve minimum per-bucket contention. + * A nested hash table might not have a bucket pointer. In that case + * we cannot get a lock. For remove and replace the bucket cannot be + * interesting and doesn't need locking. + * For insert we allocate the bucket if this is the last bucket_table, + * and then take the lock. + * Sometimes we unlock a bucket by writing a new pointer there. In that + * case we don't need to unlock, but we do need to reset state such as + * local_bh. For that we have rht_assign_unlock(). As rcu_assign_pointer() + * provides the same release semantics that bit_spin_unlock() provides, + * this is safe. + */ + +static inline void rht_lock(struct rhash_lock_head **bkt) +{ + local_bh_disable(); + bit_spin_lock(1, (unsigned long *)bkt); +} + +static inline void rht_unlock(struct rhash_lock_head **bkt) +{ + bit_spin_unlock(1, (unsigned long *)bkt); + local_bh_enable(); +} + +static inline void rht_assign_unlock(struct rhash_lock_head **bkt, + struct rhash_head *obj) +{ + struct rhash_head **p = (struct rhash_head **)bkt; + + rcu_assign_pointer(*p, obj); + preempt_enable(); + __release(bitlock); + local_bh_enable(); +} + +/* + * If 'p' is a bucket head and might be locked: + * rht_ptr() returns the address without the lock bit. + * rht_ptr_locked() returns the address WITH the lock bit. + */ +static inline struct rhash_head __rcu *rht_ptr(const struct rhash_lock_head *p) +{ + return (void *)(((unsigned long)p) & ~BIT(1)); +} + +static inline struct rhash_lock_head __rcu *rht_ptr_locked(const + struct rhash_head *p) +{ + return (void *)(((unsigned long)p) | BIT(1)); +} + +/* * NULLS_MARKER() expects a hash value with the low * bits mostly likely to be significant, and it discards * the msb. @@ -206,25 +273,6 @@ static inline bool rht_grow_above_max(const struct rhashtable *ht, return atomic_read(&ht->nelems) >= ht->max_elems; } -/* The bucket lock is selected based on the hash and protects mutations - * on a group of hash buckets. - * - * A maximum of tbl->size/2 bucket locks is allocated. This ensures that - * a single lock always covers both buckets which may both contains - * entries which link to the same bucket of the old table during resizing. - * This allows to simplify the locking as locking the bucket in both - * tables during resize always guarantee protection. - * - * IMPORTANT: When holding the bucket lock of both the old and new table - * during expansions and shrinking, the old bucket lock must always be - * acquired first. - */ -static inline spinlock_t *rht_bucket_lock(const struct bucket_table *tbl, - unsigned int hash) -{ - return &tbl->locks[hash & tbl->locks_mask]; -} - #ifdef CONFIG_PROVE_LOCKING int lockdep_rht_mutex_is_held(struct rhashtable *ht); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); @@ -263,13 +311,13 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, void *arg); void rhashtable_destroy(struct rhashtable *ht); -struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash); -struct rhash_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash); -struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, - struct bucket_table *tbl, +struct rhash_lock_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash); +struct rhash_lock_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl, unsigned int hash); +struct rhash_lock_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned int hash); #define rht_dereference(p, ht) \ rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) @@ -286,21 +334,21 @@ struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) -static inline struct rhash_head __rcu *const *rht_bucket( +static inline struct rhash_lock_head __rcu *const *rht_bucket( const struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : &tbl->buckets[hash]; } -static inline struct rhash_head __rcu **rht_bucket_var( +static inline struct rhash_lock_head __rcu **rht_bucket_var( struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) : &tbl->buckets[hash]; } -static inline struct rhash_head __rcu **rht_bucket_insert( +static inline struct rhash_lock_head __rcu **rht_bucket_insert( struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) : @@ -326,7 +374,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * @hash: the hash value / bucket index */ #define rht_for_each(pos, tbl, hash) \ - rht_for_each_from(pos, *rht_bucket(tbl, hash), tbl, hash) + rht_for_each_from(pos, rht_ptr(*rht_bucket(tbl, hash)), tbl, hash) /** * rht_for_each_entry_from - iterate over hash chain from given head @@ -351,7 +399,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * @member: name of the &struct rhash_head within the hashable struct. */ #define rht_for_each_entry(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_from(tpos, pos, *rht_bucket(tbl, hash), \ + rht_for_each_entry_from(tpos, pos, rht_ptr(*rht_bucket(tbl, hash)), \ tbl, hash, member) /** @@ -367,7 +415,8 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * remove the loop cursor from the list. */ #define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ - for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \ + for (pos = rht_dereference_bucket(rht_ptr(*rht_bucket(tbl, hash)), \ + tbl, hash), \ next = !rht_is_a_nulls(pos) ? \ rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ @@ -402,8 +451,12 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_rcu(pos, tbl, hash) \ - rht_for_each_rcu_from(pos, *rht_bucket(tbl, hash), tbl, hash) +#define rht_for_each_rcu(pos, tbl, hash) \ + for (({barrier(); }), \ + pos = rht_ptr(rht_dereference_bucket_rcu( \ + *rht_bucket(tbl, hash), tbl, hash)); \ + !rht_is_a_nulls(pos); \ + pos = rcu_dereference_raw(pos->next)) /** * rht_for_each_entry_rcu_from - iterated over rcu hash chain from given head @@ -437,7 +490,8 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_rcu_from(tpos, pos, *rht_bucket(tbl, hash), \ + rht_for_each_entry_rcu_from(tpos, pos, \ + rht_ptr(*rht_bucket(tbl, hash)), \ tbl, hash, member) /** @@ -483,7 +537,7 @@ static inline struct rhash_head *__rhashtable_lookup( .ht = ht, .key = key, }; - struct rhash_head __rcu * const *head; + struct rhash_lock_head __rcu * const *bkt; struct bucket_table *tbl; struct rhash_head *he; unsigned int hash; @@ -491,9 +545,10 @@ static inline struct rhash_head *__rhashtable_lookup( tbl = rht_dereference_rcu(ht->tbl, ht); restart: hash = rht_key_hashfn(ht, tbl, key, params); - head = rht_bucket(tbl, hash); + bkt = rht_bucket(tbl, hash); do { - rht_for_each_rcu_from(he, *head, tbl, hash) { + he = rht_ptr(rht_dereference_bucket_rcu(*bkt, tbl, hash)); + rht_for_each_rcu_from(he, he, tbl, hash) { if (params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, he)) : rhashtable_compare(&arg, rht_obj(ht, he))) @@ -503,7 +558,7 @@ restart: /* An object might have been moved to a different hash chain, * while we walk along it - better check and retry. */ - } while (he != RHT_NULLS_MARKER(head)); + } while (he != RHT_NULLS_MARKER(bkt)); /* Ensure we see any new tables. */ smp_rmb(); @@ -599,10 +654,10 @@ static inline void *__rhashtable_insert_fast( .ht = ht, .key = key, }; + struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct bucket_table *tbl; struct rhash_head *head; - spinlock_t *lock; unsigned int hash; int elasticity; void *data; @@ -611,23 +666,22 @@ static inline void *__rhashtable_insert_fast( tbl = rht_dereference_rcu(ht->tbl, ht); hash = rht_head_hashfn(ht, tbl, obj, params); - lock = rht_bucket_lock(tbl, hash); - spin_lock_bh(lock); + elasticity = RHT_ELASTICITY; + bkt = rht_bucket_insert(ht, tbl, hash); + data = ERR_PTR(-ENOMEM); + if (!bkt) + goto out; + pprev = NULL; + rht_lock(bkt); if (unlikely(rcu_access_pointer(tbl->future_tbl))) { slow_path: - spin_unlock_bh(lock); + rht_unlock(bkt); rcu_read_unlock(); return rhashtable_insert_slow(ht, key, obj); } - elasticity = RHT_ELASTICITY; - pprev = rht_bucket_insert(ht, tbl, hash); - data = ERR_PTR(-ENOMEM); - if (!pprev) - goto out; - - rht_for_each_from(head, *pprev, tbl, hash) { + rht_for_each_from(head, rht_ptr(*bkt), tbl, hash) { struct rhlist_head *plist; struct rhlist_head *list; @@ -643,7 +697,7 @@ slow_path: data = rht_obj(ht, head); if (!rhlist) - goto out; + goto out_unlock; list = container_of(obj, struct rhlist_head, rhead); @@ -652,9 +706,13 @@ slow_path: RCU_INIT_POINTER(list->next, plist); head = rht_dereference_bucket(head->next, tbl, hash); RCU_INIT_POINTER(list->rhead.next, head); - rcu_assign_pointer(*pprev, obj); - - goto good; + if (pprev) { + rcu_assign_pointer(*pprev, obj); + rht_unlock(bkt); + } else + rht_assign_unlock(bkt, obj); + data = NULL; + goto out; } if (elasticity <= 0) @@ -662,12 +720,13 @@ slow_path: data = ERR_PTR(-E2BIG); if (unlikely(rht_grow_above_max(ht, tbl))) - goto out; + goto out_unlock; if (unlikely(rht_grow_above_100(ht, tbl))) goto slow_path; - head = rht_dereference_bucket(*pprev, tbl, hash); + /* Inserting at head of list makes unlocking free. */ + head = rht_ptr(rht_dereference_bucket(*bkt, tbl, hash)); RCU_INIT_POINTER(obj->next, head); if (rhlist) { @@ -677,20 +736,21 @@ slow_path: RCU_INIT_POINTER(list->next, NULL); } - rcu_assign_pointer(*pprev, obj); - atomic_inc(&ht->nelems); + rht_assign_unlock(bkt, obj); + if (rht_grow_above_75(ht, tbl)) schedule_work(&ht->run_work); -good: data = NULL; - out: - spin_unlock_bh(lock); rcu_read_unlock(); return data; + +out_unlock: + rht_unlock(bkt); + goto out; } /** @@ -699,9 +759,9 @@ out: * @obj: pointer to hash head inside object * @params: hash table parameters * - * Will take a per bucket spinlock to protect against mutual mutations + * Will take the per bucket bitlock to protect against mutual mutations * on the same bucket. Multiple insertions may occur in parallel unless - * they map to the same bucket lock. + * they map to the same bucket. * * It is safe to call this function from atomic context. * @@ -728,9 +788,9 @@ static inline int rhashtable_insert_fast( * @list: pointer to hash list head inside object * @params: hash table parameters * - * Will take a per bucket spinlock to protect against mutual mutations + * Will take the per bucket bitlock to protect against mutual mutations * on the same bucket. Multiple insertions may occur in parallel unless - * they map to the same bucket lock. + * they map to the same bucket. * * It is safe to call this function from atomic context. * @@ -751,9 +811,9 @@ static inline int rhltable_insert_key( * @list: pointer to hash list head inside object * @params: hash table parameters * - * Will take a per bucket spinlock to protect against mutual mutations + * Will take the per bucket bitlock to protect against mutual mutations * on the same bucket. Multiple insertions may occur in parallel unless - * they map to the same bucket lock. + * they map to the same bucket. * * It is safe to call this function from atomic context. * @@ -880,21 +940,20 @@ static inline int __rhashtable_remove_fast_one( struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { + struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; - spinlock_t * lock; unsigned int hash; int err = -ENOENT; hash = rht_head_hashfn(ht, tbl, obj, params); - lock = rht_bucket_lock(tbl, hash); + bkt = rht_bucket_var(tbl, hash); + if (!bkt) + return -ENOENT; + pprev = NULL; + rht_lock(bkt); - spin_lock_bh(lock); - - pprev = rht_bucket_var(tbl, hash); - if (!pprev) - goto out; - rht_for_each_from(he, *pprev, tbl, hash) { + rht_for_each_from(he, rht_ptr(*bkt), tbl, hash) { struct rhlist_head *list; list = container_of(he, struct rhlist_head, rhead); @@ -934,13 +993,17 @@ static inline int __rhashtable_remove_fast_one( } } - rcu_assign_pointer(*pprev, obj); - break; + if (pprev) { + rcu_assign_pointer(*pprev, obj); + rht_unlock(bkt); + } else { + rht_assign_unlock(bkt, obj); + } + goto unlocked; } -out: - spin_unlock_bh(lock); - + rht_unlock(bkt); +unlocked: if (err > 0) { atomic_dec(&ht->nelems); if (unlikely(ht->p.automatic_shrinking && @@ -1029,9 +1092,9 @@ static inline int __rhashtable_replace_fast( struct rhash_head *obj_old, struct rhash_head *obj_new, const struct rhashtable_params params) { + struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; - spinlock_t *lock; unsigned int hash; int err = -ENOENT; @@ -1042,27 +1105,33 @@ static inline int __rhashtable_replace_fast( if (hash != rht_head_hashfn(ht, tbl, obj_new, params)) return -EINVAL; - lock = rht_bucket_lock(tbl, hash); + bkt = rht_bucket_var(tbl, hash); + if (!bkt) + return -ENOENT; - spin_lock_bh(lock); + pprev = NULL; + rht_lock(bkt); - pprev = rht_bucket_var(tbl, hash); - if (!pprev) - goto out; - rht_for_each_from(he, *pprev, tbl, hash) { + rht_for_each_from(he, rht_ptr(*bkt), tbl, hash) { if (he != obj_old) { pprev = &he->next; continue; } rcu_assign_pointer(obj_new->next, obj_old->next); - rcu_assign_pointer(*pprev, obj_new); + if (pprev) { + rcu_assign_pointer(*pprev, obj_new); + rht_unlock(bkt); + } else { + rht_assign_unlock(bkt, obj_new); + } err = 0; - break; + goto unlocked; } -out: - spin_unlock_bh(lock); + rht_unlock(bkt); + +unlocked: return err; } |