author      Kent Overstreet <kent.overstreet@linux.dev>    2023-05-20 23:57:48 -0400
committer   Kent Overstreet <kent.overstreet@linux.dev>    2023-10-22 17:10:02 -0400
commit      1fb4fe63178881a0ac043a5c05288d9fff85d6b8 (patch)
tree        0d0d67219ffab8adfe1c1ba51eee301fa2f938e3 /fs/bcachefs
parent      c4bd3491b1c0b335f63599ec96d1d4ab0d37a3c1 (diff)
six locks: Kill six_lock_state union
As suggested by Linus, this drops the six_lock_state union in favor of
raw bitmasks.
On the one hand, bitfields give more type-level structure to the code.
However, a significant amount of the code was working with
six_lock_state as a u64/atomic64_t, and the conversions from the
bitfields to the u64 were deemed a bit too out-there.
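For reference, this is the layout being dropped, abridged from the six.h hunk at the bottom of this diff (the member holding `unsigned long l` and the comment on seq are omitted here):

    union six_lock_state {
            struct {
                    atomic64_t      counter;
            };

            struct {
                    u64             v;
            };

            struct {
                    unsigned        read_lock:26;
                    unsigned        write_locking:1;
                    unsigned        intent_lock:1;
                    unsigned        nospin:1;
                    unsigned        waiters:3;
                    u32             seq;
            };
    };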
More significantly, because bitfield order is poorly defined (#ifdef
__LITTLE_ENDIAN_BITFIELD can be used, but is gross), incrementing the
sequence number would overflow into the rest of the bitfield if the
compiler didn't put the sequence number at the high end of the word.
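A minimal userspace sketch of that point (not the kernel code; the constants mirror SIX_STATE_SEQ_OFFSET, SIX_LOCK_HELD_write and SIX_STATE_READ_LOCK from the six.c hunk below): with explicit shifts the sequence number occupies bits 32-63 by construction, so bumping it is a plain 64-bit add that neither depends on compiler bitfield layout nor spills into the flag bits.

    #include <stdint.h>
    #include <stdio.h>

    #define SEQ_OFFSET      32
    #define HELD_WRITE      (1ULL << SEQ_OFFSET)    /* adding this bumps seq by 1 */
    #define READ_MASK       (~(~0ULL << 26))        /* bits 0-25: reader count */

    int main(void)
    {
            /* Only the bit arithmetic is illustrated here, not the locking rules. */
            uint64_t state = 3;                      /* three readers, seq == 0 */

            state += HELD_WRITE;                     /* write lock:   seq 0 -> 1 (odd = held) */
            state += HELD_WRITE;                     /* write unlock: seq 1 -> 2 (even = free) */

            printf("readers=%llu seq=%u\n",
                   (unsigned long long)(state & READ_MASK),
                   (unsigned)(state >> SEQ_OFFSET)); /* prints: readers=3 seq=2 */
            return 0;
    }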
The new code is a bit saner when we're on an architecture without real
atomic64_t support - all accesses to lock->state now go through
atomic64_*() operations.
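For example, the sequence number is now read through the new six_lock_seq() helper (added to six.h in the diff below), which replaces the open-coded b->c.lock.state.seq accesses in the bcachefs hunks:

    static inline u32 six_lock_seq(const struct six_lock *lock)
    {
            return atomic64_read(&lock->state) >> 32;
    }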
On architectures with real atomic64_t support, we additionally use
atomic bit ops for setting/clearing individual bits.
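A rough userspace sketch of the mask-to-bit-number translation this relies on (it mirrors u64_mask_to_ulong_bitnr() from the six.c hunk below; the function name and the __builtin_ctzll shortcut are illustrative, not the kernel implementation). set_bit()/clear_bit() index the state word as an array of unsigned long, so on 32-bit big-endian the low 32 bits of the u64 land in the second long:

    #include <stdint.h>
    #include <limits.h>

    /* Bit index of the single set bit in @mask, as seen by set_bit()/clear_bit().
     * Like the kernel helper, this assumes the flag lives in the low 32 bits. */
    static unsigned mask_to_bitnr(uint64_t mask)
    {
    #if ULONG_MAX == UINT64_MAX
            return __builtin_ctzll(mask);           /* 64-bit: one long covers the word */
    #elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
            return __builtin_ctzll(mask);           /* 32-bit LE: low half is long[0] */
    #else
            return __builtin_ctzll(mask) + 32;      /* 32-bit BE: low half is long[1] */
    #endif
    }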
Text size: 7467 bytes -> 4649 bytes - compilers still suck at
bitfields.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs')
-rw-r--r--   fs/bcachefs/btree_cache.c             6
-rw-r--r--   fs/bcachefs/btree_io.c                2
-rw-r--r--   fs/bcachefs/btree_iter.c              4
-rw-r--r--   fs/bcachefs/btree_iter.h              2
-rw-r--r--   fs/bcachefs/btree_key_cache.c         6
-rw-r--r--   fs/bcachefs/btree_locking.h           4
-rw-r--r--   fs/bcachefs/btree_update_interior.c   2
-rw-r--r--   fs/bcachefs/six.c                   312
-rw-r--r--   fs/bcachefs/six.h                    40
-rw-r--r--   fs/bcachefs/trace.h                   8
10 files changed, 221 insertions, 165 deletions
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 5801f4ff9097..58ef9e7b4bdf 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -735,7 +735,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
 	set_btree_node_read_in_flight(b);
 
 	six_unlock_write(&b->c.lock);
-	seq = b->c.lock.state.seq;
+	seq = six_lock_seq(&b->c.lock);
 	six_unlock_intent(&b->c.lock);
 
 	/* Unlock before doing IO: */
@@ -859,7 +859,7 @@ retry:
 	}
 
 	if (unlikely(btree_node_read_in_flight(b))) {
-		u32 seq = b->c.lock.state.seq;
+		u32 seq = six_lock_seq(&b->c.lock);
 
 		six_unlock_type(&b->c.lock, lock_type);
 		bch2_trans_unlock(trans);
@@ -957,7 +957,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
 	}
 
 	if (unlikely(btree_node_read_in_flight(b))) {
-		u32 seq = b->c.lock.state.seq;
+		u32 seq = six_lock_seq(&b->c.lock);
 
 		six_unlock_type(&b->c.lock, lock_type);
 		bch2_trans_unlock(trans);
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index decbbaace1ee..0a7a18eca397 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -483,7 +483,7 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
 	struct btree_node_entry *bne;
 	bool reinit_iter = false;
 
-	EBUG_ON(!(b->c.lock.state.seq & 1));
+	EBUG_ON(!six_lock_counts(&b->c.lock).n[SIX_LOCK_write]);
 	BUG_ON(bset_written(b, bset(b, &b->set[1])));
 	BUG_ON(btree_node_just_written(b));
 
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index d906bfb6754d..3e65e6876ec7 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -652,9 +652,9 @@ void bch2_btree_path_level_init(struct btree_trans *trans,
 	BUG_ON(path->cached);
 
 	EBUG_ON(!btree_path_pos_in_node(path, b));
-	EBUG_ON(b->c.lock.state.seq & 1);
+	EBUG_ON(six_lock_seq(&b->c.lock) & 1);
 
-	path->l[b->c.level].lock_seq = b->c.lock.state.seq;
+	path->l[b->c.level].lock_seq = six_lock_seq(&b->c.lock);
 	path->l[b->c.level].b = b;
 	__btree_path_level_init(path, b->c.level);
 }
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index 188a6cd483f8..7d3564d72a7d 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -49,7 +49,7 @@ static inline bool btree_node_lock_seq_matches(const struct btree_path *path,
 	 * write lock. The lock sequence number is incremented by taking and
 	 * releasing write locks and is even when unlocked:
 	 */
-	return path->l[level].lock_seq >> 1 == b->c.lock.state.seq >> 1;
+	return path->l[level].lock_seq >> 1 == six_lock_seq(&b->c.lock) >> 1;
 }
 
 static inline struct btree *btree_node_parent(struct btree_path *path,
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 9725d85b99b3..37977b774d61 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -251,7 +251,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
 	}
 
 	path->l[0].b = (void *) ck;
-	path->l[0].lock_seq = ck->c.lock.state.seq;
+	path->l[0].lock_seq = six_lock_seq(&ck->c.lock);
 	mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
 
 	ret = bch2_btree_node_lock_write(trans, path, &ck->c);
@@ -506,7 +506,7 @@ retry:
 		mark_btree_node_locked(trans, path, 0, lock_want);
 	}
 
-	path->l[0].lock_seq = ck->c.lock.state.seq;
+	path->l[0].lock_seq = six_lock_seq(&ck->c.lock);
 	path->l[0].b = (void *) ck;
 fill:
 	path->uptodate = BTREE_ITER_UPTODATE;
@@ -588,7 +588,7 @@ retry:
 		mark_btree_node_locked(trans, path, 0, lock_want);
 	}
 
-	path->l[0].lock_seq = ck->c.lock.state.seq;
+	path->l[0].lock_seq = six_lock_seq(&ck->c.lock);
 	path->l[0].b = (void *) ck;
 fill:
 	if (!ck->valid)
diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h
index 660975839c89..a897bdc123c3 100644
--- a/fs/bcachefs/btree_locking.h
+++ b/fs/bcachefs/btree_locking.h
@@ -175,7 +175,7 @@ bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_pat
 	struct btree_path *linked;
 
 	EBUG_ON(path->l[b->c.level].b != b);
-	EBUG_ON(path->l[b->c.level].lock_seq + 1 != b->c.lock.state.seq);
+	EBUG_ON(path->l[b->c.level].lock_seq + 1 != six_lock_seq(&b->c.lock));
 	EBUG_ON(btree_node_locked_type(path, b->c.level) != SIX_LOCK_write);
 
 	mark_btree_node_locked_noreset(path, b->c.level, SIX_LOCK_intent);
@@ -283,7 +283,7 @@ static inline int __btree_node_lock_write(struct btree_trans *trans,
 					  bool lock_may_not_fail)
 {
 	EBUG_ON(&path->l[b->level].b->c != b);
-	EBUG_ON(path->l[b->level].lock_seq != b->lock.state.seq);
+	EBUG_ON(path->l[b->level].lock_seq != six_lock_seq(&b->lock));
 	EBUG_ON(!btree_node_intent_locked(path, b->level));
 
 	/*
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 6ba0954e648e..1319337c5382 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -688,7 +688,7 @@ err:
 		bch2_trans_unlock(&trans);
 		btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_intent);
 		mark_btree_node_locked(&trans, path, b->c.level, SIX_LOCK_intent);
-		path->l[b->c.level].lock_seq = b->c.lock.state.seq;
+		path->l[b->c.level].lock_seq = six_lock_seq(&b->c.lock);
 		path->l[b->c.level].b = b;
 
 		bch2_btree_node_lock_write_nofail(&trans, path, &b->c);
diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c
index 1a64b8a027a7..2e222eb2a907 100644
--- a/fs/bcachefs/six.c
+++ b/fs/bcachefs/six.c
@@ -13,9 +13,9 @@
 #include "six.h"
 
 #ifdef DEBUG
-#define EBUG_ON(cond)		BUG_ON(cond)
+#define EBUG_ON(cond)			BUG_ON(cond)
 #else
-#define EBUG_ON(cond)		do {} while (0)
+#define EBUG_ON(cond)			do {} while (0)
 #endif
 
 #define six_acquire(l, t, r, ip)	lock_acquire(l, 0, t, r, 1, NULL, ip)
@@ -23,6 +23,39 @@
 
 static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
 
+/*
+ * bits 0-26		reader count
+ * bits 26-27		write_locking (a thread is trying to get a write lock,
+ *			but does not have one yet)
+ * bits 27-28		held for intent
+ * bits 28-29		nospin - optimistic spinning has timed out
+ * bits 29-30		has read waiters
+ * bits 30-31		has intent waiters
+ * bits 31-32		has write waiters
+ * bits 32-64		sequence number: incremented on every write lock or
+ *			unlock, thus bit 33 (sequence number odd) indicates
+ *			lock is currently held for write
+ */
+
+#define SIX_STATE_READ_OFFSET		0
+#define SIX_STATE_READ_BITS		26
+
+#define SIX_STATE_READ_LOCK		~(~0ULL << 26)
+#define SIX_STATE_WRITE_LOCKING		(1ULL << 26)
+#define SIX_STATE_INTENT_HELD		(1ULL << 27)
+#define SIX_STATE_NOSPIN		(1ULL << 28)
+#define SIX_STATE_WAITING_READ		(1ULL << (29 + SIX_LOCK_read))
+#define SIX_STATE_WAITING_INTENT	(1ULL << (29 + SIX_LOCK_intent))
+#define SIX_STATE_WAITING_WRITE		(1ULL << (29 + SIX_LOCK_write))
+
+#define SIX_STATE_SEQ_OFFSET		32
+#define SIX_STATE_SEQ_BITS		32
+#define SIX_STATE_SEQ			(~0ULL << 32)
+
+#define SIX_LOCK_HELD_read		SIX_STATE_READ_LOCK
+#define SIX_LOCK_HELD_intent		SIX_STATE_INTENT_HELD
+#define SIX_LOCK_HELD_write		(1ULL << SIX_STATE_SEQ_OFFSET)
+
 struct six_lock_vals {
 	/* Value we add to the lock in order to take the lock: */
 	u64			lock_val;
@@ -40,44 +73,109 @@ struct six_lock_vals {
 	enum six_lock_type	unlock_wakeup;
 };
 
-#define __SIX_VAL(field, _v)	(((union six_lock_state) { .field = _v }).v)
-
-#define __SIX_LOCK_HELD_read	__SIX_VAL(read_lock, ~0)
-#define __SIX_LOCK_HELD_intent	__SIX_VAL(intent_lock, ~0)
-#define __SIX_LOCK_HELD_write	__SIX_VAL(seq, 1)
-
 #define LOCK_VALS {							\
 	[SIX_LOCK_read] = {						\
-		.lock_val	= __SIX_VAL(read_lock, 1),		\
-		.lock_fail	= __SIX_LOCK_HELD_write + __SIX_VAL(write_locking, 1),\
-		.unlock_val	= -__SIX_VAL(read_lock, 1),		\
-		.held_mask	= __SIX_LOCK_HELD_read,			\
+		.lock_val	= 1ULL << SIX_STATE_READ_OFFSET,	\
+		.lock_fail	= SIX_LOCK_HELD_write|SIX_STATE_WRITE_LOCKING,\
+		.unlock_val	= -(1ULL << SIX_STATE_READ_OFFSET),	\
+		.held_mask	= SIX_LOCK_HELD_read,			\
 		.unlock_wakeup	= SIX_LOCK_write,			\
 	},								\
 	[SIX_LOCK_intent] = {						\
-		.lock_val	= __SIX_VAL(intent_lock, 1),		\
-		.lock_fail	= __SIX_LOCK_HELD_intent,		\
-		.unlock_val	= -__SIX_VAL(intent_lock, 1),		\
-		.held_mask	= __SIX_LOCK_HELD_intent,		\
+		.lock_val	= SIX_STATE_INTENT_HELD,		\
+		.lock_fail	= SIX_LOCK_HELD_intent,			\
+		.unlock_val	= -SIX_STATE_INTENT_HELD,		\
+		.held_mask	= SIX_LOCK_HELD_intent,			\
 		.unlock_wakeup	= SIX_LOCK_intent,			\
 	},								\
 	[SIX_LOCK_write] = {						\
-		.lock_val	= __SIX_VAL(seq, 1),			\
-		.lock_fail	= __SIX_LOCK_HELD_read,			\
-		.unlock_val	= __SIX_VAL(seq, 1),			\
-		.held_mask	= __SIX_LOCK_HELD_write,		\
+		.lock_val	= SIX_LOCK_HELD_write,			\
+		.lock_fail	= SIX_LOCK_HELD_read,			\
+		.unlock_val	= SIX_LOCK_HELD_write,			\
+		.held_mask	= SIX_LOCK_HELD_write,			\
 		.unlock_wakeup	= SIX_LOCK_read,			\
 	},								\
 }
 
+static inline u32 six_state_seq(u64 state)
+{
+	return state >> SIX_STATE_SEQ_OFFSET;
+}
+
+#ifdef CONFIG_GENERIC_ATOMIC64
+
+static inline void six_set_bitmask(struct six_lock *lock, u64 mask)
+{
+	u64 old, new, v = atomic64_read(&lock->state);
+
+	do {
+		old = new = v;
+		if ((old & mask) == mask)
+			break;
+		new |= mask;
+	} while ((v = atomic64_cmpxchg(&lock->state, old, new)) != old);
+}
+
+static inline void six_clear_bitmask(struct six_lock *lock, u64 mask)
+{
+	u64 old, new, v = atomic64_read(&lock->state);
+
+	do {
+		old = new = v;
+		if (!(old & mask))
+			break;
+		new &= ~mask;
+	} while ((v = atomic64_cmpxchg(&lock->state, old, new)) != old);
+}
+
+#else
+
+/*
+ * Returns the index of the first set bit, treating @mask as an array of ulongs:
+ * that is, a bit index that can be passed to test_bit()/set_bit().
+ *
+ * Assumes the set bit we want is in the low 4 bytes:
+ */
+static inline unsigned u64_mask_to_ulong_bitnr(u64 mask)
+{
+#if BITS_PER_LONG == 64
+	return ilog2(mask);
+#else
+#if defined(__LITTLE_ENDIAN)
+	return ilog2((u32) mask);
+#elif defined(__BIG_ENDIAN)
+	return ilog2((u32) mask) + 32;
+#else
+#error Unknown byteorder
+#endif
+#endif
+}
+
+static inline void six_set_bitmask(struct six_lock *lock, u64 mask)
+{
+	unsigned bitnr = u64_mask_to_ulong_bitnr(mask);
+
+	if (!test_bit(bitnr, (unsigned long *) &lock->state))
+		set_bit(bitnr, (unsigned long *) &lock->state);
+}
+
+static inline void six_clear_bitmask(struct six_lock *lock, u64 mask)
+{
+	unsigned bitnr = u64_mask_to_ulong_bitnr(mask);
+
+	if (test_bit(bitnr, (unsigned long *) &lock->state))
+		clear_bit(bitnr, (unsigned long *) &lock->state);
+}
+
+#endif
+
 static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
-				 union six_lock_state old,
-				 struct task_struct *owner)
+				 u64 old, struct task_struct *owner)
 {
 	if (type != SIX_LOCK_intent)
 		return;
 
-	if (!old.intent_lock) {
+	if (!(old & SIX_LOCK_HELD_intent)) {
 		EBUG_ON(lock->owner);
 		lock->owner = owner;
 	} else {
@@ -95,22 +193,20 @@ static inline unsigned pcpu_read_count(struct six_lock *lock)
 	return read_count;
 }
 
-/* This is probably up there with the more evil things I've done */
-#define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l))
-
 static int __do_six_trylock_type(struct six_lock *lock,
 				 enum six_lock_type type,
 				 struct task_struct *task,
 				 bool try)
 {
 	const struct six_lock_vals l[] = LOCK_VALS;
-	union six_lock_state old, new;
 	int ret;
-	u64 v;
+	u64 old, new, v;
 
 	EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
-	EBUG_ON(type == SIX_LOCK_write && (lock->state.seq & 1));
-	EBUG_ON(type == SIX_LOCK_write && (try != !(lock->state.write_locking)));
+	EBUG_ON(type == SIX_LOCK_write &&
+		(atomic64_read(&lock->state) & SIX_LOCK_HELD_write));
+	EBUG_ON(type == SIX_LOCK_write &&
+		(try != !(atomic64_read(&lock->state) & SIX_STATE_WRITE_LOCKING)));
 
 	/*
 	 * Percpu reader mode:
@@ -133,8 +229,8 @@ static int __do_six_trylock_type(struct six_lock *lock,
 
 		smp_mb();
 
-		old.v = READ_ONCE(lock->state.v);
-		ret = !(old.v & l[type].lock_fail);
+		old = atomic64_read(&lock->state);
+		ret = !(old & l[type].lock_fail);
 
 		this_cpu_sub(*lock->readers, !ret);
 		preempt_enable();
@@ -144,12 +240,12 @@ static int __do_six_trylock_type(struct six_lock *lock,
 		 * lock, issue a wakeup because we might have caused a
 		 * spurious trylock failure:
 		 */
-		if (old.write_locking)
+		if (old & SIX_STATE_WRITE_LOCKING)
 			ret = -1 - SIX_LOCK_write;
 	} else if (type == SIX_LOCK_write && lock->readers) {
 		if (try) {
-			atomic64_add(__SIX_VAL(write_locking, 1),
-				     &lock->state.counter);
+			atomic64_add(SIX_STATE_WRITE_LOCKING,
+				     &lock->state);
 			smp_mb__after_atomic();
 		}
 
@@ -161,47 +257,47 @@ static int __do_six_trylock_type(struct six_lock *lock,
 		 */
 		v = 0;
 		if (ret)
-			v += __SIX_VAL(seq, 1);
+			v += SIX_LOCK_HELD_write;
 		if (ret || try)
-			v -= __SIX_VAL(write_locking, 1);
+			v -= SIX_STATE_WRITE_LOCKING;
 
 		if (try && !ret) {
-			old.v = atomic64_add_return(v, &lock->state.counter);
-			if (old.waiters & (1 << SIX_LOCK_read))
+			old = atomic64_add_return(v, &lock->state);
+			if (old & SIX_STATE_WAITING_READ)
 				ret = -1 - SIX_LOCK_read;
 		} else {
-			atomic64_add(v, &lock->state.counter);
+			atomic64_add(v, &lock->state);
 		}
 	} else {
-		v = READ_ONCE(lock->state.v);
+		v = atomic64_read(&lock->state);
 
 		do {
-			new.v = old.v = v;
+			new = old = v;
 
-			if (!(old.v & l[type].lock_fail)) {
-				new.v += l[type].lock_val;
+			if (!(old & l[type].lock_fail)) {
+				new += l[type].lock_val;
 
 				if (type == SIX_LOCK_write)
-					new.write_locking = 0;
+					new &= ~SIX_STATE_WRITE_LOCKING;
 			} else {
 				break;
 			}
-		} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
-					old.v, new.v)) != old.v);
+		} while ((v = atomic64_cmpxchg_acquire(&lock->state, old, new)) != old);
 
-		ret = !(old.v & l[type].lock_fail);
+		ret = !(old & l[type].lock_fail);
 
-		EBUG_ON(ret && !(lock->state.v & l[type].held_mask));
+		EBUG_ON(ret && !(atomic64_read(&lock->state) & l[type].held_mask));
 	}
 
 	if (ret > 0)
 		six_set_owner(lock, type, old, task);
 
-	EBUG_ON(type == SIX_LOCK_write && (try || ret > 0) && (lock->state.write_locking));
+	EBUG_ON(type == SIX_LOCK_write && (try || ret > 0) &&
+		(atomic64_read(&lock->state) & SIX_STATE_WRITE_LOCKING));
 
 	return ret;
 }
 
-static inline void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
+static void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
 {
 	struct six_lock_waiter *w, *next;
 	struct task_struct *task;
@@ -235,7 +331,7 @@ again:
 		wake_up_process(task);
 	}
 
-	clear_bit(waitlist_bitnr(lock_type), (unsigned long *) &lock->state.v);
+	six_clear_bitmask(lock, SIX_STATE_WAITING_READ << lock_type);
 unlock:
 	raw_spin_unlock(&lock->wait_lock);
 
@@ -246,14 +342,13 @@ unlock:
 }
 
 __always_inline
-static void six_lock_wakeup(struct six_lock *lock,
-			    union six_lock_state state,
+static void six_lock_wakeup(struct six_lock *lock, u64 state,
 			    enum six_lock_type lock_type)
 {
-	if (lock_type == SIX_LOCK_write && state.read_lock)
+	if (lock_type == SIX_LOCK_write && (state & SIX_LOCK_HELD_read))
 		return;
 
-	if (!(state.waiters & (1 << lock_type)))
+	if (!(state & (SIX_STATE_WAITING_READ << lock_type)))
 		return;
 
 	__six_lock_wakeup(lock, lock_type);
@@ -288,8 +383,7 @@ bool six_relock_ip_type(struct six_lock *lock, enum six_lock_type type,
 			unsigned seq, unsigned long ip)
 {
 	const struct six_lock_vals l[] = LOCK_VALS;
-	union six_lock_state old;
-	u64 v;
+	u64 old, v;
 
 	EBUG_ON(type == SIX_LOCK_write);
 
@@ -302,8 +396,8 @@ bool six_relock_ip_type(struct six_lock *lock, enum six_lock_type type,
 
 		smp_mb();
 
-		old.v = READ_ONCE(lock->state.v);
-		ret = !(old.v & l[type].lock_fail) && old.seq == seq;
+		old = atomic64_read(&lock->state);
+		ret = !(old & l[type].lock_fail) && six_state_seq(old) == seq;
 
 		this_cpu_sub(*lock->readers, !ret);
 		preempt_enable();
@@ -314,21 +408,21 @@ bool six_relock_ip_type(struct six_lock *lock, enum six_lock_type type,
 		 */
 		if (ret)
 			six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
-		else if (old.write_locking)
+		else if (old & SIX_STATE_WRITE_LOCKING)
 			six_lock_wakeup(lock, old, SIX_LOCK_write);
 
 		return ret;
 	}
 
-	v = READ_ONCE(lock->state.v);
+	v = atomic64_read(&lock->state);
 	do {
-		old.v = v;
+		old = v;
 
-		if (old.seq != seq || old.v & l[type].lock_fail)
+		if ((old & l[type].lock_fail) || six_state_seq(old) != seq)
 			return false;
-	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
-				old.v,
-				old.v + l[type].lock_val)) != old.v);
+	} while ((v = atomic64_cmpxchg_acquire(&lock->state,
+				old,
+				old + l[type].lock_val)) != old);
 
 	six_set_owner(lock, type, old, current);
 	if (type != SIX_LOCK_write)
@@ -355,17 +449,6 @@ static inline bool six_can_spin_on_owner(struct six_lock *lock)
 	return ret;
 }
 
-static inline void six_set_nospin(struct six_lock *lock)
-{
-	union six_lock_state old, new;
-	u64 v = READ_ONCE(lock->state.v);
-
-	do {
-		new.v = old.v = v;
-		new.nospin = true;
-	} while ((v = atomic64_cmpxchg(&lock->state.counter, old.v, new.v)) != old.v);
-}
-
 static inline bool six_spin_on_owner(struct six_lock *lock,
 				     struct task_struct *owner,
 				     u64 end_time)
@@ -389,7 +472,7 @@ static inline bool six_spin_on_owner(struct six_lock *lock,
 		}
 
 		if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
-			six_set_nospin(lock);
+			six_set_bitmask(lock, SIX_STATE_NOSPIN);
 			ret = false;
 			break;
 		}
@@ -483,12 +566,12 @@ static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type ty
 				    six_lock_should_sleep_fn should_sleep_fn, void *p,
 				    unsigned long ip)
 {
-	union six_lock_state old;
+	u64 old;
 	int ret = 0;
 
 	if (type == SIX_LOCK_write) {
-		EBUG_ON(lock->state.write_locking);
-		atomic64_add(__SIX_VAL(write_locking, 1), &lock->state.counter);
+		EBUG_ON(atomic64_read(&lock->state) & SIX_STATE_WRITE_LOCKING);
+		atomic64_add(SIX_STATE_WRITE_LOCKING, &lock->state);
 		smp_mb__after_atomic();
 	}
 
@@ -502,8 +585,7 @@ static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type ty
 	wait->lock_acquired = false;
 
 	raw_spin_lock(&lock->wait_lock);
-	if (!(lock->state.waiters & (1 << type)))
-		set_bit(waitlist_bitnr(type), (unsigned long *) &lock->state.v);
+	six_set_bitmask(lock, SIX_STATE_WAITING_READ << type);
 	/*
 	 * Retry taking the lock after taking waitlist lock, have raced with an
 	 * unlock:
@@ -558,9 +640,8 @@ static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type ty
 	__set_current_state(TASK_RUNNING);
 out:
-	if (ret && type == SIX_LOCK_write && lock->state.write_locking) {
-		old.v = atomic64_sub_return(__SIX_VAL(write_locking, 1),
-					    &lock->state.counter);
+	if (ret && type == SIX_LOCK_write) {
+		six_clear_bitmask(lock, SIX_STATE_WRITE_LOCKING);
 		six_lock_wakeup(lock, old, SIX_LOCK_read);
 	}
 
@@ -595,7 +676,7 @@ __always_inline
 static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
 {
 	const struct six_lock_vals l[] = LOCK_VALS;
-	union six_lock_state state;
+	u64 state;
 
 	if (type == SIX_LOCK_intent)
 		lock->owner = NULL;
@@ -605,15 +686,15 @@ static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
 		smp_mb(); /* unlock barrier */
 		this_cpu_dec(*lock->readers);
 		smp_mb(); /* between unlocking and checking for waiters */
-		state.v = READ_ONCE(lock->state.v);
+		state = atomic64_read(&lock->state);
 	} else {
 		u64 v = l[type].unlock_val;
 
 		if (type != SIX_LOCK_read)
-			v -= lock->state.v & __SIX_VAL(nospin, 1);
+			v -= atomic64_read(&lock->state) & SIX_STATE_NOSPIN;
 
-		EBUG_ON(!(lock->state.v & l[type].held_mask));
-		state.v = atomic64_add_return_release(v, &lock->state.counter);
+		EBUG_ON(!(atomic64_read(&lock->state) & l[type].held_mask));
+		state = atomic64_add_return_release(v, &lock->state);
 	}
 
 	six_lock_wakeup(lock, state, l[type].unlock_wakeup);
@@ -622,7 +703,7 @@ static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
 void six_unlock_ip_type(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
 {
 	EBUG_ON(type == SIX_LOCK_write &&
-		!(lock->state.v & __SIX_LOCK_HELD_intent));
+		!(atomic64_read(&lock->state) & SIX_LOCK_HELD_intent));
 	EBUG_ON((type == SIX_LOCK_write || type == SIX_LOCK_intent) &&
 		lock->owner != current);
 
@@ -650,23 +731,22 @@ EXPORT_SYMBOL_GPL(six_lock_downgrade);
 
 bool six_lock_tryupgrade(struct six_lock *lock)
 {
-	union six_lock_state old, new;
-	u64 v = READ_ONCE(lock->state.v);
+	const struct six_lock_vals l[] = LOCK_VALS;
+	u64 old, new, v = atomic64_read(&lock->state);
 
 	do {
-		new.v = old.v = v;
+		new = old = v;
 
-		if (new.intent_lock)
+		if (new & SIX_LOCK_HELD_intent)
 			return false;
 
 		if (!lock->readers) {
-			EBUG_ON(!new.read_lock);
-			new.read_lock--;
+			EBUG_ON(!(new & SIX_LOCK_HELD_read));
+			new += l[SIX_LOCK_read].unlock_val;
 		}
 
-		new.intent_lock = 1;
-	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
-				old.v, new.v)) != old.v);
+		new |= SIX_LOCK_HELD_intent;
+	} while ((v = atomic64_cmpxchg_acquire(&lock->state, old, new)) != old);
 
 	if (lock->readers)
 		this_cpu_dec(*lock->readers);
@@ -712,13 +792,14 @@ void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
 		if (lock->readers) {
 			this_cpu_inc(*lock->readers);
 		} else {
-			EBUG_ON(!lock->state.read_lock &&
-				!lock->state.intent_lock);
-			atomic64_add(l[type].lock_val, &lock->state.counter);
+			EBUG_ON(!(atomic64_read(&lock->state) &
+				  (SIX_LOCK_HELD_read|
+				   SIX_LOCK_HELD_intent)));
+			atomic64_add(l[type].lock_val, &lock->state);
 		}
 		break;
 	case SIX_LOCK_intent:
-		EBUG_ON(!lock->state.intent_lock);
+		EBUG_ON(!(atomic64_read(&lock->state) & SIX_LOCK_HELD_intent));
 		lock->intent_lock_recurse++;
 		break;
 	case SIX_LOCK_write:
@@ -730,7 +811,7 @@ EXPORT_SYMBOL_GPL(six_lock_increment);
 
 void six_lock_wakeup_all(struct six_lock *lock)
 {
-	union six_lock_state state = lock->state;
+	u64 state = atomic64_read(&lock->state);
 	struct six_lock_waiter *w;
 
 	six_lock_wakeup(lock, state, SIX_LOCK_read);
@@ -752,10 +833,11 @@ struct six_lock_count six_lock_counts(struct six_lock *lock)
 	struct six_lock_count ret;
 
 	ret.n[SIX_LOCK_read]   = !lock->readers
-		? lock->state.read_lock
+		? atomic64_read(&lock->state) & SIX_STATE_READ_LOCK
 		: pcpu_read_count(lock);
-	ret.n[SIX_LOCK_intent] = lock->state.intent_lock + lock->intent_lock_recurse;
-	ret.n[SIX_LOCK_write] = lock->state.seq & 1;
+	ret.n[SIX_LOCK_intent] = !!(atomic64_read(&lock->state) & SIX_LOCK_HELD_intent) +
+		lock->intent_lock_recurse;
+	ret.n[SIX_LOCK_write] = !!(atomic64_read(&lock->state) & SIX_LOCK_HELD_write);
 
 	return ret;
 }
@@ -765,17 +847,15 @@ void six_lock_readers_add(struct six_lock *lock, int nr)
 {
 	if (lock->readers)
 		this_cpu_add(*lock->readers, nr);
-	else if (nr > 0)
-		atomic64_add(__SIX_VAL(read_lock, nr), &lock->state.counter);
-	else
-		atomic64_sub(__SIX_VAL(read_lock, -nr), &lock->state.counter);
+	else /* reader count starts at bit 0 */
+		atomic64_add(nr, &lock->state);
 }
 EXPORT_SYMBOL_GPL(six_lock_readers_add);
 
 void six_lock_exit(struct six_lock *lock)
 {
 	WARN_ON(lock->readers && pcpu_read_count(lock));
-	WARN_ON(lock->state.read_lock);
+	WARN_ON(atomic64_read(&lock->state) & SIX_LOCK_HELD_read);
 
 	free_percpu(lock->readers);
 	lock->readers = NULL;
@@ -785,7 +865,7 @@ EXPORT_SYMBOL_GPL(six_lock_exit);
 void __six_lock_init(struct six_lock *lock, const char *name,
 		     struct lock_class_key *key, enum six_lock_init_flags flags)
 {
-	atomic64_set(&lock->state.counter, 0);
+	atomic64_set(&lock->state, 0);
 	raw_spin_lock_init(&lock->wait_lock);
 	INIT_LIST_HEAD(&lock->wait_list);
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/fs/bcachefs/six.h b/fs/bcachefs/six.h
index 5ddabbfb8aba..449589f76628 100644
--- a/fs/bcachefs/six.h
+++ b/fs/bcachefs/six.h
@@ -68,39 +68,6 @@
 
 #define SIX_LOCK_SEPARATE_LOCKFNS
 
-union six_lock_state {
-	struct {
-		atomic64_t	counter;
-	};
-
-	struct {
-		u64		v;
-	};
-
-	struct {
-		/* for waitlist_bitnr() */
-		unsigned long	l;
-	};
-
-	struct {
-		unsigned	read_lock:26;
-		unsigned	write_locking:1;
-		unsigned	intent_lock:1;
-		unsigned	nospin:1;
-		unsigned	waiters:3;
-		/*
-		 * seq works much like in seqlocks: it's incremented every time
-		 * we lock and unlock for write.
-		 *
-		 * If it's odd write lock is held, even unlocked.
-		 *
-		 * Thus readers can unlock, and then lock again later iff it
-		 * hasn't been modified in the meantime.
-		 */
-		u32		seq;
-	};
-};
-
 enum six_lock_type {
 	SIX_LOCK_read,
 	SIX_LOCK_intent,
@@ -108,7 +75,7 @@ enum six_lock_type {
 };
 
 struct six_lock {
-	union six_lock_state	state;
+	atomic64_t		state;
 	unsigned		intent_lock_recurse;
 	struct task_struct	*owner;
 	unsigned __percpu	*readers;
@@ -148,6 +115,11 @@ do {									\
 	__six_lock_init((lock), #lock, &__key, flags);			\
 } while (0)
 
+static inline u32 six_lock_seq(const struct six_lock *lock)
+{
+	return atomic64_read(&lock->state) >> 32;
+}
+
 bool six_trylock_ip_type(struct six_lock *lock, enum six_lock_type type,
 			 unsigned long ip);
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index 8027c2a14199..cfb1779d712a 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -420,7 +420,9 @@ TRACE_EVENT(btree_path_relock_fail,
 		else
 			scnprintf(__entry->node, sizeof(__entry->node), "%px", b);
 		__entry->iter_lock_seq	= path->l[level].lock_seq;
-		__entry->node_lock_seq	= is_btree_node(path, level) ? path->l[level].b->c.lock.state.seq : 0;
+		__entry->node_lock_seq	= is_btree_node(path, level)
+			? six_lock_seq(&path->l[level].b->c.lock)
+			: 0;
 	),
 
 	TP_printk("%s %pS btree %s pos %llu:%llu:%u level %u node %s iter seq %u lock seq %u",
@@ -475,7 +477,9 @@ TRACE_EVENT(btree_path_upgrade_fail,
 		__entry->read_count	= c.n[SIX_LOCK_read];
 		__entry->intent_count	= c.n[SIX_LOCK_read];
 		__entry->iter_lock_seq	= path->l[level].lock_seq;
-		__entry->node_lock_seq	= is_btree_node(path, level) ? path->l[level].b->c.lock.state.seq : 0;
+		__entry->node_lock_seq	= is_btree_node(path, level)
+			? six_lock_seq(&path->l[level].b->c.lock)
+			: 0;
 	),
 
 	TP_printk("%s %pS btree %s pos %llu:%llu:%u level %u locked %u held %u:%u lock count %u:%u iter seq %u lock seq %u",