summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-05-24 10:18:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-05-24 10:18:23 -0700
commit2319be135672f6e45aa937bceaae6c2668c7867c (patch)
tree5d2210243fac4345bbb204064f9602fe9477b785 /kernel
parent143a6252e1b8ab424b4b293512a97cca7295c182 (diff)
parent8491d1bdf5de152f27fc941e2dcdc4e66c950542 (diff)
downloadlinux-stable-2319be135672f6e45aa937bceaae6c2668c7867c.tar.gz
linux-stable-2319be135672f6e45aa937bceaae6c2668c7867c.tar.bz2
linux-stable-2319be135672f6e45aa937bceaae6c2668c7867c.zip
Merge tag 'locking-core-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar: - rwsem cleanups & optimizations/fixes: - Conditionally wake waiters in reader/writer slowpaths - Always try to wake waiters in out_nolock path - Add try_cmpxchg64() implementation, with arch optimizations - and use it to micro-optimize sched_clock_{local,remote}() - Various force-inlining fixes to address objdump instrumentation-check warnings - Add lock contention tracepoints: lock:contention_begin lock:contention_end - Misc smaller fixes & cleanups * tag 'locking-core-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: sched/clock: Use try_cmpxchg64 in sched_clock_{local,remote} locking/atomic/x86: Introduce arch_try_cmpxchg64 locking/atomic: Add generic try_cmpxchg64 support futex: Remove a PREEMPT_RT_FULL reference. locking/qrwlock: Change "queue rwlock" to "queued rwlock" lockdep: Delete local_irq_enable_in_hardirq() locking/mutex: Make contention tracepoints more consistent wrt adaptive spinning locking: Apply contention tracepoints in the slow path locking: Add lock contention tracepoints locking/rwsem: Always try to wake waiters in out_nolock path locking/rwsem: Conditionally wake waiters in reader/writer slowpaths locking/rwsem: No need to check for handoff bit if wait queue empty lockdep: Fix -Wunused-parameter for _THIS_IP_ x86/mm: Force-inline __phys_addr_nodebug() x86/kvm/svm: Force-inline GHCB accessors task_stack, x86/cea: Force-inline stack helpers
Diffstat (limited to 'kernel')
-rw-r--r--kernel/entry/common.c6
-rw-r--r--kernel/futex/pi.c2
-rw-r--r--kernel/locking/lockdep.c23
-rw-r--r--kernel/locking/mutex.c18
-rw-r--r--kernel/locking/percpu-rwsem.c5
-rw-r--r--kernel/locking/qrwlock.c17
-rw-r--r--kernel/locking/qspinlock.c5
-rw-r--r--kernel/locking/rtmutex.c11
-rw-r--r--kernel/locking/rwbase_rt.c7
-rw-r--r--kernel/locking/rwsem.c130
-rw-r--r--kernel/locking/semaphore.c15
-rw-r--r--kernel/sched/clock.c4
-rw-r--r--kernel/sched/idle.c2
-rw-r--r--kernel/trace/trace_preemptirq.c4
14 files changed, 167 insertions, 82 deletions
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 9e63923c5a0f..032f164abe7c 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -126,7 +126,7 @@ static __always_inline void __exit_to_user_mode(void)
{
instrumentation_begin();
trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ lockdep_hardirqs_on_prepare();
instrumentation_end();
user_enter_irqoff();
@@ -416,7 +416,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
instrumentation_begin();
/* Tell the tracer that IRET will enable interrupts */
trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ lockdep_hardirqs_on_prepare();
instrumentation_end();
rcu_irq_exit();
lockdep_hardirqs_on(CALLER_ADDR0);
@@ -465,7 +465,7 @@ void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
ftrace_nmi_exit();
if (irq_state.lockdep) {
trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ lockdep_hardirqs_on_prepare();
}
instrumentation_end();
diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c
index 183b28c32c83..ce2889f12375 100644
--- a/kernel/futex/pi.c
+++ b/kernel/futex/pi.c
@@ -1005,7 +1005,7 @@ retry_private:
rt_mutex_init_waiter(&rt_waiter);
/*
- * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
+ * On PREEMPT_RT, when hb->lock becomes an rt_mutex, we must not
* hold it while doing rt_mutex_start_proxy(), because then it will
* include hb->lock in the blocking chain, even through we'll not in
* fact hold it while blocking. This will lead it to report -EDEADLK
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index c06cab6546ed..a6e671b8608d 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -60,7 +60,6 @@
#include "lockdep_internals.h"
-#define CREATE_TRACE_POINTS
#include <trace/events/lock.h>
#ifdef CONFIG_PROVE_LOCKING
@@ -1380,7 +1379,7 @@ static struct lock_list *alloc_list_entry(void)
*/
static int add_lock_to_list(struct lock_class *this,
struct lock_class *links_to, struct list_head *head,
- unsigned long ip, u16 distance, u8 dep,
+ u16 distance, u8 dep,
const struct lock_trace *trace)
{
struct lock_list *entry;
@@ -3133,19 +3132,15 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
* to the previous lock's dependency list:
*/
ret = add_lock_to_list(hlock_class(next), hlock_class(prev),
- &hlock_class(prev)->locks_after,
- next->acquire_ip, distance,
- calc_dep(prev, next),
- *trace);
+ &hlock_class(prev)->locks_after, distance,
+ calc_dep(prev, next), *trace);
if (!ret)
return 0;
ret = add_lock_to_list(hlock_class(prev), hlock_class(next),
- &hlock_class(next)->locks_before,
- next->acquire_ip, distance,
- calc_depb(prev, next),
- *trace);
+ &hlock_class(next)->locks_before, distance,
+ calc_depb(prev, next), *trace);
if (!ret)
return 0;
@@ -4236,14 +4231,13 @@ static void __trace_hardirqs_on_caller(void)
/**
* lockdep_hardirqs_on_prepare - Prepare for enabling interrupts
- * @ip: Caller address
*
* Invoked before a possible transition to RCU idle from exit to user or
* guest mode. This ensures that all RCU operations are done before RCU
* stops watching. After the RCU transition lockdep_hardirqs_on() has to be
* invoked to set the final state.
*/
-void lockdep_hardirqs_on_prepare(unsigned long ip)
+void lockdep_hardirqs_on_prepare(void)
{
if (unlikely(!debug_locks))
return;
@@ -4840,8 +4834,7 @@ EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
static void
print_lock_nested_lock_not_held(struct task_struct *curr,
- struct held_lock *hlock,
- unsigned long ip)
+ struct held_lock *hlock)
{
if (!debug_locks_off())
return;
@@ -5017,7 +5010,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
chain_key = iterate_chain_key(chain_key, hlock_id(hlock));
if (nest_lock && !__lock_is_held(nest_lock, -1)) {
- print_lock_nested_lock_not_held(curr, hlock, ip);
+ print_lock_nested_lock_not_held(curr, hlock);
return 0;
}
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 5e3585950ec8..d973fe6041bf 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -30,6 +30,9 @@
#include <linux/debug_locks.h>
#include <linux/osq_lock.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/lock.h>
+
#ifndef CONFIG_PREEMPT_RT
#include "mutex.h"
@@ -599,12 +602,14 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
preempt_disable();
mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
+ trace_contention_begin(lock, LCB_F_MUTEX | LCB_F_SPIN);
if (__mutex_trylock(lock) ||
mutex_optimistic_spin(lock, ww_ctx, NULL)) {
/* got the lock, yay! */
lock_acquired(&lock->dep_map, ip);
if (ww_ctx)
ww_mutex_set_context_fastpath(ww, ww_ctx);
+ trace_contention_end(lock, 0);
preempt_enable();
return 0;
}
@@ -641,6 +646,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
}
set_current_state(state);
+ trace_contention_begin(lock, LCB_F_MUTEX);
for (;;) {
bool first;
@@ -680,10 +686,16 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
* state back to RUNNING and fall through the next schedule(),
* or we must see its unlock and acquire.
*/
- if (__mutex_trylock_or_handoff(lock, first) ||
- (first && mutex_optimistic_spin(lock, ww_ctx, &waiter)))
+ if (__mutex_trylock_or_handoff(lock, first))
break;
+ if (first) {
+ trace_contention_begin(lock, LCB_F_MUTEX | LCB_F_SPIN);
+ if (mutex_optimistic_spin(lock, ww_ctx, &waiter))
+ break;
+ trace_contention_begin(lock, LCB_F_MUTEX);
+ }
+
raw_spin_lock(&lock->wait_lock);
}
raw_spin_lock(&lock->wait_lock);
@@ -707,6 +719,7 @@ acquired:
skip_wait:
/* got the lock - cleanup and rejoice! */
lock_acquired(&lock->dep_map, ip);
+ trace_contention_end(lock, 0);
if (ww_ctx)
ww_mutex_lock_acquired(ww, ww_ctx);
@@ -719,6 +732,7 @@ err:
__set_current_state(TASK_RUNNING);
__mutex_remove_waiter(lock, &waiter);
err_early_kill:
+ trace_contention_end(lock, ret);
raw_spin_unlock(&lock->wait_lock);
debug_mutex_free_waiter(&waiter);
mutex_release(&lock->dep_map, ip);
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index c9fdae94e098..5fe4c5495ba3 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -9,6 +9,7 @@
#include <linux/sched/task.h>
#include <linux/sched/debug.h>
#include <linux/errno.h>
+#include <trace/events/lock.h>
int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
const char *name, struct lock_class_key *key)
@@ -171,9 +172,11 @@ bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
if (try)
return false;
+ trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_READ);
preempt_enable();
percpu_rwsem_wait(sem, /* .reader = */ true);
preempt_disable();
+ trace_contention_end(sem, 0);
return true;
}
@@ -216,6 +219,7 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
{
might_sleep();
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
+ trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_WRITE);
/* Notify readers to take the slow path. */
rcu_sync_enter(&sem->rss);
@@ -237,6 +241,7 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
/* Wait for all active readers to complete. */
rcuwait_wait_event(&sem->writer, readers_active_check(sem), TASK_UNINTERRUPTIBLE);
+ trace_contention_end(sem, 0);
}
EXPORT_SYMBOL_GPL(percpu_down_write);
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index ec36b73f4733..2e1600906c9f 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -12,10 +12,11 @@
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/spinlock.h>
+#include <trace/events/lock.h>
/**
- * queued_read_lock_slowpath - acquire read lock of a queue rwlock
- * @lock: Pointer to queue rwlock structure
+ * queued_read_lock_slowpath - acquire read lock of a queued rwlock
+ * @lock: Pointer to queued rwlock structure
*/
void queued_read_lock_slowpath(struct qrwlock *lock)
{
@@ -34,6 +35,8 @@ void queued_read_lock_slowpath(struct qrwlock *lock)
}
atomic_sub(_QR_BIAS, &lock->cnts);
+ trace_contention_begin(lock, LCB_F_SPIN | LCB_F_READ);
+
/*
* Put the reader into the wait queue
*/
@@ -51,17 +54,21 @@ void queued_read_lock_slowpath(struct qrwlock *lock)
* Signal the next one in queue to become queue head
*/
arch_spin_unlock(&lock->wait_lock);
+
+ trace_contention_end(lock, 0);
}
EXPORT_SYMBOL(queued_read_lock_slowpath);
/**
- * queued_write_lock_slowpath - acquire write lock of a queue rwlock
- * @lock : Pointer to queue rwlock structure
+ * queued_write_lock_slowpath - acquire write lock of a queued rwlock
+ * @lock : Pointer to queued rwlock structure
*/
void queued_write_lock_slowpath(struct qrwlock *lock)
{
int cnts;
+ trace_contention_begin(lock, LCB_F_SPIN | LCB_F_WRITE);
+
/* Put the writer into the wait queue */
arch_spin_lock(&lock->wait_lock);
@@ -79,5 +86,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
} while (!atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED));
unlock:
arch_spin_unlock(&lock->wait_lock);
+
+ trace_contention_end(lock, 0);
}
EXPORT_SYMBOL(queued_write_lock_slowpath);
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index cbff6ba53d56..65a9a10caa6f 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -22,6 +22,7 @@
#include <linux/prefetch.h>
#include <asm/byteorder.h>
#include <asm/qspinlock.h>
+#include <trace/events/lock.h>
/*
* Include queued spinlock statistics code
@@ -401,6 +402,8 @@ pv_queue:
idx = node->count++;
tail = encode_tail(smp_processor_id(), idx);
+ trace_contention_begin(lock, LCB_F_SPIN);
+
/*
* 4 nodes are allocated based on the assumption that there will
* not be nested NMIs taking spinlocks. That may not be true in
@@ -554,6 +557,8 @@ locked:
pv_kick_node(lock, next);
release:
+ trace_contention_end(lock, 0);
+
/*
* release the node
*/
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 8555c4efe97c..7779ee8abc2a 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -24,6 +24,8 @@
#include <linux/sched/wake_q.h>
#include <linux/ww_mutex.h>
+#include <trace/events/lock.h>
+
#include "rtmutex_common.h"
#ifndef WW_RT
@@ -1579,6 +1581,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
set_current_state(state);
+ trace_contention_begin(lock, LCB_F_RT);
+
ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
if (likely(!ret))
ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
@@ -1601,6 +1605,9 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
* unconditionally. We might have to fix that up.
*/
fixup_rt_mutex_waiters(lock);
+
+ trace_contention_end(lock, ret);
+
return ret;
}
@@ -1683,6 +1690,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
/* Save current state and set state to TASK_RTLOCK_WAIT */
current_save_and_set_rtlock_wait_state();
+ trace_contention_begin(lock, LCB_F_RT);
+
task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
for (;;) {
@@ -1712,6 +1721,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
*/
fixup_rt_mutex_waiters(lock);
debug_rt_mutex_free_waiter(&waiter);
+
+ trace_contention_end(lock, 0);
}
static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c
index 6fd3162e4098..c201aadb9301 100644
--- a/kernel/locking/rwbase_rt.c
+++ b/kernel/locking/rwbase_rt.c
@@ -112,6 +112,8 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
* Reader2 to call up_read(), which might be unbound.
*/
+ trace_contention_begin(rwb, LCB_F_RT | LCB_F_READ);
+
/*
* For rwlocks this returns 0 unconditionally, so the below
* !ret conditionals are optimized out.
@@ -130,6 +132,8 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
raw_spin_unlock_irq(&rtm->wait_lock);
if (!ret)
rwbase_rtmutex_unlock(rtm);
+
+ trace_contention_end(rwb, ret);
return ret;
}
@@ -247,11 +251,13 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
goto out_unlock;
rwbase_set_and_save_current_state(state);
+ trace_contention_begin(rwb, LCB_F_RT | LCB_F_WRITE);
for (;;) {
/* Optimized out for rwlocks */
if (rwbase_signal_pending_state(state, current)) {
rwbase_restore_current_state();
__rwbase_write_unlock(rwb, 0, flags);
+ trace_contention_end(rwb, -EINTR);
return -EINTR;
}
@@ -265,6 +271,7 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
set_current_state(state);
}
rwbase_restore_current_state();
+ trace_contention_end(rwb, 0);
out_unlock:
raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index acde5d6f1254..9d1db4a54d34 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -27,6 +27,7 @@
#include <linux/export.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>
+#include <trace/events/lock.h>
#ifndef CONFIG_PREEMPT_RT
#include "lock_events.h"
@@ -375,16 +376,19 @@ rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
*
* Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of
* this function. Modify with care.
+ *
+ * Return: true if wait_list isn't empty and false otherwise
*/
-static inline void
+static inline bool
rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
{
lockdep_assert_held(&sem->wait_lock);
list_del(&waiter->list);
if (likely(!list_empty(&sem->wait_list)))
- return;
+ return true;
atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count);
+ return false;
}
/*
@@ -559,6 +563,33 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
}
/*
+ * Remove a waiter and try to wake up other waiters in the wait queue
+ * This function is called from the out_nolock path of both the reader and
+ * writer slowpaths with wait_lock held. It releases the wait_lock and
+ * optionally wake up waiters before it returns.
+ */
+static inline void
+rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
+ struct wake_q_head *wake_q)
+ __releases(&sem->wait_lock)
+{
+ bool first = rwsem_first_waiter(sem) == waiter;
+
+ wake_q_init(wake_q);
+
+ /*
+ * If the wait_list isn't empty and the waiter to be deleted is
+ * the first waiter, we wake up the remaining waiters as they may
+ * be eligible to acquire or spin on the lock.
+ */
+ if (rwsem_del_waiter(sem, waiter) && first)
+ rwsem_mark_wake(sem, RWSEM_WAKE_ANY, wake_q);
+ raw_spin_unlock_irq(&sem->wait_lock);
+ if (!wake_q_empty(wake_q))
+ wake_up_q(wake_q);
+}
+
+/*
* This function must be called with the sem->wait_lock held to prevent
* race conditions between checking the rwsem wait list and setting the
* sem->count accordingly.
@@ -901,7 +932,7 @@ done:
*/
static inline void clear_nonspinnable(struct rw_semaphore *sem)
{
- if (rwsem_test_oflags(sem, RWSEM_NONSPINNABLE))
+ if (unlikely(rwsem_test_oflags(sem, RWSEM_NONSPINNABLE)))
atomic_long_andnot(RWSEM_NONSPINNABLE, &sem->owner);
}
@@ -926,6 +957,31 @@ rwsem_spin_on_owner(struct rw_semaphore *sem)
#endif
/*
+ * Prepare to wake up waiter(s) in the wait queue by putting them into the
+ * given wake_q if the rwsem lock owner isn't a writer. If rwsem is likely
+ * reader-owned, wake up read lock waiters in queue front or wake up any
+ * front waiter otherwise.
+
+ * This is being called from both reader and writer slow paths.
+ */
+static inline void rwsem_cond_wake_waiter(struct rw_semaphore *sem, long count,
+ struct wake_q_head *wake_q)
+{
+ enum rwsem_wake_type wake_type;
+
+ if (count & RWSEM_WRITER_MASK)
+ return;
+
+ if (count & RWSEM_READER_MASK) {
+ wake_type = RWSEM_WAKE_READERS;
+ } else {
+ wake_type = RWSEM_WAKE_ANY;
+ clear_nonspinnable(sem);
+ }
+ rwsem_mark_wake(sem, wake_type, wake_q);
+}
+
+/*
* Wait for the read lock to be granted
*/
static struct rw_semaphore __sched *
@@ -935,7 +991,6 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
long rcnt = (count >> RWSEM_READER_SHIFT);
struct rwsem_waiter waiter;
DEFINE_WAKE_Q(wake_q);
- bool wake = false;
/*
* To prevent a constant stream of readers from starving a sleeping
@@ -977,12 +1032,11 @@ queue:
if (list_empty(&sem->wait_list)) {
/*
* In case the wait queue is empty and the lock isn't owned
- * by a writer or has the handoff bit set, this reader can
- * exit the slowpath and return immediately as its
- * RWSEM_READER_BIAS has already been set in the count.
+ * by a writer, this reader can exit the slowpath and return
+ * immediately as its RWSEM_READER_BIAS has already been set
+ * in the count.
*/
- if (!(atomic_long_read(&sem->count) &
- (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {
+ if (!(atomic_long_read(&sem->count) & RWSEM_WRITER_MASK)) {
/* Provide lock ACQUIRE */
smp_acquire__after_ctrl_dep();
raw_spin_unlock_irq(&sem->wait_lock);
@@ -997,22 +1051,13 @@ queue:
/* we're now waiting on the lock, but no longer actively locking */
count = atomic_long_add_return(adjustment, &sem->count);
- /*
- * If there are no active locks, wake the front queued process(es).
- *
- * If there are no writers and we are first in the queue,
- * wake our own waiter to join the existing active readers !
- */
- if (!(count & RWSEM_LOCK_MASK)) {
- clear_nonspinnable(sem);
- wake = true;
- }
- if (wake || (!(count & RWSEM_WRITER_MASK) &&
- (adjustment & RWSEM_FLAG_WAITERS)))
- rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
-
+ rwsem_cond_wake_waiter(sem, count, &wake_q);
raw_spin_unlock_irq(&sem->wait_lock);
- wake_up_q(&wake_q);
+
+ if (!wake_q_empty(&wake_q))
+ wake_up_q(&wake_q);
+
+ trace_contention_begin(sem, LCB_F_READ);
/* wait to be given the lock */
for (;;) {
@@ -1035,13 +1080,14 @@ queue:
__set_current_state(TASK_RUNNING);
lockevent_inc(rwsem_rlock);
+ trace_contention_end(sem, 0);
return sem;
out_nolock:
- rwsem_del_waiter(sem, &waiter);
- raw_spin_unlock_irq(&sem->wait_lock);
+ rwsem_del_wake_waiter(sem, &waiter, &wake_q);
__set_current_state(TASK_RUNNING);
lockevent_inc(rwsem_rlock_fail);
+ trace_contention_end(sem, -EINTR);
return ERR_PTR(-EINTR);
}
@@ -1051,7 +1097,6 @@ out_nolock:
static struct rw_semaphore __sched *
rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
{
- long count;
struct rwsem_waiter waiter;
DEFINE_WAKE_Q(wake_q);
@@ -1075,23 +1120,8 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
/* we're now waiting on the lock */
if (rwsem_first_waiter(sem) != &waiter) {
- count = atomic_long_read(&sem->count);
-
- /*
- * If there were already threads queued before us and:
- * 1) there are no active locks, wake the front
- * queued process(es) as the handoff bit might be set.
- * 2) there are no active writers and some readers, the lock
- * must be read owned; so we try to wake any read lock
- * waiters that were queued ahead of us.
- */
- if (count & RWSEM_WRITER_MASK)
- goto wait;
-
- rwsem_mark_wake(sem, (count & RWSEM_READER_MASK)
- ? RWSEM_WAKE_READERS
- : RWSEM_WAKE_ANY, &wake_q);
-
+ rwsem_cond_wake_waiter(sem, atomic_long_read(&sem->count),
+ &wake_q);
if (!wake_q_empty(&wake_q)) {
/*
* We want to minimize wait_lock hold time especially
@@ -1099,16 +1129,16 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
*/
raw_spin_unlock_irq(&sem->wait_lock);
wake_up_q(&wake_q);
- wake_q_init(&wake_q); /* Used again, reinit */
raw_spin_lock_irq(&sem->wait_lock);
}
} else {
atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
}
-wait:
/* wait until we successfully acquire the lock */
set_current_state(state);
+ trace_contention_begin(sem, LCB_F_WRITE);
+
for (;;) {
if (rwsem_try_write_lock(sem, &waiter)) {
/* rwsem_try_write_lock() implies ACQUIRE on success */
@@ -1148,17 +1178,15 @@ trylock_again:
__set_current_state(TASK_RUNNING);
raw_spin_unlock_irq(&sem->wait_lock);
lockevent_inc(rwsem_wlock);
+ trace_contention_end(sem, 0);
return sem;
out_nolock:
__set_current_state(TASK_RUNNING);
raw_spin_lock_irq(&sem->wait_lock);
- rwsem_del_waiter(sem, &waiter);
- if (!list_empty(&sem->wait_list))
- rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
- raw_spin_unlock_irq(&sem->wait_lock);
- wake_up_q(&wake_q);
+ rwsem_del_wake_waiter(sem, &waiter, &wake_q);
lockevent_inc(rwsem_wlock_fail);
+ trace_contention_end(sem, -EINTR);
return ERR_PTR(-EINTR);
}
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
index 9ee381e4d2a4..f2654d2fe43a 100644
--- a/kernel/locking/semaphore.c
+++ b/kernel/locking/semaphore.c
@@ -32,6 +32,7 @@
#include <linux/semaphore.h>
#include <linux/spinlock.h>
#include <linux/ftrace.h>
+#include <trace/events/lock.h>
static noinline void __down(struct semaphore *sem);
static noinline int __down_interruptible(struct semaphore *sem);
@@ -205,7 +206,7 @@ struct semaphore_waiter {
* constant, and thus optimised away by the compiler. Likewise the
* 'timeout' parameter for the cases without timeouts.
*/
-static inline int __sched __down_common(struct semaphore *sem, long state,
+static inline int __sched ___down_common(struct semaphore *sem, long state,
long timeout)
{
struct semaphore_waiter waiter;
@@ -236,6 +237,18 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
return -EINTR;
}
+static inline int __sched __down_common(struct semaphore *sem, long state,
+ long timeout)
+{
+ int ret;
+
+ trace_contention_begin(sem, 0);
+ ret = ___down_common(sem, state, timeout);
+ trace_contention_end(sem, ret);
+
+ return ret;
+}
+
static noinline void __sched __down(struct semaphore *sem)
{
__down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index d9272d9061a3..e374c0c923da 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -287,7 +287,7 @@ again:
clock = wrap_max(clock, min_clock);
clock = wrap_min(clock, max_clock);
- if (cmpxchg64(&scd->clock, old_clock, clock) != old_clock)
+ if (!try_cmpxchg64(&scd->clock, &old_clock, clock))
goto again;
return clock;
@@ -349,7 +349,7 @@ again:
val = remote_clock;
}
- if (cmpxchg64(ptr, old_val, val) != old_val)
+ if (!try_cmpxchg64(ptr, &old_val, val))
goto again;
return val;
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index ecb0d7052877..f85d7c7caab2 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -102,7 +102,7 @@ void __cpuidle default_idle_call(void)
* last -- this is very similar to the entry code.
*/
trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare(_THIS_IP_);
+ lockdep_hardirqs_on_prepare();
rcu_idle_enter();
lockdep_hardirqs_on(_THIS_IP_);
diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c
index f4938040c228..95b58bd757ce 100644
--- a/kernel/trace/trace_preemptirq.c
+++ b/kernel/trace/trace_preemptirq.c
@@ -46,7 +46,7 @@ void trace_hardirqs_on(void)
this_cpu_write(tracing_irq_cpu, 0);
}
- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ lockdep_hardirqs_on_prepare();
lockdep_hardirqs_on(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_on);
@@ -94,7 +94,7 @@ __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
this_cpu_write(tracing_irq_cpu, 0);
}
- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ lockdep_hardirqs_on_prepare();
lockdep_hardirqs_on(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_on_caller);