From f65e4fa8e0c6022ad58dc88d1b11b12589ed7f9f Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Thu, 28 Sep 2006 01:45:21 +0100
Subject: [MIPS] Improve branch prediction in ll/sc atomic operations.

Now that finally all supported versions of binutils have functioning
support for .subsection use .subsection to tweak the branch prediction

I did not modify the R10000 errata variants because it seems unclear if
this will invalidate the workaround which actually relies on the cheesy
prediction of branch likely to cause a misspredict if the sc was
successful.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 include/asm-mips/atomic.h   | 50 ++++++++++++++++++++++++++++++++--------
 include/asm-mips/bitops.h   | 33 +++++++++++++++++++++-----
 include/asm-mips/spinlock.h | 56 +++++++++++++++++++++++++++++++++++----------
 include/asm-mips/system.h   | 20 ++++++++++++----
 4 files changed, 127 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/asm-mips/atomic.h b/include/asm-mips/atomic.h
index c1a2409bb52a..8578869a8bcf 100644
--- a/include/asm-mips/atomic.h
+++ b/include/asm-mips/atomic.h
@@ -69,7 +69,10 @@ static __inline__ void atomic_add(int i, atomic_t * v)
 		"1:	ll	%0, %1		# atomic_add		\n"
 		"	addu	%0, %2					\n"
 		"	sc	%0, %1					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter));
@@ -111,7 +114,10 @@ static __inline__ void atomic_sub(int i, atomic_t * v)
 		"1:	ll	%0, %1		# atomic_sub		\n"
 		"	subu	%0, %2					\n"
 		"	sc	%0, %1					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter));
@@ -155,8 +161,11 @@ static __inline__ int atomic_add_return(int i, atomic_t * v)
 		"1:	ll	%1, %2		# atomic_add_return	\n"
 		"	addu	%0, %1, %3				\n"
 		"	sc	%0, %2					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
 		"	addu	%0, %1, %3				\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
@@ -204,8 +213,11 @@ static __inline__ int atomic_sub_return(int i, atomic_t * v)
 		"1:	ll	%1, %2		# atomic_sub_return	\n"
 		"	subu	%0, %1, %3				\n"
 		"	sc	%0, %2					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
 		"	subu	%0, %1, %3				\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
@@ -267,10 +279,13 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
 		"	bltz	%0, 1f					\n"
 		"	sc	%0, %2					\n"
 		"	.set	noreorder				\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
 		"	 subu	%0, %1, %3				\n"
 		"	.set	reorder					\n"
 		"1:							\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
@@ -429,7 +444,10 @@ static __inline__ void atomic64_add(long i, atomic64_t * v)
 		"1:	lld	%0, %1		# atomic64_add		\n"
 		"	addu	%0, %2					\n"
 		"	scd	%0, %1					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter));
@@ -471,7 +489,10 @@ static __inline__ void atomic64_sub(long i, atomic64_t * v)
 		"1:	lld	%0, %1		# atomic64_sub		\n"
 		"	subu	%0, %2					\n"
 		"	scd	%0, %1					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter));
@@ -515,8 +536,11 @@ static __inline__ long atomic64_add_return(long i, atomic64_t * v)
 		"1:	lld	%1, %2		# atomic64_add_return	\n"
 		"	addu	%0, %1, %3				\n"
 		"	scd	%0, %2					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
 		"	addu	%0, %1, %3				\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
@@ -564,8 +588,11 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t * v)
 		"1:	lld	%1, %2		# atomic64_sub_return	\n"
 		"	subu	%0, %1, %3				\n"
 		"	scd	%0, %2					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
 		"	subu	%0, %1, %3				\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
@@ -627,10 +654,13 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v)
 		"	bltz	%0, 1f					\n"
 		"	scd	%0, %2					\n"
 		"	.set	noreorder				\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
 		"	 dsubu	%0, %1, %3				\n"
 		"	.set	reorder					\n"
 		"1:							\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
diff --git a/include/asm-mips/bitops.h b/include/asm-mips/bitops.h
index 06445de1324b..06c08228a525 100644
--- a/include/asm-mips/bitops.h
+++ b/include/asm-mips/bitops.h
@@ -68,7 +68,10 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
 		"1:	" __LL "%0, %1			# set_bit	\n"
 		"	or	%0, %2					\n"
 		"	" __SC	"%0, %1					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (*m)
 		: "ir" (1UL << (nr & SZLONG_MASK)), "m" (*m));
@@ -116,7 +119,10 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
 		"1:	" __LL "%0, %1			# clear_bit	\n"
 		"	and	%0, %2					\n"
 		"	" __SC "%0, %1					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (*m)
 		: "ir" (~(1UL << (nr & SZLONG_MASK))), "m" (*m));
@@ -166,7 +172,10 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
 		"1:	" __LL "%0, %1		# change_bit	\n"
 		"	xor	%0, %2				\n"
 		"	" __SC	"%0, %1				\n"
-		"	beqz	%0, 1b				\n"
+		"	beqz	%0, 2f				\n"
+		"	.subsection 2				\n"
+		"2:	b	1b				\n"
+		"	.previous				\n"
 		"	.set	mips0				\n"
 		: "=&r" (temp), "=m" (*m)
 		: "ir" (1UL << (nr & SZLONG_MASK)), "m" (*m));
@@ -222,8 +231,12 @@ static inline int test_and_set_bit(unsigned long nr,
 		"1:	" __LL "%0, %1		# test_and_set_bit	\n"
 		"	or	%2, %0, %3				\n"
 		"	" __SC	"%2, %1					\n"
-		"	beqz	%2, 1b					\n"
+		"	beqz	%2, 2f					\n"
 		"	 and	%2, %0, %3				\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	 nop						\n"
+		"	.previous					\n"
 		"	.set	pop					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
@@ -290,8 +303,12 @@ static inline int test_and_clear_bit(unsigned long nr,
 		"	or	%2, %0, %3				\n"
 		"	xor	%2, %3					\n"
 		"	" __SC 	"%2, %1					\n"
-		"	beqz	%2, 1b					\n"
+		"	beqz	%2, 2f					\n"
 		"	 and	%2, %0, %3				\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	 nop						\n"
+		"	.previous					\n"
 		"	.set	pop					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
@@ -356,8 +373,12 @@ static inline int test_and_change_bit(unsigned long nr,
 		"1:	" __LL	"%0, %1		# test_and_change_bit	\n"
 		"	xor	%2, %0, %3				\n"
 		"	" __SC	"\t%2, %1				\n"
-		"	beqz	%2, 1b					\n"
+		"	beqz	%2, 2f					\n"
 		"	 and	%2, %0, %3				\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	 nop						\n"
+		"	.previous					\n"
 		"	.set	pop					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
diff --git a/include/asm-mips/spinlock.h b/include/asm-mips/spinlock.h
index fc3217fc1118..f1755d28a36a 100644
--- a/include/asm-mips/spinlock.h
+++ b/include/asm-mips/spinlock.h
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1999, 2000, 06 by Ralf Baechle
+ * Copyright (C) 1999, 2000, 06 Ralf Baechle (ralf@linux-mips.org)
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
  */
 #ifndef _ASM_SPINLOCK_H
@@ -49,11 +49,18 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
 		__asm__ __volatile__(
 		"	.set	noreorder	# __raw_spin_lock	\n"
 		"1:	ll	%1, %2					\n"
-		"	bnez	%1, 1b					\n"
+		"	bnez	%1, 2f					\n"
 		"	 li	%1, 1					\n"
 		"	sc	%1, %0					\n"
-		"	beqz	%1, 1b					\n"
+		"	beqz	%1, 2f					\n"
 		"	 nop						\n"
+		"	.subsection 2					\n"
+		"2:	ll	%1, %2					\n"
+		"	bnez	%1, 2b					\n"
+		"	 li	%1, 1					\n"
+		"	b	1b					\n"
+		"	 nop						\n"
+		"	.previous					\n"
 		"	.set	reorder					\n"
 		: "=m" (lock->lock), "=&r" (tmp)
 		: "m" (lock->lock)
@@ -99,8 +106,12 @@ static inline unsigned int __raw_spin_trylock(raw_spinlock_t *lock)
 		"1:	ll	%0, %3					\n"
 		"	ori	%2, %0, 1				\n"
 		"	sc	%2, %1					\n"
-		"	beqz	%2, 1b					\n"
+		"	beqz	%2, 2f					\n"
 		"	 andi	%2, %0, 1				\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	 nop						\n"
+		"	.previous					\n"
 		"	.set	reorder"
 		: "=&r" (temp), "=m" (lock->lock), "=&r" (res)
 		: "m" (lock->lock)
@@ -154,11 +165,18 @@ static inline void __raw_read_lock(raw_rwlock_t *rw)
 		__asm__ __volatile__(
 		"	.set	noreorder	# __raw_read_lock	\n"
 		"1:	ll	%1, %2					\n"
-		"	bltz	%1, 1b					\n"
+		"	bltz	%1, 2f					\n"
 		"	 addu	%1, 1					\n"
 		"	sc	%1, %0					\n"
 		"	beqz	%1, 1b					\n"
 		"	 nop						\n"
+		"	.subsection 2					\n"
+		"2:	ll	%1, %2					\n"
+		"	bltz	%1, 2b					\n"
+		"	 addu	%1, 1					\n"
+		"	b	1b					\n"
+		"	 nop						\n"
+		"	.previous					\n"
 		"	.set	reorder					\n"
 		: "=m" (rw->lock), "=&r" (tmp)
 		: "m" (rw->lock)
@@ -192,8 +210,12 @@ static inline void __raw_read_unlock(raw_rwlock_t *rw)
 		"1:	ll	%1, %2					\n"
 		"	sub	%1, 1					\n"
 		"	sc	%1, %0					\n"
-		"	beqz	%1, 1b					\n"
+		"	beqz	%1, 2f					\n"
+		"	 nop						\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
 		"	 nop						\n"
+		"	.previous					\n"
 		"	.set	reorder					\n"
 		: "=m" (rw->lock), "=&r" (tmp)
 		: "m" (rw->lock)
@@ -222,11 +244,18 @@ static inline void __raw_write_lock(raw_rwlock_t *rw)
 		__asm__ __volatile__(
 		"	.set	noreorder	# __raw_write_lock	\n"
 		"1:	ll	%1, %2					\n"
-		"	bnez	%1, 1b					\n"
+		"	bnez	%1, 2f					\n"
 		"	 lui	%1, 0x8000				\n"
 		"	sc	%1, %0					\n"
-		"	beqz	%1, 1b					\n"
+		"	beqz	%1, 2f					\n"
+		"	 nop						\n"
+		"	.subsection 2					\n"
+		"2:	ll	%1, %2					\n"
+		"	bnez	%1, 2b					\n"
+		"	 lui	%1, 0x8000				\n"
+		"	b	1b					\n"
 		"	 nop						\n"
+		"	.previous					\n"
 		"	.set	reorder					\n"
 		: "=m" (rw->lock), "=&r" (tmp)
 		: "m" (rw->lock)
@@ -322,12 +351,15 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw)
 		"	bnez	%1, 2f					\n"
 		"	lui	%1, 0x8000				\n"
 		"	sc	%1, %0					\n"
-		"	beqz	%1, 1b					\n"
-		"	 nop						\n"
+		"	beqz	%1, 3f					\n"
+		"	 li	%2, 1					\n"
+		"2:							\n"
 		__WEAK_ORDERING_MB
-		"	li	%2, 1					\n"
+		"	.subsection 2					\n"
+		"3:	b	1b					\n"
+		"	 li	%2, 0					\n"
+		"	.previous					\n"
 		"	.set	reorder					\n"
-		"2:							\n"
 		: "=m" (rw->lock), "=&r" (tmp), "=&r" (ret)
 		: "m" (rw->lock)
 		: "memory");
diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h
index 5e1289c85ed9..597a3743f6a1 100644
--- a/include/asm-mips/system.h
+++ b/include/asm-mips/system.h
@@ -110,7 +110,10 @@ static inline unsigned long __xchg_u32(volatile int * m, unsigned int val)
 		"	move	%2, %z4					\n"
 		"	.set	mips3					\n"
 		"	sc	%2, %1					\n"
-		"	beqz	%2, 1b					\n"
+		"	beqz	%2, 2f					\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (retval), "=m" (*m), "=&r" (dummy)
 		: "R" (*m), "Jr" (val)
@@ -155,7 +158,10 @@ static inline __u64 __xchg_u64(volatile __u64 * m, __u64 val)
 		"1:	lld	%0, %3			# xchg_u64	\n"
 		"	move	%2, %z4					\n"
 		"	scd	%2, %1					\n"
-		"	beqz	%2, 1b					\n"
+		"	beqz	%2, 2f					\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (retval), "=m" (*m), "=&r" (dummy)
 		: "R" (*m), "Jr" (val)
@@ -232,8 +238,11 @@ static inline unsigned long __cmpxchg_u32(volatile int * m, unsigned long old,
 		"	move	$1, %z4					\n"
 		"	.set	mips3					\n"
 		"	sc	$1, %1					\n"
-		"	beqz	$1, 1b					\n"
+		"	beqz	$1, 3f					\n"
 		"2:							\n"
+		"	.subsection 2					\n"
+		"3:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	pop					\n"
 		: "=&r" (retval), "=R" (*m)
 		: "R" (*m), "Jr" (old), "Jr" (new)
@@ -283,8 +292,11 @@ static inline unsigned long __cmpxchg_u64(volatile int * m, unsigned long old,
 		"	bne	%0, %z3, 2f				\n"
 		"	move	$1, %z4					\n"
 		"	scd	$1, %1					\n"
-		"	beqz	$1, 1b					\n"
+		"	beqz	$1, 3f					\n"
 		"2:							\n"
+		"	.subsection 2					\n"
+		"3:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	pop					\n"
 		: "=&r" (retval), "=R" (*m)
 		: "R" (*m), "Jr" (old), "Jr" (new)
-- 
cgit v1.2.3