summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2024-05-22 14:13:22 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2024-05-22 14:13:22 -0700
commit f8a6e48c6c6dc30dbd423a3f4b082df625664730 (patch)
tree 19b68bfffb5483634ec52f0c39a657f3cb54b929
parent 5f16eb0549ab502906fb2a10147dad4b9dc185c4 (diff)
parent b9b60b3199b70fe3ce74ff493b1870ccd7554134 (diff)
download linux-f8a6e48c6c6dc30dbd423a3f4b082df625664730.tar.gz
linux-f8a6e48c6c6dc30dbd423a3f4b082df625664730.tar.bz2
linux-f8a6e48c6c6dc30dbd423a3f4b082df625664730.zip
Merge local branch 'x86-codegen'
Merge trivial x86 code generation annoyances

 - Introduce helper macros for clang asm input problems

 - use said macros to improve trivially stupid code generation issues in
   bitops and array_index_mask_nospec

 - also improve codegen with 32-bit array index comparisons

None of these really matter, but I look at code generation and profiles
fairly regularly, and these misfeatures caused the generated code to
look really odd and distract from the real issues.

* branch 'x86-codegen' of local tree:
  x86: improve bitop code generation with clang
  x86: improve array_index_mask_nospec() code generation
  clang: work around asm input constraint problems
-rw-r--r-- arch/x86/include/asm/barrier.h 24
-rw-r--r-- arch/x86/include/asm/bitops.h 10
-rw-r--r-- include/linux/compiler-clang.h 10
-rw-r--r-- include/linux/compiler_types.h 9
4 files changed, 34 insertions, 19 deletions
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 63bdc6b85219..7b44b3c4cce1 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -33,20 +33,16 @@
* Returns:
* 0 - (index < size)
*/
-static __always_inline unsigned long array_index_mask_nospec(unsigned long index,
- unsigned long size)
-{
- unsigned long mask;
-
- asm volatile ("cmp %1,%2; sbb %0,%0;"
- :"=r" (mask)
- :"g"(size),"r" (index)
- :"cc");
- return mask;
-}
-
-/* Override the default implementation from linux/nospec.h. */
-#define array_index_mask_nospec array_index_mask_nospec
+#define array_index_mask_nospec(idx,sz) ({ \
+ typeof((idx)+(sz)) __idx = (idx); \
+ typeof(__idx) __sz = (sz); \
+ unsigned long __mask; \
+ asm volatile ("cmp %1,%2; sbb %0,%0" \
+ :"=r" (__mask) \
+ :ASM_INPUT_G (__sz), \
+ "r" (__idx) \
+ :"cc"); \
+ __mask; })
/* Prevent speculative execution past this barrier. */
#define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC)
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 990eb686ca67..b96d45944c59 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -250,7 +250,7 @@ static __always_inline unsigned long variable__ffs(unsigned long word)
{
asm("rep; bsf %1,%0"
: "=r" (word)
- : "rm" (word));
+ : ASM_INPUT_RM (word));
return word;
}
@@ -297,7 +297,7 @@ static __always_inline unsigned long __fls(unsigned long word)
asm("bsr %1,%0"
: "=r" (word)
- : "rm" (word));
+ : ASM_INPUT_RM (word));
return word;
}
@@ -320,7 +320,7 @@ static __always_inline int variable_ffs(int x)
*/
asm("bsfl %1,%0"
: "=r" (r)
- : "rm" (x), "0" (-1));
+ : ASM_INPUT_RM (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
asm("bsfl %1,%0\n\t"
"cmovzl %2,%0"
@@ -377,7 +377,7 @@ static __always_inline int fls(unsigned int x)
*/
asm("bsrl %1,%0"
: "=r" (r)
- : "rm" (x), "0" (-1));
+ : ASM_INPUT_RM (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
asm("bsrl %1,%0\n\t"
"cmovzl %2,%0"
@@ -416,7 +416,7 @@ static __always_inline int fls64(__u64 x)
*/
asm("bsrq %1,%q0"
: "+r" (bitpos)
- : "rm" (x));
+ : ASM_INPUT_RM (x));
return bitpos + 1;
}
#else
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 49feac0162a5..4c1a39dcb624 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -118,3 +118,13 @@
#define __diag_ignore_all(option, comment) \
__diag_clang(13, ignore, option)
+
+/*
+ * clang has horrible behavior with "g" or "rm" constraints for asm
+ * inputs, turning them into something worse than "m". Avoid using
+ * constraints with multiple possible uses (but "ir" seems to be ok):
+ *
+ * https://github.com/llvm/llvm-project/issues/20571
+ */
+#define ASM_INPUT_G "ir"
+#define ASM_INPUT_RM "r"
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index d1a9dbb8e1a7..93600de3800b 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -409,6 +409,15 @@ struct ftrace_likely_data {
#define asm_goto_output(x...) asm volatile goto(x)
#endif
+/*
+ * Clang has trouble with constraints with multiple
+ * alternative behaviors (mainly "g" and "rm").
+ */
+#ifndef ASM_INPUT_G
+ #define ASM_INPUT_G "g"
+ #define ASM_INPUT_RM "rm"
+#endif
+
#ifdef CONFIG_CC_HAS_ASM_INLINE
#define asm_inline asm __inline
#else