summary refs log tree commit diff stats
path: root/arch/powerpc/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/lib')
-rw-r--r-- arch/powerpc/lib/Makefile 4
-rw-r--r-- arch/powerpc/lib/code-patching.c 238
-rw-r--r-- arch/powerpc/lib/feature-fixups.c 173
-rw-r--r-- arch/powerpc/lib/qspinlock.c 997
-rw-r--r-- arch/powerpc/lib/sstep.c 21
-rw-r--r-- arch/powerpc/lib/test_emulate_step_exec_instr.S 2
6 files changed, 1286 insertions, 149 deletions
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 8560c912186d..4de71cbf6e8e 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -52,7 +52,9 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
memcpy_64.o copy_mc_64.o
-ifndef CONFIG_PPC_QUEUED_SPINLOCKS
+ifdef CONFIG_PPC_QUEUED_SPINLOCKS
+obj-$(CONFIG_SMP) += qspinlock.o
+else
obj64-$(CONFIG_SMP) += locks.o
endif
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index ad0cf3108dd0..b00112d7ad46 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -4,12 +4,17 @@
*/
#include <linux/kprobes.h>
+#include <linux/mmu_context.h>
+#include <linux/random.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/cpuhotplug.h>
#include <linux/uaccess.h>
#include <linux/jump_label.h>
+#include <asm/debug.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <asm/code-patching.h>
@@ -41,12 +46,59 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
return __patch_instruction(addr, instr, addr);
}
-#ifdef CONFIG_STRICT_KERNEL_RWX
-static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
+struct patch_context {
+ union {
+ struct vm_struct *area;
+ struct mm_struct *mm;
+ };
+ unsigned long addr;
+ pte_t *pte;
+};
+
+static DEFINE_PER_CPU(struct patch_context, cpu_patching_context);
static int map_patch_area(void *addr, unsigned long text_poke_addr);
static void unmap_patch_area(unsigned long addr);
+static bool mm_patch_enabled(void)
+{
+ return IS_ENABLED(CONFIG_SMP) && radix_enabled();
+}
+
+/*
+ * The following applies for Radix MMU. Hash MMU has different requirements,
+ * and so is not supported.
+ *
+ * Changing mm requires context synchronising instructions on both sides of
+ * the context switch, as well as a hwsync between the last instruction for
+ * which the address of an associated storage access was translated using
+ * the current context.
+ *
+ * switch_mm_irqs_off() performs an isync after the context switch. It is
+ * the responsibility of the caller to perform the CSI and hwsync before
+ * starting/stopping the temp mm.
+ */
+static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm)
+{
+ struct mm_struct *orig_mm = current->active_mm;
+
+ lockdep_assert_irqs_disabled();
+ switch_mm_irqs_off(orig_mm, temp_mm, current);
+
+ WARN_ON(!mm_is_thread_local(temp_mm));
+
+ suspend_breakpoints();
+ return orig_mm;
+}
+
+static void stop_using_temp_mm(struct mm_struct *temp_mm,
+ struct mm_struct *orig_mm)
+{
+ lockdep_assert_irqs_disabled();
+ switch_mm_irqs_off(temp_mm, orig_mm, current);
+ restore_breakpoints();
+}
+
static int text_area_cpu_up(unsigned int cpu)
{
struct vm_struct *area;
@@ -68,29 +120,108 @@ static int text_area_cpu_up(unsigned int cpu)
unmap_patch_area(addr);
- this_cpu_write(text_poke_area, area);
+ this_cpu_write(cpu_patching_context.area, area);
+ this_cpu_write(cpu_patching_context.addr, addr);
+ this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr));
return 0;
}
static int text_area_cpu_down(unsigned int cpu)
{
- free_vm_area(this_cpu_read(text_poke_area));
+ free_vm_area(this_cpu_read(cpu_patching_context.area));
+ this_cpu_write(cpu_patching_context.area, NULL);
+ this_cpu_write(cpu_patching_context.addr, 0);
+ this_cpu_write(cpu_patching_context.pte, NULL);
+ return 0;
+}
+
+static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr)
+{
+ struct mmu_gather tlb;
+
+ tlb_gather_mmu(&tlb, mm);
+ free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0);
+ mmput(mm);
+}
+
+static int text_area_cpu_up_mm(unsigned int cpu)
+{
+ struct mm_struct *mm;
+ unsigned long addr;
+ pte_t *pte;
+ spinlock_t *ptl;
+
+ mm = mm_alloc();
+ if (WARN_ON(!mm))
+ goto fail_no_mm;
+
+ /*
+ * Choose a random page-aligned address from the interval
+ * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - 2 * PAGE_SIZE].
+ * The lower address bound is PAGE_SIZE to avoid the zero-page.
+ */
+ addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT;
+
+ /*
+ * PTE allocation uses GFP_KERNEL which means we need to
+ * pre-allocate the PTE here because we cannot do the
+ * allocation during patching when IRQs are disabled.
+ *
+ * Using get_locked_pte() to avoid open coding, the lock
+ * is unnecessary.
+ */
+ pte = get_locked_pte(mm, addr, &ptl);
+ if (!pte)
+ goto fail_no_pte;
+ pte_unmap_unlock(pte, ptl);
+
+ this_cpu_write(cpu_patching_context.mm, mm);
+ this_cpu_write(cpu_patching_context.addr, addr);
+
+ return 0;
+
+fail_no_pte:
+ put_patching_mm(mm, addr);
+fail_no_mm:
+ return -ENOMEM;
+}
+
+static int text_area_cpu_down_mm(unsigned int cpu)
+{
+ put_patching_mm(this_cpu_read(cpu_patching_context.mm),
+ this_cpu_read(cpu_patching_context.addr));
+
+ this_cpu_write(cpu_patching_context.mm, NULL);
+ this_cpu_write(cpu_patching_context.addr, 0);
+
return 0;
}
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done);
-/*
- * Although BUG_ON() is rude, in this case it should only happen if ENOMEM, and
- * we judge it as being preferable to a kernel that will crash later when
- * someone tries to use patch_instruction().
- */
void __init poking_init(void)
{
- BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
- "powerpc/text_poke:online", text_area_cpu_up,
- text_area_cpu_down));
+ int ret;
+
+ if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
+ return;
+
+ if (mm_patch_enabled())
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "powerpc/text_poke_mm:online",
+ text_area_cpu_up_mm,
+ text_area_cpu_down_mm);
+ else
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "powerpc/text_poke:online",
+ text_area_cpu_up,
+ text_area_cpu_down);
+
+ /* cpuhp_setup_state returns >= 0 on success */
+ if (WARN_ON(ret < 0))
+ return;
+
static_branch_enable(&poking_init_done);
}
@@ -147,6 +278,56 @@ static void unmap_patch_area(unsigned long addr)
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
}
+static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
+{
+ int err;
+ u32 *patch_addr;
+ unsigned long text_poke_addr;
+ pte_t *pte;
+ unsigned long pfn = get_patch_pfn(addr);
+ struct mm_struct *patching_mm;
+ struct mm_struct *orig_mm;
+ spinlock_t *ptl;
+
+ patching_mm = __this_cpu_read(cpu_patching_context.mm);
+ text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
+ patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+
+ pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
+ if (!pte)
+ return -ENOMEM;
+
+ __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+
+ /* order PTE update before use, also serves as the hwsync */
+ asm volatile("ptesync": : :"memory");
+
+ /* order context switch after arbitrary prior code */
+ isync();
+
+ orig_mm = start_using_temp_mm(patching_mm);
+
+ err = __patch_instruction(addr, instr, patch_addr);
+
+ /* hwsync performed by __patch_instruction (sync) if successful */
+ if (err)
+ mb(); /* sync */
+
+ /* context synchronisation performed by __patch_instruction (isync or exception) */
+ stop_using_temp_mm(patching_mm, orig_mm);
+
+ pte_clear(patching_mm, text_poke_addr, pte);
+ /*
+ * ptesync to order PTE update before TLB invalidation done
+ * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
+ */
+ local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
+
+ pte_unmap_unlock(pte, ptl);
+
+ return err;
+}
+
static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
{
int err;
@@ -155,10 +336,10 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
pte_t *pte;
unsigned long pfn = get_patch_pfn(addr);
- text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK;
+ text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
- pte = virt_to_kpte(text_poke_addr);
+ pte = __this_cpu_read(cpu_patching_context.pte);
__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
/* See ptesync comment in radix__set_pte_at() */
if (radix_enabled())
@@ -172,7 +353,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
return err;
}
-static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
+int patch_instruction(u32 *addr, ppc_inst_t instr)
{
int err;
unsigned long flags;
@@ -182,34 +363,19 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
* when text_poke_area is not ready, but we still need
* to allow patching. We just do the plain old patching
*/
- if (!static_branch_likely(&poking_init_done))
+ if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) ||
+ !static_branch_likely(&poking_init_done))
return raw_patch_instruction(addr, instr);
local_irq_save(flags);
- err = __do_patch_instruction(addr, instr);
+ if (mm_patch_enabled())
+ err = __do_patch_instruction_mm(addr, instr);
+ else
+ err = __do_patch_instruction(addr, instr);
local_irq_restore(flags);
return err;
}
-#else /* !CONFIG_STRICT_KERNEL_RWX */
-
-static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
-{
- return raw_patch_instruction(addr, instr);
-}
-
-#endif /* CONFIG_STRICT_KERNEL_RWX */
-
-__ro_after_init DEFINE_STATIC_KEY_FALSE(init_mem_is_free);
-
-int patch_instruction(u32 *addr, ppc_inst_t instr)
-{
- /* Make sure we aren't patching a freed init section */
- if (static_branch_likely(&init_mem_is_free) && init_section_contains(addr, 4))
- return 0;
-
- return do_patch_instruction(addr, instr);
-}
NOKPROBE_SYMBOL(patch_instruction);
int patch_branch(u32 *addr, unsigned long target, int flags)
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 31f40f544de5..80def1c2afcb 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -117,10 +117,64 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
}
}
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+static bool is_fixup_addr_valid(void *dest, size_t size)
+{
+ return system_state < SYSTEM_FREEING_INITMEM ||
+ !init_section_contains(dest, size);
+}
+
+static int do_patch_fixups(long *start, long *end, unsigned int *instrs, int num)
+{
+ int i;
+
+ for (i = 0; start < end; start++, i++) {
+ int j;
+ unsigned int *dest = (void *)start + *start;
+
+ if (!is_fixup_addr_valid(dest, sizeof(*instrs) * num))
+ continue;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ for (j = 0; j < num; j++)
+ patch_instruction(dest + j, ppc_inst(instrs[j]));
+ }
+ return i;
+}
+#endif
+
#ifdef CONFIG_PPC_BOOK3S_64
+static int do_patch_entry_fixups(long *start, long *end, unsigned int *instrs,
+ bool do_fallback, void *fallback)
+{
+ int i;
+
+ for (i = 0; start < end; start++, i++) {
+ unsigned int *dest = (void *)start + *start;
+
+ if (!is_fixup_addr_valid(dest, sizeof(*instrs) * 3))
+ continue;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ // See comment in do_entry_flush_fixups() RE order of patching
+ if (do_fallback) {
+ patch_instruction(dest, ppc_inst(instrs[0]));
+ patch_instruction(dest + 2, ppc_inst(instrs[2]));
+ patch_branch(dest + 1, (unsigned long)fallback, BRANCH_SET_LINK);
+ } else {
+ patch_instruction(dest + 1, ppc_inst(instrs[1]));
+ patch_instruction(dest + 2, ppc_inst(instrs[2]));
+ patch_instruction(dest, ppc_inst(instrs[0]));
+ }
+ }
+ return i;
+}
+
static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
{
- unsigned int instrs[3], *dest;
+ unsigned int instrs[3];
long *start, *end;
int i;
@@ -144,23 +198,8 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
}
- for (i = 0; start < end; start++, i++) {
- dest = (void *)start + *start;
-
- pr_devel("patching dest %lx\n", (unsigned long)dest);
-
- // See comment in do_entry_flush_fixups() RE order of patching
- if (types & STF_BARRIER_FALLBACK) {
- patch_instruction(dest, ppc_inst(instrs[0]));
- patch_instruction(dest + 2, ppc_inst(instrs[2]));
- patch_branch(dest + 1,
- (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK);
- } else {
- patch_instruction(dest + 1, ppc_inst(instrs[1]));
- patch_instruction(dest + 2, ppc_inst(instrs[2]));
- patch_instruction(dest, ppc_inst(instrs[0]));
- }
- }
+ i = do_patch_entry_fixups(start, end, instrs, types & STF_BARRIER_FALLBACK,
+ &stf_barrier_fallback);
printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i,
(types == STF_BARRIER_NONE) ? "no" :
@@ -172,7 +211,7 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
{
- unsigned int instrs[6], *dest;
+ unsigned int instrs[6];
long *start, *end;
int i;
@@ -206,18 +245,8 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */
}
- for (i = 0; start < end; start++, i++) {
- dest = (void *)start + *start;
+ i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
- pr_devel("patching dest %lx\n", (unsigned long)dest);
-
- patch_instruction(dest, ppc_inst(instrs[0]));
- patch_instruction(dest + 1, ppc_inst(instrs[1]));
- patch_instruction(dest + 2, ppc_inst(instrs[2]));
- patch_instruction(dest + 3, ppc_inst(instrs[3]));
- patch_instruction(dest + 4, ppc_inst(instrs[4]));
- patch_instruction(dest + 5, ppc_inst(instrs[5]));
- }
printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i,
(types == STF_BARRIER_NONE) ? "no" :
(types == STF_BARRIER_FALLBACK) ? "fallback" :
@@ -274,7 +303,7 @@ void do_stf_barrier_fixups(enum stf_barrier_type types)
void do_uaccess_flush_fixups(enum l1d_flush_type types)
{
- unsigned int instrs[4], *dest;
+ unsigned int instrs[4];
long *start, *end;
int i;
@@ -300,17 +329,7 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types)
if (types & L1D_FLUSH_MTTRIG)
instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
- for (i = 0; start < end; start++, i++) {
- dest = (void *)start + *start;
-
- pr_devel("patching dest %lx\n", (unsigned long)dest);
-
- patch_instruction(dest, ppc_inst(instrs[0]));
-
- patch_instruction(dest + 1, ppc_inst(instrs[1]));
- patch_instruction(dest + 2, ppc_inst(instrs[2]));
- patch_instruction(dest + 3, ppc_inst(instrs[3]));
- }
+ i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i,
(types == L1D_FLUSH_NONE) ? "no" :
@@ -325,7 +344,7 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types)
static int __do_entry_flush_fixups(void *data)
{
enum l1d_flush_type types = *(enum l1d_flush_type *)data;
- unsigned int instrs[3], *dest;
+ unsigned int instrs[3];
long *start, *end;
int i;
@@ -375,42 +394,13 @@ static int __do_entry_flush_fixups(void *data)
start = PTRRELOC(&__start___entry_flush_fixup);
end = PTRRELOC(&__stop___entry_flush_fixup);
- for (i = 0; start < end; start++, i++) {
- dest = (void *)start + *start;
-
- pr_devel("patching dest %lx\n", (unsigned long)dest);
-
- if (types == L1D_FLUSH_FALLBACK) {
- patch_instruction(dest, ppc_inst(instrs[0]));
- patch_instruction(dest + 2, ppc_inst(instrs[2]));
- patch_branch(dest + 1,
- (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK);
- } else {
- patch_instruction(dest + 1, ppc_inst(instrs[1]));
- patch_instruction(dest + 2, ppc_inst(instrs[2]));
- patch_instruction(dest, ppc_inst(instrs[0]));
- }
- }
+ i = do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK,
+ &entry_flush_fallback);
start = PTRRELOC(&__start___scv_entry_flush_fixup);
end = PTRRELOC(&__stop___scv_entry_flush_fixup);
- for (; start < end; start++, i++) {
- dest = (void *)start + *start;
-
- pr_devel("patching dest %lx\n", (unsigned long)dest);
-
- if (types == L1D_FLUSH_FALLBACK) {
- patch_instruction(dest, ppc_inst(instrs[0]));
- patch_instruction(dest + 2, ppc_inst(instrs[2]));
- patch_branch(dest + 1,
- (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK);
- } else {
- patch_instruction(dest + 1, ppc_inst(instrs[1]));
- patch_instruction(dest + 2, ppc_inst(instrs[2]));
- patch_instruction(dest, ppc_inst(instrs[0]));
- }
- }
-
+ i += do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK,
+ &scv_entry_flush_fallback);
printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i,
(types == L1D_FLUSH_NONE) ? "no" :
@@ -438,7 +428,7 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
static int __do_rfi_flush_fixups(void *data)
{
enum l1d_flush_type types = *(enum l1d_flush_type *)data;
- unsigned int instrs[3], *dest;
+ unsigned int instrs[3];
long *start, *end;
int i;
@@ -462,15 +452,7 @@ static int __do_rfi_flush_fixups(void *data)
if (types & L1D_FLUSH_MTTRIG)
instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
- for (i = 0; start < end; start++, i++) {
- dest = (void *)start + *start;
-
- pr_devel("patching dest %lx\n", (unsigned long)dest);
-
- patch_instruction(dest, ppc_inst(instrs[0]));
- patch_instruction(dest + 1, ppc_inst(instrs[1]));
- patch_instruction(dest + 2, ppc_inst(instrs[2]));
- }
+ i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i,
(types == L1D_FLUSH_NONE) ? "no" :
@@ -512,7 +494,7 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
{
- unsigned int instr, *dest;
+ unsigned int instr;
long *start, *end;
int i;
@@ -526,12 +508,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_
instr = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
}
- for (i = 0; start < end; start++, i++) {
- dest = (void *)start + *start;
-
- pr_devel("patching dest %lx\n", (unsigned long)dest);
- patch_instruction(dest, ppc_inst(instr));
- }
+ i = do_patch_fixups(start, end, &instr, 1);
printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
}
@@ -553,7 +530,7 @@ void do_barrier_nospec_fixups(bool enable)
#ifdef CONFIG_PPC_E500
void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
{
- unsigned int instr[2], *dest;
+ unsigned int instr[2];
long *start, *end;
int i;
@@ -569,13 +546,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_
instr[1] = PPC_RAW_SYNC();
}
- for (i = 0; start < end; start++, i++) {
- dest = (void *)start + *start;
-
- pr_devel("patching dest %lx\n", (unsigned long)dest);
- patch_instruction(dest, ppc_inst(instr[0]));
- patch_instruction(dest + 1, ppc_inst(instr[1]));
- }
+ i = do_patch_fixups(start, end, instr, ARRAY_SIZE(instr));
printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
}
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
new file mode 100644
index 000000000000..e4bd145255d0
--- /dev/null
+++ b/arch/powerpc/lib/qspinlock.c
@@ -0,0 +1,997 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
+#include <linux/percpu.h>
+#include <linux/processor.h>
+#include <linux/smp.h>
+#include <linux/topology.h>
+#include <linux/sched/clock.h>
+#include <asm/qspinlock.h>
+#include <asm/paravirt.h>
+
+#define MAX_NODES 4
+
+struct qnode {
+ struct qnode *next;
+ struct qspinlock *lock;
+ int cpu;
+ int yield_cpu;
+ u8 locked; /* 1 if lock acquired */
+};
+
+struct qnodes {
+ int count;
+ struct qnode nodes[MAX_NODES];
+};
+
+/* Tuning parameters */
+static int steal_spins __read_mostly = (1 << 5);
+static int remote_steal_spins __read_mostly = (1 << 2);
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+static const bool maybe_stealers = true;
+#else
+static bool maybe_stealers __read_mostly = true;
+#endif
+static int head_spins __read_mostly = (1 << 8);
+
+static bool pv_yield_owner __read_mostly = true;
+static bool pv_yield_allow_steal __read_mostly = false;
+static bool pv_spin_on_preempted_owner __read_mostly = false;
+static bool pv_sleepy_lock __read_mostly = true;
+static bool pv_sleepy_lock_sticky __read_mostly = false;
+static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
+static int pv_sleepy_lock_factor __read_mostly = 256;
+static bool pv_yield_prev __read_mostly = true;
+static bool pv_yield_propagate_owner __read_mostly = true;
+static bool pv_prod_head __read_mostly = false;
+
+static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
+static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);
+
+#if _Q_SPIN_SPEC_BARRIER == 1
+#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
+#else
+#define spec_barrier() do { } while (0)
+#endif
+
+static __always_inline bool recently_sleepy(void)
+{
+ /* pv_sleepy_lock is true when this is called */
+ if (pv_sleepy_lock_interval_ns) {
+ u64 seen = this_cpu_read(sleepy_lock_seen_clock);
+
+ if (seen) {
+ u64 delta = sched_clock() - seen;
+ if (delta < pv_sleepy_lock_interval_ns)
+ return true;
+ this_cpu_write(sleepy_lock_seen_clock, 0);
+ }
+ }
+
+ return false;
+}
+
+static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
+{
+ if (paravirt && sleepy)
+ return steal_spins * pv_sleepy_lock_factor;
+ else
+ return steal_spins;
+}
+
+static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
+{
+ if (paravirt && sleepy)
+ return remote_steal_spins * pv_sleepy_lock_factor;
+ else
+ return remote_steal_spins;
+}
+
+static __always_inline int get_head_spins(bool paravirt, bool sleepy)
+{
+ if (paravirt && sleepy)
+ return head_spins * pv_sleepy_lock_factor;
+ else
+ return head_spins;
+}
+
+static inline u32 encode_tail_cpu(int cpu)
+{
+ return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
+}
+
+static inline int decode_tail_cpu(u32 val)
+{
+ return (val >> _Q_TAIL_CPU_OFFSET) - 1;
+}
+
+static inline int get_owner_cpu(u32 val)
+{
+ return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
+}
+
+/*
+ * Try to acquire the lock if it was not already locked. If the tail matches
+ * mytail then clear it, otherwise leave it unchanged. Return previous value.
+ *
+ * This is used by the head of the queue to acquire the lock and clean up
+ * its tail if it was the last one queued.
+ */
+static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
+{
+ u32 newval = queued_spin_encode_locked_val();
+ u32 prev, tmp;
+
+ asm volatile(
+"1: lwarx %0,0,%2,%7 # trylock_clean_tail \n"
+ /* This test is necessary if there could be stealers */
+" andi. %1,%0,%5 \n"
+" bne 3f \n"
+ /* Test whether the lock tail == mytail */
+" and %1,%0,%6 \n"
+" cmpw 0,%1,%3 \n"
+ /* Merge the new locked value */
+" or %1,%1,%4 \n"
+" bne 2f \n"
+ /* If the lock tail matched, then clear it, otherwise leave it. */
+" andc %1,%1,%6 \n"
+"2: stwcx. %1,0,%2 \n"
+" bne- 1b \n"
+"\t" PPC_ACQUIRE_BARRIER " \n"
+"3: \n"
+ : "=&r" (prev), "=&r" (tmp)
+ : "r" (&lock->val), "r"(tail), "r" (newval),
+ "i" (_Q_LOCKED_VAL),
+ "r" (_Q_TAIL_CPU_MASK),
+ "i" (_Q_SPIN_EH_HINT)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+/*
+ * Publish our tail, replacing previous tail. Return previous value.
+ *
+ * This provides a release barrier for publishing node, this pairs with the
+ * acquire barrier in get_tail_qnode() when the next CPU finds this tail
+ * value.
+ */
+static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
+{
+ u32 prev, tmp;
+
+ asm volatile(
+"\t" PPC_RELEASE_BARRIER " \n"
+"1: lwarx %0,0,%2 # publish_tail_cpu \n"
+" andc %1,%0,%4 \n"
+" or %1,%1,%3 \n"
+" stwcx. %1,0,%2 \n"
+" bne- 1b \n"
+ : "=&r" (prev), "=&r"(tmp)
+ : "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+static __always_inline u32 set_mustq(struct qspinlock *lock)
+{
+ u32 prev;
+
+ asm volatile(
+"1: lwarx %0,0,%1 # set_mustq \n"
+" or %0,%0,%2 \n"
+" stwcx. %0,0,%1 \n"
+" bne- 1b \n"
+ : "=&r" (prev)
+ : "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+static __always_inline u32 clear_mustq(struct qspinlock *lock)
+{
+ u32 prev;
+
+ asm volatile(
+"1: lwarx %0,0,%1 # clear_mustq \n"
+" andc %0,%0,%2 \n"
+" stwcx. %0,0,%1 \n"
+" bne- 1b \n"
+ : "=&r" (prev)
+ : "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
+{
+ u32 prev;
+ u32 new = old | _Q_SLEEPY_VAL;
+
+ BUG_ON(!(old & _Q_LOCKED_VAL));
+ BUG_ON(old & _Q_SLEEPY_VAL);
+
+ asm volatile(
+"1: lwarx %0,0,%1 # try_set_sleepy \n"
+" cmpw 0,%0,%2 \n"
+" bne- 2f \n"
+" stwcx. %3,0,%1 \n"
+" bne- 1b \n"
+"2: \n"
+ : "=&r" (prev)
+ : "r" (&lock->val), "r"(old), "r" (new)
+ : "cr0", "memory");
+
+ return likely(prev == old);
+}
+
+static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
+{
+ if (pv_sleepy_lock) {
+ if (pv_sleepy_lock_interval_ns)
+ this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+ if (!(val & _Q_SLEEPY_VAL))
+ try_set_sleepy(lock, val);
+ }
+}
+
+static __always_inline void seen_sleepy_lock(void)
+{
+ if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
+ this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+}
+
+static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val)
+{
+ if (pv_sleepy_lock) {
+ if (pv_sleepy_lock_interval_ns)
+ this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+ if (val & _Q_LOCKED_VAL) {
+ if (!(val & _Q_SLEEPY_VAL))
+ try_set_sleepy(lock, val);
+ }
+ }
+}
+
+static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
+{
+ int cpu = decode_tail_cpu(val);
+ struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu);
+ int idx;
+
+ /*
+ * After publishing the new tail and finding a previous tail in the
+ * previous val (which is the control dependency), this barrier
+ * orders the release barrier in publish_tail_cpu performed by the
+ * last CPU, with subsequently looking at its qnode structures
+ * after the barrier.
+ */
+ smp_acquire__after_ctrl_dep();
+
+ for (idx = 0; idx < MAX_NODES; idx++) {
+ struct qnode *qnode = &qnodesp->nodes[idx];
+ if (qnode->lock == lock)
+ return qnode;
+ }
+
+ BUG();
+}
+
+/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
+static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
+{
+ int owner;
+ u32 yield_count;
+ bool preempted = false;
+
+ BUG_ON(!(val & _Q_LOCKED_VAL));
+
+ if (!paravirt)
+ goto relax;
+
+ if (!pv_yield_owner)
+ goto relax;
+
+ owner = get_owner_cpu(val);
+ yield_count = yield_count_of(owner);
+
+ if ((yield_count & 1) == 0)
+ goto relax; /* owner vcpu is running */
+
+ spin_end();
+
+ seen_sleepy_owner(lock, val);
+ preempted = true;
+
+ /*
+ * Read the lock word after sampling the yield count. On the other side
+ * there may not be a wmb because the yield count update is done by the
+ * hypervisor preemption and the value update by the OS, however this
+ * ordering might reduce the chance of out of order accesses and
+ * improve the heuristic.
+ */
+ smp_rmb();
+
+ if (READ_ONCE(lock->val) == val) {
+ if (mustq)
+ clear_mustq(lock);
+ yield_to_preempted(owner, yield_count);
+ if (mustq)
+ set_mustq(lock);
+ spin_begin();
+
+ /* Don't relax if we yielded. Maybe we should? */
+ return preempted;
+ }
+ spin_begin();
+relax:
+ spin_cpu_relax();
+
+ return preempted;
+}
+
+/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
+static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+{
+ return __yield_to_locked_owner(lock, val, paravirt, false);
+}
+
+/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
+static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+{
+ bool mustq = false;
+
+ if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
+ mustq = true;
+
+ return __yield_to_locked_owner(lock, val, paravirt, mustq);
+}
+
+static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
+{
+ struct qnode *next;
+ int owner;
+
+ if (!paravirt)
+ return;
+ if (!pv_yield_propagate_owner)
+ return;
+
+ owner = get_owner_cpu(val);
+ if (*set_yield_cpu == owner)
+ return;
+
+ next = READ_ONCE(node->next);
+ if (!next)
+ return;
+
+ if (vcpu_is_preempted(owner)) {
+ next->yield_cpu = owner;
+ *set_yield_cpu = owner;
+ } else if (*set_yield_cpu != -1) {
+ next->yield_cpu = owner;
+ *set_yield_cpu = owner;
+ }
+}
+
+/* Called inside spin_begin() */
+static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
+{
+ int prev_cpu = decode_tail_cpu(val);
+ u32 yield_count;
+ int yield_cpu;
+ bool preempted = false;
+
+ if (!paravirt)
+ goto relax;
+
+ if (!pv_yield_propagate_owner)
+ goto yield_prev;
+
+ yield_cpu = READ_ONCE(node->yield_cpu);
+ if (yield_cpu == -1) {
+ /* Propagate back the -1 CPU */
+ if (node->next && node->next->yield_cpu != -1)
+ node->next->yield_cpu = yield_cpu;
+ goto yield_prev;
+ }
+
+ yield_count = yield_count_of(yield_cpu);
+ if ((yield_count & 1) == 0)
+ goto yield_prev; /* owner vcpu is running */
+
+ spin_end();
+
+ preempted = true;
+ seen_sleepy_node(lock, val);
+
+ smp_rmb();
+
+ if (yield_cpu == node->yield_cpu) {
+ if (node->next && node->next->yield_cpu != yield_cpu)
+ node->next->yield_cpu = yield_cpu;
+ yield_to_preempted(yield_cpu, yield_count);
+ spin_begin();
+ return preempted;
+ }
+ spin_begin();
+
+yield_prev:
+ if (!pv_yield_prev)
+ goto relax;
+
+ yield_count = yield_count_of(prev_cpu);
+ if ((yield_count & 1) == 0)
+ goto relax; /* owner vcpu is running */
+
+ spin_end();
+
+ preempted = true;
+ seen_sleepy_node(lock, val);
+
+ smp_rmb(); /* See __yield_to_locked_owner comment */
+
+ if (!node->locked) {
+ yield_to_preempted(prev_cpu, yield_count);
+ spin_begin();
+ return preempted;
+ }
+ spin_begin();
+
+relax:
+ spin_cpu_relax();
+
+ return preempted;
+}
+
+static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
+{
+ if (iters >= get_steal_spins(paravirt, sleepy))
+ return true;
+
+ if (IS_ENABLED(CONFIG_NUMA) &&
+ (iters >= get_remote_steal_spins(paravirt, sleepy))) {
+ int cpu = get_owner_cpu(val);
+ if (numa_node_id() != cpu_to_node(cpu))
+ return true;
+ }
+ return false;
+}
+
+static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
+{
+ bool seen_preempted = false;
+ bool sleepy = false;
+ int iters = 0;
+ u32 val;
+
+ if (!steal_spins) {
+ /* XXX: should spin_on_preempted_owner do anything here? */
+ return false;
+ }
+
+ /* Attempt to steal the lock */
+ spin_begin();
+ do {
+ bool preempted = false;
+
+ val = READ_ONCE(lock->val);
+ if (val & _Q_MUST_Q_VAL)
+ break;
+ spec_barrier();
+
+ if (unlikely(!(val & _Q_LOCKED_VAL))) {
+ spin_end();
+ if (__queued_spin_trylock_steal(lock))
+ return true;
+ spin_begin();
+ } else {
+ preempted = yield_to_locked_owner(lock, val, paravirt);
+ }
+
+ if (paravirt && pv_sleepy_lock) {
+ if (!sleepy) {
+ if (val & _Q_SLEEPY_VAL) {
+ seen_sleepy_lock();
+ sleepy = true;
+ } else if (recently_sleepy()) {
+ sleepy = true;
+ }
+ }
+ if (pv_sleepy_lock_sticky && seen_preempted &&
+ !(val & _Q_SLEEPY_VAL)) {
+ if (try_set_sleepy(lock, val))
+ val |= _Q_SLEEPY_VAL;
+ }
+ }
+
+ if (preempted) {
+ seen_preempted = true;
+ sleepy = true;
+ if (!pv_spin_on_preempted_owner)
+ iters++;
+ /*
+ * With pv_spin_on_preempted_owner, don't increase iters
+ * while the owner is preempted -- we won't interfere
+ * with it by definition. This could introduce some
+ * latency issue if we continually observe preempted
+ * owners, but hopefully that's a rare corner case of
+ * a badly oversubscribed system.
+ */
+ } else {
+ iters++;
+ }
+ } while (!steal_break(val, iters, paravirt, sleepy));
+
+ spin_end();
+
+ return false;
+}
+
+/*
+ * Acquire @lock by queueing this CPU as an MCS waiter.
+ *
+ * @lock:     the queued spinlock to acquire
+ * @paravirt: enables the paravirt-only handling (sleepy-lock tracking,
+ *            yield_cpu cleanup, prodding the next waiter) and is passed
+ *            through to the yield helpers
+ *
+ * Outline:
+ *  1. Claim a per-CPU qnode. If none is free, spin on
+ *     queued_spin_trylock() instead of queueing.
+ *  2. Publish this CPU as the queue tail. If there was a previous tail,
+ *     link behind it and wait for node->locked to be set.
+ *  3. As queue head, spin on the lock word until it is unlocked, setting
+ *     the must-queue flag after get_head_spins() iterations when stealers
+ *     may exist.
+ *  4. Take the lock with trylock_clean_tail(), retrying from step 3 if a
+ *     stealer got in first, then hand the MCS node lock to the next
+ *     waiter (if any) and release the qnode.
+ *
+ * Returns with the lock held.
+ */
+static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
+{
+	struct qnodes *qnodesp;
+	struct qnode *next, *node;
+	u32 val, old, tail;
+	bool seen_preempted = false;
+	bool sleepy = false;
+	bool mustq = false;
+	int idx;
+	int set_yield_cpu = -1;
+	int iters = 0;
+
+	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
+
+	qnodesp = this_cpu_ptr(&qnodes);
+	if (unlikely(qnodesp->count >= MAX_NODES)) {
+		/* No free per-CPU node: acquire by plain trylock spinning. */
+		spec_barrier();
+		while (!queued_spin_trylock(lock))
+			cpu_relax();
+		return;
+	}
+
+	idx = qnodesp->count++;
+	/*
+	 * Ensure that we increment the head node->count before initialising
+	 * the actual node. If the compiler is kind enough to reorder these
+	 * stores, then an IRQ could overwrite our assignments.
+	 */
+	barrier();
+	node = &qnodesp->nodes[idx];
+	node->next = NULL;
+	node->lock = lock;
+	node->cpu = smp_processor_id();
+	node->yield_cpu = -1;
+	node->locked = 0;
+
+	tail = encode_tail_cpu(node->cpu);
+
+	old = publish_tail_cpu(lock, tail);
+
+	/*
+	 * If there was a previous node; link it and wait until reaching the
+	 * head of the waitqueue.
+	 */
+	if (old & _Q_TAIL_CPU_MASK) {
+		struct qnode *prev = get_tail_qnode(lock, old);
+
+		/* Link @node into the waitqueue. */
+		WRITE_ONCE(prev->next, node);
+
+		/*
+		 * Wait for mcs node lock to be released. The previous waiter
+		 * sets ->locked with WRITE_ONCE() from another CPU, so the
+		 * load is marked with READ_ONCE(): the compiler must re-read
+		 * it on every iteration and must not tear or fuse the access.
+		 */
+		spin_begin();
+		while (!READ_ONCE(node->locked)) {
+			spec_barrier();
+
+			if (yield_to_prev(lock, node, old, paravirt))
+				seen_preempted = true;
+		}
+		spec_barrier();
+		spin_end();
+
+		/* Clear out stale propagated yield_cpu */
+		if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1)
+			node->yield_cpu = -1;
+
+		smp_rmb(); /* acquire barrier for the mcs lock */
+
+		/*
+		 * Generic qspinlocks have this prefetch here, but it seems
+		 * like it could cause additional line transitions because
+		 * the waiter will keep loading from it.
+		 */
+		if (_Q_SPIN_PREFETCH_NEXT) {
+			next = READ_ONCE(node->next);
+			if (next)
+				prefetchw(next);
+		}
+	}
+
+	/* We're at the head of the waitqueue, wait for the lock. */
+again:
+	spin_begin();
+	for (;;) {
+		bool preempted;
+
+		val = READ_ONCE(lock->val);
+		if (!(val & _Q_LOCKED_VAL))
+			break;
+		spec_barrier();
+
+		if (paravirt && pv_sleepy_lock && maybe_stealers) {
+			/* Latch 'sleepy' once, from the lock word or history. */
+			if (!sleepy) {
+				if (val & _Q_SLEEPY_VAL) {
+					seen_sleepy_lock();
+					sleepy = true;
+				} else if (recently_sleepy()) {
+					sleepy = true;
+				}
+			}
+			if (pv_sleepy_lock_sticky && seen_preempted &&
+			    !(val & _Q_SLEEPY_VAL)) {
+				if (try_set_sleepy(lock, val))
+					val |= _Q_SLEEPY_VAL;
+			}
+		}
+
+		propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
+		preempted = yield_head_to_locked_owner(lock, val, paravirt);
+		/* Without stealers there is no spin limit to account for. */
+		if (!maybe_stealers)
+			continue;
+
+		if (preempted)
+			seen_preempted = true;
+
+		if (paravirt && preempted) {
+			sleepy = true;
+
+			if (!pv_spin_on_preempted_owner)
+				iters++;
+		} else {
+			iters++;
+		}
+
+		/* Spun long enough as head: set the must-queue flag, once. */
+		if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
+			mustq = true;
+			set_mustq(lock);
+			val |= _Q_MUST_Q_VAL;
+		}
+	}
+	spec_barrier();
+	spin_end();
+
+	/* If we're the last queued, must clean up the tail. */
+	old = trylock_clean_tail(lock, tail);
+	if (unlikely(old & _Q_LOCKED_VAL)) {
+		BUG_ON(!maybe_stealers);
+		goto again; /* Can only be true if maybe_stealers. */
+	}
+
+	if ((old & _Q_TAIL_CPU_MASK) == tail)
+		goto release; /* We were the tail, no next. */
+
+	/* There is a next, must wait for node->next != NULL (MCS protocol) */
+	next = READ_ONCE(node->next);
+	if (!next) {
+		spin_begin();
+		while (!(next = READ_ONCE(node->next)))
+			cpu_relax();
+		spin_end();
+	}
+	spec_barrier();
+
+	/*
+	 * Unlock the next mcs waiter node. Release barrier is not required
+	 * here because the acquirer is only accessing the lock word, and
+	 * the acquire barrier we took the lock with orders that update vs
+	 * this store to locked. The corresponding barrier is the smp_rmb()
+	 * acquire barrier for mcs lock, above.
+	 */
+	if (paravirt && pv_prod_head) {
+		/* Sample the CPU number before handing the node over. */
+		int next_cpu = next->cpu;
+		WRITE_ONCE(next->locked, 1);
+		if (_Q_SPIN_MISO)
+			asm volatile("miso" ::: "memory");
+		if (vcpu_is_preempted(next_cpu))
+			prod_cpu(next_cpu);
+	} else {
+		WRITE_ONCE(next->locked, 1);
+		if (_Q_SPIN_MISO)
+			asm volatile("miso" ::: "memory");
+	}
+
+release:
+	qnodesp->count--; /* release the node */
+}
+
+/*
+ * Slow path entry for acquiring @lock: first try to steal the lock, and
+ * if that fails, queue as an MCS waiter until the lock is obtained.
+ */
+void queued_spin_lock_slowpath(struct qspinlock *lock)
+{
+	/*
+	 * The two arms below are deliberately duplicated with literal
+	 * true/false arguments. Written this way the compiler inlines a
+	 * specialised copy of each helper per arm, instead of sharing code
+	 * as it would if the condition were passed as the paravirt argument.
+	 */
+	if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
+		if (!try_to_steal_lock(lock, true))
+			queued_spin_lock_mcs_queue(lock, true);
+		else
+			spec_barrier();
+	} else {
+		if (!try_to_steal_lock(lock, false))
+			queued_spin_lock_mcs_queue(lock, false);
+		else
+			spec_barrier();
+	}
+}
+EXPORT_SYMBOL(queued_spin_lock_slowpath);
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+/* Paravirt spinlock init hook; the body is empty, so it does nothing. */
+void pv_spinlocks_init(void)
+{
+}
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+#include <linux/debugfs.h>
+/* debugfs read handler: report the current steal_spins value. */
+static int steal_spins_get(void *data, u64 *val)
+{
+	*val = steal_spins;
+	return 0;
+}
+
+/*
+ * debugfs write handler: update steal_spins.
+ *
+ * When _Q_SPIN_TRY_LOCK_STEAL is not 1, a change between zero and
+ * non-zero also flips maybe_stealers, with an RCU grace period between
+ * the two stores. Always returns 0.
+ */
+static int steal_spins_set(void *data, u64 val)
+{
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+	/* MAYBE_STEAL remains true */
+	steal_spins = val;
+#else
+	static DEFINE_MUTEX(update_lock);
+
+	/*
+	 * The lock slow path has a !maybe_stealers case that can assume
+	 * the head of queue will not see concurrent waiters. That waiter
+	 * is unsafe in the presence of stealers, so must keep them away
+	 * from one another.
+	 */
+
+	mutex_lock(&update_lock);
+	if (!val == !steal_spins) {
+		/* Stealing stays on, or stays off: just store the value. */
+		steal_spins = val;
+	} else if (val) {
+		/* Turning stealing on. */
+		maybe_stealers = true;
+		/* wait for queue head waiter to go away */
+		synchronize_rcu();
+		steal_spins = val;
+	} else {
+		/* Turning stealing off. */
+		steal_spins = val;
+		/* wait for all possible stealers to go away */
+		synchronize_rcu();
+		maybe_stealers = false;
+	}
+	mutex_unlock(&update_lock);
+#endif
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set,
+			"%llu\n");
+
+/* debugfs read handler: report the current remote_steal_spins value. */
+static int remote_steal_spins_get(void *data, u64 *val)
+{
+	*val = remote_steal_spins;
+	return 0;
+}
+
+/* debugfs write handler: store @val into remote_steal_spins. */
+static int remote_steal_spins_set(void *data, u64 val)
+{
+	remote_steal_spins = val;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get,
+			remote_steal_spins_set, "%llu\n");
+
+/* debugfs read handler: report the current head_spins value. */
+static int head_spins_get(void *data, u64 *val)
+{
+	*val = head_spins;
+	return 0;
+}
+
+/* debugfs write handler: store @val into head_spins. */
+static int head_spins_set(void *data, u64 val)
+{
+	head_spins = val;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set,
+			"%llu\n");
+
+/* debugfs read handler: report the current pv_yield_owner setting. */
+static int pv_yield_owner_get(void *data, u64 *val)
+{
+	*val = pv_yield_owner;
+	return 0;
+}
+
+/* debugfs write handler: any non-zero @val turns pv_yield_owner on. */
+static int pv_yield_owner_set(void *data, u64 val)
+{
+	pv_yield_owner = (val != 0);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get,
+			pv_yield_owner_set, "%llu\n");
+
+/* debugfs read handler: report the current pv_yield_allow_steal setting. */
+static int pv_yield_allow_steal_get(void *data, u64 *val)
+{
+	*val = pv_yield_allow_steal;
+	return 0;
+}
+
+/* debugfs write handler: any non-zero @val turns pv_yield_allow_steal on. */
+static int pv_yield_allow_steal_set(void *data, u64 val)
+{
+	pv_yield_allow_steal = (val != 0);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get,
+			pv_yield_allow_steal_set, "%llu\n");
+
+/* debugfs read handler: report the pv_spin_on_preempted_owner setting. */
+static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
+{
+	*val = pv_spin_on_preempted_owner;
+	return 0;
+}
+
+/* debugfs write handler: non-zero @val turns pv_spin_on_preempted_owner on. */
+static int pv_spin_on_preempted_owner_set(void *data, u64 val)
+{
+	pv_spin_on_preempted_owner = (val != 0);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner,
+			pv_spin_on_preempted_owner_get,
+			pv_spin_on_preempted_owner_set, "%llu\n");
+
+/* debugfs read handler: report the current pv_sleepy_lock setting. */
+static int pv_sleepy_lock_get(void *data, u64 *val)
+{
+	*val = pv_sleepy_lock;
+	return 0;
+}
+
+/* debugfs write handler: any non-zero @val turns pv_sleepy_lock on. */
+static int pv_sleepy_lock_set(void *data, u64 val)
+{
+	pv_sleepy_lock = (val != 0);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get,
+			pv_sleepy_lock_set, "%llu\n");
+
+/* debugfs read handler: report the current pv_sleepy_lock_sticky setting. */
+static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
+{
+	*val = pv_sleepy_lock_sticky;
+	return 0;
+}
+
+/* debugfs write handler: any non-zero @val turns pv_sleepy_lock_sticky on. */
+static int pv_sleepy_lock_sticky_set(void *data, u64 val)
+{
+	pv_sleepy_lock_sticky = (val != 0);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get,
+			pv_sleepy_lock_sticky_set, "%llu\n");
+
+/* debugfs read handler: report the current pv_sleepy_lock_interval_ns. */
+static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
+{
+	*val = pv_sleepy_lock_interval_ns;
+	return 0;
+}
+
+/* debugfs write handler: store @val into pv_sleepy_lock_interval_ns. */
+static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
+{
+	pv_sleepy_lock_interval_ns = val;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns,
+			pv_sleepy_lock_interval_ns_get,
+			pv_sleepy_lock_interval_ns_set, "%llu\n");
+
+/* debugfs read handler: report the current pv_sleepy_lock_factor value. */
+static int pv_sleepy_lock_factor_get(void *data, u64 *val)
+{
+	*val = pv_sleepy_lock_factor;
+	return 0;
+}
+
+/* debugfs write handler: store @val into pv_sleepy_lock_factor. */
+static int pv_sleepy_lock_factor_set(void *data, u64 val)
+{
+	pv_sleepy_lock_factor = val;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get,
+			pv_sleepy_lock_factor_set, "%llu\n");
+
+/* debugfs read handler: report the current pv_yield_prev setting. */
+static int pv_yield_prev_get(void *data, u64 *val)
+{
+	*val = pv_yield_prev;
+	return 0;
+}
+
+/* debugfs write handler: any non-zero @val turns pv_yield_prev on. */
+static int pv_yield_prev_set(void *data, u64 val)
+{
+	pv_yield_prev = (val != 0);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get,
+			pv_yield_prev_set, "%llu\n");
+
+/* debugfs read handler: report the pv_yield_propagate_owner setting. */
+static int pv_yield_propagate_owner_get(void *data, u64 *val)
+{
+	*val = pv_yield_propagate_owner;
+	return 0;
+}
+
+/* debugfs write handler: non-zero @val turns pv_yield_propagate_owner on. */
+static int pv_yield_propagate_owner_set(void *data, u64 val)
+{
+	pv_yield_propagate_owner = (val != 0);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner,
+			pv_yield_propagate_owner_get,
+			pv_yield_propagate_owner_set, "%llu\n");
+
+/* debugfs read handler: report the current pv_prod_head setting. */
+static int pv_prod_head_get(void *data, u64 *val)
+{
+	*val = pv_prod_head;
+	return 0;
+}
+
+/* debugfs write handler: any non-zero @val turns pv_prod_head on. */
+static int pv_prod_head_set(void *data, u64 val)
+{
+	pv_prod_head = (val != 0);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get,
+			pv_prod_head_set, "%llu\n");
+
+/*
+ * Create the qspinlock tunable files under arch_debugfs_dir.
+ *
+ * The three spin-count files are always created; the qspl_pv_* files are
+ * created only when is_shared_processor() is true. Always returns 0.
+ */
+static __init int spinlock_debugfs_init(void)
+{
+	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir,
+			    NULL, &fops_steal_spins);
+	debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir,
+			    NULL, &fops_remote_steal_spins);
+	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir,
+			    NULL, &fops_head_spins);
+
+	/* The remaining tunables are exposed on shared processors only. */
+	if (!is_shared_processor())
+		return 0;
+
+	debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir,
+			    NULL, &fops_pv_yield_owner);
+	debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir,
+			    NULL, &fops_pv_yield_allow_steal);
+	debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir,
+			    NULL, &fops_pv_spin_on_preempted_owner);
+	debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir,
+			    NULL, &fops_pv_sleepy_lock);
+	debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir,
+			    NULL, &fops_pv_sleepy_lock_sticky);
+	debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir,
+			    NULL, &fops_pv_sleepy_lock_interval_ns);
+	debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir,
+			    NULL, &fops_pv_sleepy_lock_factor);
+	debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir,
+			    NULL, &fops_pv_yield_prev);
+	debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir,
+			    NULL, &fops_pv_yield_propagate_owner);
+	debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir,
+			    NULL, &fops_pv_prod_head);
+
+	return 0;
+}
+device_initcall(spinlock_debugfs_init);
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 398b5694aeb7..38158b77a801 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -2284,15 +2284,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
op->type = MKOP(STCX, 0, 4);
break;
-#ifdef __powerpc64__
- case 84: /* ldarx */
- op->type = MKOP(LARX, 0, 8);
- break;
-
- case 214: /* stdcx. */
- op->type = MKOP(STCX, 0, 8);
- break;
-
+#ifdef CONFIG_PPC_HAS_LBARX_LHARX
case 52: /* lbarx */
op->type = MKOP(LARX, 0, 1);
break;
@@ -2308,6 +2300,15 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
case 726: /* sthcx. */
op->type = MKOP(STCX, 0, 2);
break;
+#endif
+#ifdef __powerpc64__
+ case 84: /* ldarx */
+ op->type = MKOP(LARX, 0, 8);
+ break;
+
+ case 214: /* stdcx. */
+ op->type = MKOP(STCX, 0, 8);
+ break;
case 276: /* lqarx */
if (!((rd & 1) || rd == ra || rd == rb))
@@ -3334,7 +3335,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
err = 0;
val = 0;
switch (size) {
-#ifdef __powerpc64__
+#ifdef CONFIG_PPC_HAS_LBARX_LHARX
case 1:
__get_user_asmx(val, ea, err, "lbarx");
break;
diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S
index 5473f9d03df3..e2b646a4f7fa 100644
--- a/arch/powerpc/lib/test_emulate_step_exec_instr.S
+++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S
@@ -16,7 +16,7 @@ _GLOBAL(exec_instr)
/*
* Stack frame layout (INT_FRAME_SIZE bytes)
- * In-memory pt_regs (SP + STACK_FRAME_OVERHEAD)
+ * In-memory pt_regs (SP + STACK_INT_FRAME_REGS)
* Scratch space (SP + 8)
* Back chain (SP + 0)
*/