From aaba098fe6ce594ae6f963dc041be6307e499f19 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Tue, 9 Apr 2019 10:52:40 +0100 Subject: arm64: HWCAP: add support for AT_HWCAP2 As we will exhaust the first 32 bits of AT_HWCAP let's start exposing AT_HWCAP2 to userspace to give us up to 64 caps. Whilst it's possible to use the remaining 32 bits of AT_HWCAP, we prefer to expand into AT_HWCAP2 in order to provide a consistent view to userspace between ILP32 and LP64. However internal to the kernel we prefer to continue to use the full space of elf_hwcap. To reduce complexity and allow for future expansion, we now represent hwcaps in the kernel as ordinals and use a KERNEL_HWCAP_ prefix. This allows us to support automatic feature based module loading for all our hwcaps. We introduce cpu_set_feature to set hwcaps which complements the existing cpu_have_feature helper. These helpers allow us to clean up existing direct uses of elf_hwcap and reduce any future effort required to move beyond 64 caps. For convenience we also introduce cpu_{have,set}_named_feature which makes use of the cpu_feature macro to allow providing a hwcap name without a {KERNEL_}HWCAP_ prefix. 
Signed-off-by: Andrew Murray [will: use const_ilog2() and tweak documentation] Signed-off-by: Will Deacon --- drivers/clocksource/arm_arch_timer.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/clocksource/arm_arch_timer.c') diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index aa4ec53281ce..6cc8aff83805 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -833,7 +833,11 @@ static void arch_timer_evtstrm_enable(int divider) cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT) | ARCH_TIMER_VIRT_EVT_EN; arch_timer_set_cntkctl(cntkctl); +#ifdef CONFIG_ARM64 + cpu_set_named_feature(EVTSTRM); +#else elf_hwcap |= HWCAP_EVTSTRM; +#endif #ifdef CONFIG_COMPAT compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM; #endif @@ -1055,7 +1059,11 @@ static int arch_timer_cpu_pm_notify(struct notifier_block *self, } else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) { arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl)); +#ifdef CONFIG_ARM64 + if (cpu_have_named_feature(EVTSTRM)) +#else if (elf_hwcap & HWCAP_EVTSTRM) +#endif cpumask_set_cpu(smp_processor_id(), &evtstrm_available); } return NOTIFY_OK; -- cgit v1.2.3 From 0f80cad3124f986d0e46c14d46b8da06d87a2bf4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Apr 2019 13:03:51 +0100 Subject: arm64: Restrict ARM64_ERRATUM_1188873 mitigation to AArch32 We currently deal with ARM64_ERRATUM_1188873 by always trapping EL0 accesses for both instruction sets. Although nothing wrong comes out of that, people trying to squeeze the last drop of performance from buggy HW find this over the top. Oh well. Let's change the mitigation by flipping the counter enable bit on return to userspace. Non-broken HW gets an extra branch on the fast path, which is hopefully not the end of the world. The arch timer workaround is also removed. 
Acked-by: Daniel Lezcano Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- drivers/clocksource/arm_arch_timer.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'drivers/clocksource/arm_arch_timer.c') diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index aa4ec53281ce..da11a9508b77 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -319,13 +319,6 @@ static u64 notrace arm64_858921_read_cntvct_el0(void) } #endif -#ifdef CONFIG_ARM64_ERRATUM_1188873 -static u64 notrace arm64_1188873_read_cntvct_el0(void) -{ - return read_sysreg(cntvct_el0); -} -#endif - #ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1 /* * The low bits of the counter registers are indeterminate while bit 10 or @@ -457,14 +450,6 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = { .read_cntvct_el0 = arm64_858921_read_cntvct_el0, }, #endif -#ifdef CONFIG_ARM64_ERRATUM_1188873 - { - .match_type = ate_match_local_cap_id, - .id = (void *)ARM64_WORKAROUND_1188873, - .desc = "ARM erratum 1188873", - .read_cntvct_el0 = arm64_1188873_read_cntvct_el0, - }, -#endif #ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1 { .match_type = ate_match_dt, -- cgit v1.2.3 From 5ef19a161cfa88a59508979e2f39d3d092c1d5c0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Apr 2019 16:49:04 +0100 Subject: clocksource/arm_arch_timer: Directly assign set_next_event workaround When a given timer is affected by an erratum and requires an alternative implementation of set_next_event, we do a rather complicated dance to detect and call the workaround on each set_next_event call. This is clearly idiotic, as we can perfectly detect whether this CPU requires a workaround while setting up the clock event device. This only requires the CPU-specific detection to be done a bit earlier, and we can then safely override the set_next_event pointer if we have a workaround associated to that CPU. 
Acked-by: Mark Rutland Acked-by: Daniel Lezcano Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- drivers/clocksource/arm_arch_timer.c | 46 +++++++----------------------------- 1 file changed, 8 insertions(+), 38 deletions(-) (limited to 'drivers/clocksource/arm_arch_timer.c') diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index da11a9508b77..b2a88a64aab4 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -598,36 +598,12 @@ static void arch_timer_check_ool_workaround(enum arch_timer_erratum_match_type t local ? "local" : "global", wa->desc); } -#define erratum_handler(fn, r, ...) \ -({ \ - bool __val; \ - if (needs_unstable_timer_counter_workaround()) { \ - const struct arch_timer_erratum_workaround *__wa; \ - __wa = __this_cpu_read(timer_unstable_counter_workaround); \ - if (__wa && __wa->fn) { \ - r = __wa->fn(__VA_ARGS__); \ - __val = true; \ - } else { \ - __val = false; \ - } \ - } else { \ - __val = false; \ - } \ - __val; \ -}) - static bool arch_timer_this_cpu_has_cntvct_wa(void) { - const struct arch_timer_erratum_workaround *wa; - - wa = __this_cpu_read(timer_unstable_counter_workaround); - return wa && wa->read_cntvct_el0; + return has_erratum_handler(read_cntvct_el0); } #else #define arch_timer_check_ool_workaround(t,a) do { } while(0) -#define erratum_set_next_event_tval_virt(...) ({BUG(); 0;}) -#define erratum_set_next_event_tval_phys(...) ({BUG(); 0;}) -#define erratum_handler(fn, r, ...) 
({false;}) #define arch_timer_this_cpu_has_cntvct_wa() ({false;}) #endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */ @@ -721,11 +697,6 @@ static __always_inline void set_next_event(const int access, unsigned long evt, static int arch_timer_set_next_event_virt(unsigned long evt, struct clock_event_device *clk) { - int ret; - - if (erratum_handler(set_next_event_virt, ret, evt, clk)) - return ret; - set_next_event(ARCH_TIMER_VIRT_ACCESS, evt, clk); return 0; } @@ -733,11 +704,6 @@ static int arch_timer_set_next_event_virt(unsigned long evt, static int arch_timer_set_next_event_phys(unsigned long evt, struct clock_event_device *clk) { - int ret; - - if (erratum_handler(set_next_event_phys, ret, evt, clk)) - return ret; - set_next_event(ARCH_TIMER_PHYS_ACCESS, evt, clk); return 0; } @@ -762,6 +728,10 @@ static void __arch_timer_setup(unsigned type, clk->features = CLOCK_EVT_FEAT_ONESHOT; if (type == ARCH_TIMER_TYPE_CP15) { + typeof(clk->set_next_event) sne; + + arch_timer_check_ool_workaround(ate_match_local_cap_id, NULL); + if (arch_timer_c3stop) clk->features |= CLOCK_EVT_FEAT_C3STOP; clk->name = "arch_sys_timer"; @@ -772,20 +742,20 @@ static void __arch_timer_setup(unsigned type, case ARCH_TIMER_VIRT_PPI: clk->set_state_shutdown = arch_timer_shutdown_virt; clk->set_state_oneshot_stopped = arch_timer_shutdown_virt; - clk->set_next_event = arch_timer_set_next_event_virt; + sne = erratum_handler(set_next_event_virt); break; case ARCH_TIMER_PHYS_SECURE_PPI: case ARCH_TIMER_PHYS_NONSECURE_PPI: case ARCH_TIMER_HYP_PPI: clk->set_state_shutdown = arch_timer_shutdown_phys; clk->set_state_oneshot_stopped = arch_timer_shutdown_phys; - clk->set_next_event = arch_timer_set_next_event_phys; + sne = erratum_handler(set_next_event_phys); break; default: BUG(); } - arch_timer_check_ool_workaround(ate_match_local_cap_id, NULL); + clk->set_next_event = sne; } else { clk->features |= CLOCK_EVT_FEAT_DYNIRQ; clk->name = "arch_mem_timer"; -- cgit v1.2.3 From 
a862fc2254bdbcee3b5da4f730984e5d8393a2f1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Apr 2019 16:49:06 +0100 Subject: clocksource/arm_arch_timer: Remove use of workaround static key The use of a static key in a hotplug path has proved to be a real nightmare, and makes it impossible to have a scream-free lockdep kernel. Let's remove the static key altogether, and focus on something saner. Acked-by: Mark Rutland Acked-by: Daniel Lezcano Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- drivers/clocksource/arm_arch_timer.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) (limited to 'drivers/clocksource/arm_arch_timer.c') diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index b2a88a64aab4..8f22976247c0 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -365,8 +365,6 @@ static u32 notrace sun50i_a64_read_cntv_tval_el0(void) DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround); EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround); -DEFINE_STATIC_KEY_FALSE(arch_timer_read_ool_enabled); -EXPORT_SYMBOL_GPL(arch_timer_read_ool_enabled); static void erratum_set_next_event_tval_generic(const int access, unsigned long evt, struct clock_event_device *clk) @@ -537,12 +535,6 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa per_cpu(timer_unstable_counter_workaround, i) = wa; } - /* - * Use the locked version, as we're called from the CPU - * hotplug framework. Otherwise, we end-up in deadlock-land. - */ - static_branch_enable_cpuslocked(&arch_timer_read_ool_enabled); - /* * Don't use the vdso fastpath if errata require using the * out-of-line counter accessor. 
We may change our mind pretty @@ -558,7 +550,7 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa static void arch_timer_check_ool_workaround(enum arch_timer_erratum_match_type type, void *arg) { - const struct arch_timer_erratum_workaround *wa; + const struct arch_timer_erratum_workaround *wa, *__wa; ate_match_fn_t match_fn = NULL; bool local = false; @@ -582,16 +574,13 @@ static void arch_timer_check_ool_workaround(enum arch_timer_erratum_match_type t if (!wa) return; - if (needs_unstable_timer_counter_workaround()) { - const struct arch_timer_erratum_workaround *__wa; - __wa = __this_cpu_read(timer_unstable_counter_workaround); - if (__wa && wa != __wa) - pr_warn("Can't enable workaround for %s (clashes with %s\n)", - wa->desc, __wa->desc); + __wa = __this_cpu_read(timer_unstable_counter_workaround); + if (__wa && wa != __wa) + pr_warn("Can't enable workaround for %s (clashes with %s\n)", + wa->desc, __wa->desc); - if (__wa) - return; - } + if (__wa) + return; arch_timer_enable_workaround(wa, local); pr_info("Enabling %s workaround for %s\n", -- cgit v1.2.3 From 0ea415390cd345b7d09e8c9ebd4b68adfe873043 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Apr 2019 16:49:07 +0100 Subject: clocksource/arm_arch_timer: Use arch_timer_read_counter to access stable counters Instead of always going via arch_counter_get_cntvct_stable to access the counter workaround, let's have arch_timer_read_counter point to the right method. For that, we need to track whether any CPU in the system has a workaround for the counter. This is done by having an atomic variable tracking this. 
Acked-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- drivers/clocksource/arm_arch_timer.c | 48 +++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 4 deletions(-) (limited to 'drivers/clocksource/arm_arch_timer.c') diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 8f22976247c0..27acc9eb0f7c 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -152,6 +152,26 @@ u32 arch_timer_reg_read(int access, enum arch_timer_reg reg, return val; } +static u64 arch_counter_get_cntpct_stable(void) +{ + return __arch_counter_get_cntpct_stable(); +} + +static u64 arch_counter_get_cntpct(void) +{ + return __arch_counter_get_cntpct(); +} + +static u64 arch_counter_get_cntvct_stable(void) +{ + return __arch_counter_get_cntvct_stable(); +} + +static u64 arch_counter_get_cntvct(void) +{ + return __arch_counter_get_cntvct(); +} + /* * Default to cp15 based access because arm64 uses this function for * sched_clock() before DT is probed and the cp15 method is guaranteed @@ -365,6 +385,7 @@ static u32 notrace sun50i_a64_read_cntv_tval_el0(void) DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround); EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround); +static atomic_t timer_unstable_counter_workaround_in_use = ATOMIC_INIT(0); static void erratum_set_next_event_tval_generic(const int access, unsigned long evt, struct clock_event_device *clk) @@ -535,6 +556,9 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa per_cpu(timer_unstable_counter_workaround, i) = wa; } + if (wa->read_cntvct_el0 || wa->read_cntpct_el0) + atomic_set(&timer_unstable_counter_workaround_in_use, 1); + /* * Don't use the vdso fastpath if errata require using the * out-of-line counter accessor. 
We may change our mind pretty @@ -591,9 +615,15 @@ static bool arch_timer_this_cpu_has_cntvct_wa(void) { return has_erratum_handler(read_cntvct_el0); } + +static bool arch_timer_counter_has_wa(void) +{ + return atomic_read(&timer_unstable_counter_workaround_in_use); +} #else #define arch_timer_check_ool_workaround(t,a) do { } while(0) #define arch_timer_this_cpu_has_cntvct_wa() ({false;}) +#define arch_timer_counter_has_wa() ({false;}) #endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */ static __always_inline irqreturn_t timer_handler(const int access, @@ -942,12 +972,22 @@ static void __init arch_counter_register(unsigned type) /* Register the CP15 based counter if we have one */ if (type & ARCH_TIMER_TYPE_CP15) { + u64 (*rd)(void); + if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) || - arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) - arch_timer_read_counter = arch_counter_get_cntvct; - else - arch_timer_read_counter = arch_counter_get_cntpct; + arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) { + if (arch_timer_counter_has_wa()) + rd = arch_counter_get_cntvct_stable; + else + rd = arch_counter_get_cntvct; + } else { + if (arch_timer_counter_has_wa()) + rd = arch_counter_get_cntpct_stable; + else + rd = arch_counter_get_cntpct; + } + arch_timer_read_counter = rd; clocksource_counter.archdata.vdso_direct = vdso_default; } else { arch_timer_read_counter = arch_counter_get_cntvct_mem; -- cgit v1.2.3