diff options
Diffstat (limited to 'arch/nds32/kernel/perf_event_cpu.c')
-rw-r--r-- | arch/nds32/kernel/perf_event_cpu.c | 1500 |
1 files changed, 0 insertions, 1500 deletions
diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c deleted file mode 100644 index a78a879e7ef1..000000000000 --- a/arch/nds32/kernel/perf_event_cpu.c +++ /dev/null @@ -1,1500 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2008-2017 Andes Technology Corporation - * - * Reference ARMv7: Jean Pihet <jpihet@mvista.com> - * 2010 (c) MontaVista Software, LLC. - */ - -#include <linux/perf_event.h> -#include <linux/bitmap.h> -#include <linux/export.h> -#include <linux/kernel.h> -#include <linux/of.h> -#include <linux/platform_device.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/pm_runtime.h> -#include <linux/ftrace.h> -#include <linux/uaccess.h> -#include <linux/sched/clock.h> -#include <linux/percpu-defs.h> - -#include <asm/pmu.h> -#include <asm/irq_regs.h> -#include <asm/nds32.h> -#include <asm/stacktrace.h> -#include <asm/perf_event.h> -#include <nds32_intrinsic.h> - -/* Set at runtime when we know what CPU type we are. */ -static struct nds32_pmu *cpu_pmu; - -static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); -static void nds32_pmu_start(struct nds32_pmu *cpu_pmu); -static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu); -static struct platform_device_id cpu_pmu_plat_device_ids[] = { - {.name = "nds32-pfm"}, - {}, -}; - -static int nds32_pmu_map_cache_event(const unsigned int (*cache_map) - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX], u64 config) -{ - unsigned int cache_type, cache_op, cache_result, ret; - - cache_type = (config >> 0) & 0xff; - if (cache_type >= PERF_COUNT_HW_CACHE_MAX) - return -EINVAL; - - cache_op = (config >> 8) & 0xff; - if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) - return -EINVAL; - - cache_result = (config >> 16) & 0xff; - if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; - - ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; - - if (ret == CACHE_OP_UNSUPPORTED) - return -ENOENT; - - return ret; -} - -static int -nds32_pmu_map_hw_event(const unsigned int (*event_map)[PERF_COUNT_HW_MAX], - u64 config) -{ - int mapping; - - if (config >= PERF_COUNT_HW_MAX) - return -ENOENT; - - mapping = (*event_map)[config]; - return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping; -} - -static int nds32_pmu_map_raw_event(u32 raw_event_mask, u64 config) -{ - int ev_type = (int)(config & raw_event_mask); - int idx = config >> 8; - - switch (idx) { - case 0: - ev_type = PFM_OFFSET_MAGIC_0 + ev_type; - if (ev_type >= SPAV3_0_SEL_LAST || ev_type <= SPAV3_0_SEL_BASE) - return -ENOENT; - break; - case 1: - ev_type = PFM_OFFSET_MAGIC_1 + ev_type; - if (ev_type >= SPAV3_1_SEL_LAST || ev_type <= SPAV3_1_SEL_BASE) - return -ENOENT; - break; - case 2: - ev_type = PFM_OFFSET_MAGIC_2 + ev_type; - if (ev_type >= SPAV3_2_SEL_LAST || ev_type <= SPAV3_2_SEL_BASE) - return -ENOENT; - break; - default: - return -ENOENT; - } - - return ev_type; -} - -int -nds32_pmu_map_event(struct perf_event *event, - const unsigned int (*event_map)[PERF_COUNT_HW_MAX], - const unsigned int (*cache_map) - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask) -{ - u64 config = event->attr.config; - - switch (event->attr.type) { - case PERF_TYPE_HARDWARE: - return nds32_pmu_map_hw_event(event_map, config); - case PERF_TYPE_HW_CACHE: - return nds32_pmu_map_cache_event(cache_map, config); - case PERF_TYPE_RAW: - return nds32_pmu_map_raw_event(raw_event_mask, config); - } - - return -ENOENT; -} - -static int nds32_spav3_map_event(struct perf_event *event) -{ - return nds32_pmu_map_event(event, &nds32_pfm_perf_map, - &nds32_pfm_perf_cache_map, SOFTWARE_EVENT_MASK); -} - -static inline u32 nds32_pfm_getreset_flags(void) -{ - /* Read overflow status */ - u32 val = __nds32__mfsr(NDS32_SR_PFM_CTL); - u32 old_val = val; - - /* Write overflow bit to clear status, and others keep it 0 */ - u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]; - - __nds32__mtsr(val | ov_flag, NDS32_SR_PFM_CTL); - - return old_val; -} - -static inline int nds32_pfm_has_overflowed(u32 pfm) -{ - u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]; - - return pfm & ov_flag; -} - -static inline int nds32_pfm_counter_has_overflowed(u32 pfm, int idx) -{ - u32 mask = 0; - - switch (idx) { - case 0: - mask = PFM_CTL_OVF[0]; - break; - case 1: - mask = PFM_CTL_OVF[1]; - break; - case 2: - mask = PFM_CTL_OVF[2]; - break; - default: - pr_err("%s index wrong\n", __func__); - break; - } - return pfm & mask; -} - -/* - * Set the next IRQ period, based on the hwc->period_left value. - * To be called with the event disabled in hw: - */ -int nds32_pmu_event_set_period(struct perf_event *event) -{ - struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - s64 left = local64_read(&hwc->period_left); - s64 period = hwc->sample_period; - int ret = 0; - - /* The period may have been changed by PERF_EVENT_IOC_PERIOD */ - if (unlikely(period != hwc->last_period)) - left = period - (hwc->last_period - left); - - if (unlikely(left <= -period)) { - left = period; - local64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - - if (unlikely(left <= 0)) { - left += period; - local64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - - if (left > (s64)nds32_pmu->max_period) - left = nds32_pmu->max_period; - - /* - * The hw event starts counting from this event offset, - * mark it to be able to extract future "deltas": - */ - local64_set(&hwc->prev_count, (u64)(-left)); - - nds32_pmu->write_counter(event, (u64)(-left) & nds32_pmu->max_period); - - perf_event_update_userpage(event); - - return ret; -} - -static irqreturn_t nds32_pmu_handle_irq(int irq_num, void *dev) -{ - u32 pfm; - struct perf_sample_data data; - struct nds32_pmu *cpu_pmu = (struct nds32_pmu *)dev; - struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); - struct pt_regs *regs; - int idx; - /* - * Get and reset the IRQ flags - */ - pfm = nds32_pfm_getreset_flags(); - - /* - * Did an overflow occur? - */ - if (!nds32_pfm_has_overflowed(pfm)) - return IRQ_NONE; - - /* - * Handle the counter(s) overflow(s) - */ - regs = get_irq_regs(); - - nds32_pmu_stop(cpu_pmu); - for (idx = 0; idx < cpu_pmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - /* Ignore if we don't have an event. */ - if (!event) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!nds32_pfm_counter_has_overflowed(pfm, idx)) - continue; - - hwc = &event->hw; - nds32_pmu_event_update(event); - perf_sample_data_init(&data, 0, hwc->last_period); - if (!nds32_pmu_event_set_period(event)) - continue; - - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(event); - } - nds32_pmu_start(cpu_pmu); - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static inline int nds32_pfm_counter_valid(struct nds32_pmu *cpu_pmu, int idx) -{ - return ((idx >= 0) && (idx < cpu_pmu->num_events)); -} - -static inline int nds32_pfm_disable_counter(int idx) -{ - unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL); - u32 mask = 0; - - mask = PFM_CTL_EN[idx]; - val &= ~mask; - val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); - __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); - return idx; -} - -/* - * Add an event filter to a given event. - */ -static int nds32_pmu_set_event_filter(struct hw_perf_event *event, - struct perf_event_attr *attr) -{ - unsigned long config_base = 0; - int idx = event->idx; - unsigned long no_kernel_tracing = 0; - unsigned long no_user_tracing = 0; - /* If index is -1, do not do anything */ - if (idx == -1) - return 0; - - no_kernel_tracing = PFM_CTL_KS[idx]; - no_user_tracing = PFM_CTL_KU[idx]; - /* - * Default: enable both kernel and user mode tracing. - */ - if (attr->exclude_user) - config_base |= no_user_tracing; - - if (attr->exclude_kernel) - config_base |= no_kernel_tracing; - - /* - * Install the filter into config_base as this is used to - * construct the event type. - */ - event->config_base |= config_base; - return 0; -} - -static inline void nds32_pfm_write_evtsel(int idx, u32 evnum) -{ - u32 offset = 0; - u32 ori_val = __nds32__mfsr(NDS32_SR_PFM_CTL); - u32 ev_mask = 0; - u32 no_kernel_mask = 0; - u32 no_user_mask = 0; - u32 val; - - offset = PFM_CTL_OFFSEL[idx]; - /* Clear previous mode selection, and write new one */ - no_kernel_mask = PFM_CTL_KS[idx]; - no_user_mask = PFM_CTL_KU[idx]; - ori_val &= ~no_kernel_mask; - ori_val &= ~no_user_mask; - if (evnum & no_kernel_mask) - ori_val |= no_kernel_mask; - - if (evnum & no_user_mask) - ori_val |= no_user_mask; - - /* Clear previous event selection */ - ev_mask = PFM_CTL_SEL[idx]; - ori_val &= ~ev_mask; - evnum &= SOFTWARE_EVENT_MASK; - - /* undo the linear mapping */ - evnum = get_converted_evet_hw_num(evnum); - val = ori_val | (evnum << offset); - val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); - __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); -} - -static inline int nds32_pfm_enable_counter(int idx) -{ - unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL); - u32 mask = 0; - - mask = PFM_CTL_EN[idx]; - val |= mask; - val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); - __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); - return idx; -} - -static inline int nds32_pfm_enable_intens(int idx) -{ - unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL); - u32 mask = 0; - - mask = PFM_CTL_IE[idx]; - val |= mask; - val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); - __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); - return idx; -} - -static inline int nds32_pfm_disable_intens(int idx) -{ - unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL); - u32 mask = 0; - - mask = PFM_CTL_IE[idx]; - val &= ~mask; - val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); - __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); - return idx; -} - -static int event_requires_mode_exclusion(struct perf_event_attr *attr) -{ - /* Other modes NDS32 does not support */ - return attr->exclude_user || attr->exclude_kernel; -} - -static void nds32_pmu_enable_event(struct perf_event *event) -{ - unsigned long flags; - unsigned int evnum = 0; - struct hw_perf_event *hwc = &event->hw; - struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu); - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); - int idx = hwc->idx; - - if (!nds32_pfm_counter_valid(cpu_pmu, idx)) { - pr_err("CPU enabling wrong pfm counter IRQ enable\n"); - return; - } - - /* - * Enable counter and interrupt, and set the counter to count - * the event that we're interested in. - */ - raw_spin_lock_irqsave(&events->pmu_lock, flags); - - /* - * Disable counter - */ - nds32_pfm_disable_counter(idx); - - /* - * Check whether we need to exclude the counter from certain modes. - */ - if ((!cpu_pmu->set_event_filter || - cpu_pmu->set_event_filter(hwc, &event->attr)) && - event_requires_mode_exclusion(&event->attr)) { - pr_notice - ("NDS32 performance counters do not support mode exclusion\n"); - hwc->config_base = 0; - } - /* Write event */ - evnum = hwc->config_base; - nds32_pfm_write_evtsel(idx, evnum); - - /* - * Enable interrupt for this counter - */ - nds32_pfm_enable_intens(idx); - - /* - * Enable counter - */ - nds32_pfm_enable_counter(idx); - - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); -} - -static void nds32_pmu_disable_event(struct perf_event *event) -{ - unsigned long flags; - struct hw_perf_event *hwc = &event->hw; - struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu); - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); - int idx = hwc->idx; - - if (!nds32_pfm_counter_valid(cpu_pmu, idx)) { - pr_err("CPU disabling wrong pfm counter IRQ enable %d\n", idx); - return; - } - - /* - * Disable counter and interrupt - */ - raw_spin_lock_irqsave(&events->pmu_lock, flags); - - /* - * Disable counter - */ - nds32_pfm_disable_counter(idx); - - /* - * Disable interrupt for this counter - */ - nds32_pfm_disable_intens(idx); - - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); -} - -static inline u32 nds32_pmu_read_counter(struct perf_event *event) -{ - struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - u32 count = 0; - - if (!nds32_pfm_counter_valid(cpu_pmu, idx)) { - pr_err("CPU reading wrong counter %d\n", idx); - } else { - switch (idx) { - case PFMC0: - count = __nds32__mfsr(NDS32_SR_PFMC0); - break; - case PFMC1: - count = __nds32__mfsr(NDS32_SR_PFMC1); - break; - case PFMC2: - count = __nds32__mfsr(NDS32_SR_PFMC2); - break; - default: - pr_err - ("%s: CPU has no performance counters %d\n", - __func__, idx); - } - } - return count; -} - -static inline void nds32_pmu_write_counter(struct perf_event *event, u32 value) -{ - struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - if (!nds32_pfm_counter_valid(cpu_pmu, idx)) { - pr_err("CPU writing wrong counter %d\n", idx); - } else { - switch (idx) { - case PFMC0: - __nds32__mtsr_isb(value, NDS32_SR_PFMC0); - break; - case PFMC1: - __nds32__mtsr_isb(value, NDS32_SR_PFMC1); - break; - case PFMC2: - __nds32__mtsr_isb(value, NDS32_SR_PFMC2); - break; - default: - pr_err - ("%s: CPU has no performance counters %d\n", - __func__, idx); - } - } -} - -static int nds32_pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - int idx; - struct hw_perf_event *hwc = &event->hw; - /* - * Current implementation maps cycles, instruction count and cache-miss - * to specific counter. - * However, multiple of the 3 counters are able to count these events. - * - * - * SOFTWARE_EVENT_MASK mask for getting event num , - * This is defined by Jia-Rung, you can change the polocies. - * However, do not exceed 8 bits. This is hardware specific. - * The last number is SPAv3_2_SEL_LAST. - */ - unsigned long evtype = hwc->config_base & SOFTWARE_EVENT_MASK; - - idx = get_converted_event_idx(evtype); - /* - * Try to get the counter for correpsonding event - */ - if (evtype == SPAV3_0_SEL_TOTAL_CYCLES) { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - if (!test_and_set_bit(NDS32_IDX_COUNTER0, cpuc->used_mask)) - return NDS32_IDX_COUNTER0; - if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask)) - return NDS32_IDX_COUNTER1; - } else if (evtype == SPAV3_1_SEL_COMPLETED_INSTRUCTION) { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - else if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask)) - return NDS32_IDX_COUNTER1; - else if (!test_and_set_bit - (NDS32_IDX_CYCLE_COUNTER, cpuc->used_mask)) - return NDS32_IDX_CYCLE_COUNTER; - } else { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - } - return -EAGAIN; -} - -static void nds32_pmu_start(struct nds32_pmu *cpu_pmu) -{ - unsigned long flags; - unsigned int val; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); - - raw_spin_lock_irqsave(&events->pmu_lock, flags); - - /* Enable all counters , NDS PFM has 3 counters */ - val = __nds32__mfsr(NDS32_SR_PFM_CTL); - val |= (PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]); - val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); - __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); - - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); -} - -static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu) -{ - unsigned long flags; - unsigned int val; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); - - raw_spin_lock_irqsave(&events->pmu_lock, flags); - - /* Disable all counters , NDS PFM has 3 counters */ - val = __nds32__mfsr(NDS32_SR_PFM_CTL); - val &= ~(PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]); - val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); - __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); - - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); -} - -static void nds32_pmu_reset(void *info) -{ - u32 val = 0; - - val |= (PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); - __nds32__mtsr(val, NDS32_SR_PFM_CTL); - __nds32__mtsr(0, NDS32_SR_PFM_CTL); - __nds32__mtsr(0, NDS32_SR_PFMC0); - __nds32__mtsr(0, NDS32_SR_PFMC1); - __nds32__mtsr(0, NDS32_SR_PFMC2); -} - -static void nds32_pmu_init(struct nds32_pmu *cpu_pmu) -{ - cpu_pmu->handle_irq = nds32_pmu_handle_irq; - cpu_pmu->enable = nds32_pmu_enable_event; - cpu_pmu->disable = nds32_pmu_disable_event; - cpu_pmu->read_counter = nds32_pmu_read_counter; - cpu_pmu->write_counter = nds32_pmu_write_counter; - cpu_pmu->get_event_idx = nds32_pmu_get_event_idx; - cpu_pmu->start = nds32_pmu_start; - cpu_pmu->stop = nds32_pmu_stop; - cpu_pmu->reset = nds32_pmu_reset; - cpu_pmu->max_period = 0xFFFFFFFF; /* Maximum counts */ -}; - -static u32 nds32_read_num_pfm_events(void) -{ - /* NDS32 SPAv3 PMU support 3 counter */ - return 3; -} - -static int device_pmu_init(struct nds32_pmu *cpu_pmu) -{ - nds32_pmu_init(cpu_pmu); - /* - * This name should be devive-specific name, whatever you like :) - * I think "PMU" will be a good generic name. - */ - cpu_pmu->name = "nds32v3-pmu"; - cpu_pmu->map_event = nds32_spav3_map_event; - cpu_pmu->num_events = nds32_read_num_pfm_events(); - cpu_pmu->set_event_filter = nds32_pmu_set_event_filter; - return 0; -} - -/* - * CPU PMU identification and probing. - */ -static int probe_current_pmu(struct nds32_pmu *pmu) -{ - int ret; - - get_cpu(); - ret = -ENODEV; - /* - * If ther are various CPU types with its own PMU, initialize with - * - * the corresponding one - */ - device_pmu_init(pmu); - put_cpu(); - return ret; -} - -static void nds32_pmu_enable(struct pmu *pmu) -{ - struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu); - struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events(); - int enabled = bitmap_weight(hw_events->used_mask, - nds32_pmu->num_events); - - if (enabled) - nds32_pmu->start(nds32_pmu); -} - -static void nds32_pmu_disable(struct pmu *pmu) -{ - struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu); - - nds32_pmu->stop(nds32_pmu); -} - -static void nds32_pmu_release_hardware(struct nds32_pmu *nds32_pmu) -{ - nds32_pmu->free_irq(nds32_pmu); - pm_runtime_put_sync(&nds32_pmu->plat_device->dev); -} - -static irqreturn_t nds32_pmu_dispatch_irq(int irq, void *dev) -{ - struct nds32_pmu *nds32_pmu = (struct nds32_pmu *)dev; - int ret; - u64 start_clock, finish_clock; - - start_clock = local_clock(); - ret = nds32_pmu->handle_irq(irq, dev); - finish_clock = local_clock(); - - perf_sample_event_took(finish_clock - start_clock); - return ret; -} - -static int nds32_pmu_reserve_hardware(struct nds32_pmu *nds32_pmu) -{ - int err; - struct platform_device *pmu_device = nds32_pmu->plat_device; - - if (!pmu_device) - return -ENODEV; - - pm_runtime_get_sync(&pmu_device->dev); - err = nds32_pmu->request_irq(nds32_pmu, nds32_pmu_dispatch_irq); - if (err) { - nds32_pmu_release_hardware(nds32_pmu); - return err; - } - - return 0; -} - -static int -validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events, - struct perf_event *event) -{ - struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); - - if (is_software_event(event)) - return 1; - - if (event->pmu != pmu) - return 0; - - if (event->state < PERF_EVENT_STATE_OFF) - return 1; - - if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) - return 1; - - return nds32_pmu->get_event_idx(hw_events, event) >= 0; -} - -static int validate_group(struct perf_event *event) -{ - struct perf_event *sibling, *leader = event->group_leader; - struct pmu_hw_events fake_pmu; - DECLARE_BITMAP(fake_used_mask, MAX_COUNTERS); - /* - * Initialize the fake PMU. We only need to populate the - * used_mask for the purposes of validation. - */ - memset(fake_used_mask, 0, sizeof(fake_used_mask)); - - if (!validate_event(event->pmu, &fake_pmu, leader)) - return -EINVAL; - - for_each_sibling_event(sibling, leader) { - if (!validate_event(event->pmu, &fake_pmu, sibling)) - return -EINVAL; - } - - if (!validate_event(event->pmu, &fake_pmu, event)) - return -EINVAL; - - return 0; -} - -static int __hw_perf_event_init(struct perf_event *event) -{ - struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - int mapping; - - mapping = nds32_pmu->map_event(event); - - if (mapping < 0) { - pr_debug("event %x:%llx not supported\n", event->attr.type, - event->attr.config); - return mapping; - } - - /* - * We don't assign an index until we actually place the event onto - * hardware. Use -1 to signify that we haven't decided where to put it - * yet. For SMP systems, each core has it's own PMU so we can't do any - * clever allocation or constraints checking at this point. - */ - hwc->idx = -1; - hwc->config_base = 0; - hwc->config = 0; - hwc->event_base = 0; - - /* - * Check whether we need to exclude the counter from certain modes. - */ - if ((!nds32_pmu->set_event_filter || - nds32_pmu->set_event_filter(hwc, &event->attr)) && - event_requires_mode_exclusion(&event->attr)) { - pr_debug - ("NDS performance counters do not support mode exclusion\n"); - return -EOPNOTSUPP; - } - - /* - * Store the event encoding into the config_base field. - */ - hwc->config_base |= (unsigned long)mapping; - - if (!hwc->sample_period) { - /* - * For non-sampling runs, limit the sample_period to half - * of the counter width. That way, the new counter value - * is far less likely to overtake the previous one unless - * you have some serious IRQ latency issues. - */ - hwc->sample_period = nds32_pmu->max_period >> 1; - hwc->last_period = hwc->sample_period; - local64_set(&hwc->period_left, hwc->sample_period); - } - - if (event->group_leader != event) { - if (validate_group(event) != 0) - return -EINVAL; - } - - return 0; -} - -static int nds32_pmu_event_init(struct perf_event *event) -{ - struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); - int err = 0; - atomic_t *active_events = &nds32_pmu->active_events; - - /* does not support taken branch sampling */ - if (has_branch_stack(event)) - return -EOPNOTSUPP; - - if (nds32_pmu->map_event(event) == -ENOENT) - return -ENOENT; - - if (!atomic_inc_not_zero(active_events)) { - if (atomic_read(active_events) == 0) { - /* Register irq handler */ - err = nds32_pmu_reserve_hardware(nds32_pmu); - } - - if (!err) - atomic_inc(active_events); - } - - if (err) - return err; - - err = __hw_perf_event_init(event); - - return err; -} - -static void nds32_start(struct perf_event *event, int flags) -{ - struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - /* - * NDS pmu always has to reprogram the period, so ignore - * PERF_EF_RELOAD, see the comment below. - */ - if (flags & PERF_EF_RELOAD) - WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); - - hwc->state = 0; - /* Set the period for the event. */ - nds32_pmu_event_set_period(event); - - nds32_pmu->enable(event); -} - -static int nds32_pmu_add(struct perf_event *event, int flags) -{ - struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); - struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events(); - struct hw_perf_event *hwc = &event->hw; - int idx; - int err = 0; - - perf_pmu_disable(event->pmu); - - /* If we don't have a space for the counter then finish early. */ - idx = nds32_pmu->get_event_idx(hw_events, event); - if (idx < 0) { - err = idx; - goto out; - } - - /* - * If there is an event in the counter we are going to use then make - * sure it is disabled. - */ - event->hw.idx = idx; - nds32_pmu->disable(event); - hw_events->events[idx] = event; - - hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; - if (flags & PERF_EF_START) - nds32_start(event, PERF_EF_RELOAD); - - /* Propagate our changes to the userspace mapping. */ - perf_event_update_userpage(event); - -out: - perf_pmu_enable(event->pmu); - return err; -} - -u64 nds32_pmu_event_update(struct perf_event *event) -{ - struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - u64 delta, prev_raw_count, new_raw_count; - -again: - prev_raw_count = local64_read(&hwc->prev_count); - new_raw_count = nds32_pmu->read_counter(event); - - if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) { - goto again; - } - /* - * Whether overflow or not, "unsigned substraction" - * will always get their delta - */ - delta = (new_raw_count - prev_raw_count) & nds32_pmu->max_period; - - local64_add(delta, &event->count); - local64_sub(delta, &hwc->period_left); - - return new_raw_count; -} - -static void nds32_stop(struct perf_event *event, int flags) -{ - struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - /* - * NDS pmu always has to update the counter, so ignore - * PERF_EF_UPDATE, see comments in nds32_start(). - */ - if (!(hwc->state & PERF_HES_STOPPED)) { - nds32_pmu->disable(event); - nds32_pmu_event_update(event); - hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; - } -} - -static void nds32_pmu_del(struct perf_event *event, int flags) -{ - struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); - struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events(); - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - nds32_stop(event, PERF_EF_UPDATE); - hw_events->events[idx] = NULL; - clear_bit(idx, hw_events->used_mask); - - perf_event_update_userpage(event); -} - -static void nds32_pmu_read(struct perf_event *event) -{ - nds32_pmu_event_update(event); -} - -/* Please refer to SPAv3 for more hardware specific details */ -PMU_FORMAT_ATTR(event, "config:0-63"); - -static struct attribute *nds32_arch_formats_attr[] = { - &format_attr_event.attr, - NULL, -}; - -static struct attribute_group nds32_pmu_format_group = { - .name = "format", - .attrs = nds32_arch_formats_attr, -}; - -static ssize_t nds32_pmu_cpumask_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return 0; -} - -static DEVICE_ATTR(cpus, 0444, nds32_pmu_cpumask_show, NULL); - -static struct attribute *nds32_pmu_common_attrs[] = { - &dev_attr_cpus.attr, - NULL, -}; - -static struct attribute_group nds32_pmu_common_group = { - .attrs = nds32_pmu_common_attrs, -}; - -static const struct attribute_group *nds32_pmu_attr_groups[] = { - &nds32_pmu_format_group, - &nds32_pmu_common_group, - NULL, -}; - -static void nds32_init(struct nds32_pmu *nds32_pmu) -{ - atomic_set(&nds32_pmu->active_events, 0); - - nds32_pmu->pmu = (struct pmu) { - .pmu_enable = nds32_pmu_enable, - .pmu_disable = nds32_pmu_disable, - .attr_groups = nds32_pmu_attr_groups, - .event_init = nds32_pmu_event_init, - .add = nds32_pmu_add, - .del = nds32_pmu_del, - .start = nds32_start, - .stop = nds32_stop, - .read = nds32_pmu_read, - }; -} - -int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type) -{ - nds32_init(nds32_pmu); - pm_runtime_enable(&nds32_pmu->plat_device->dev); - pr_info("enabled with %s PMU driver, %d counters available\n", - nds32_pmu->name, nds32_pmu->num_events); - return perf_pmu_register(&nds32_pmu->pmu, nds32_pmu->name, type); -} - -static struct pmu_hw_events *cpu_pmu_get_cpu_events(void) -{ - return this_cpu_ptr(&cpu_hw_events); -} - -static int cpu_pmu_request_irq(struct nds32_pmu *cpu_pmu, irq_handler_t handler) -{ - int err, irq, irqs; - struct platform_device *pmu_device = cpu_pmu->plat_device; - - if (!pmu_device) - return -ENODEV; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - if (irqs < 1) { - pr_err("no irqs for PMUs defined\n"); - return -ENODEV; - } - - irq = platform_get_irq(pmu_device, 0); - err = request_irq(irq, handler, IRQF_NOBALANCING, "nds32-pfm", - cpu_pmu); - if (err) { - pr_err("unable to request IRQ%d for NDS PMU counters\n", - irq); - return err; - } - return 0; -} - -static void cpu_pmu_free_irq(struct nds32_pmu *cpu_pmu) -{ - int irq; - struct platform_device *pmu_device = cpu_pmu->plat_device; - - irq = platform_get_irq(pmu_device, 0); - if (irq >= 0) - free_irq(irq, cpu_pmu); -} - -static void cpu_pmu_init(struct nds32_pmu *cpu_pmu) -{ - int cpu; - struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu); - - raw_spin_lock_init(&events->pmu_lock); - - cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events; - cpu_pmu->request_irq = cpu_pmu_request_irq; - cpu_pmu->free_irq = cpu_pmu_free_irq; - - /* Ensure the PMU has sane values out of reset. */ - if (cpu_pmu->reset) - on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); -} - -static const struct of_device_id cpu_pmu_of_device_ids[] = { - {.compatible = "andestech,nds32v3-pmu", - .data = device_pmu_init}, - {}, -}; - -static int cpu_pmu_device_probe(struct platform_device *pdev) -{ - const struct of_device_id *of_id; - int (*init_fn)(struct nds32_pmu *nds32_pmu); - struct device_node *node = pdev->dev.of_node; - struct nds32_pmu *pmu; - int ret = -ENODEV; - - if (cpu_pmu) { - pr_notice("[perf] attempt to register multiple PMU devices!\n"); - return -ENOSPC; - } - - pmu = kzalloc(sizeof(*pmu), GFP_KERNEL); - if (!pmu) - return -ENOMEM; - - of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node); - if (node && of_id) { - init_fn = of_id->data; - ret = init_fn(pmu); - } else { - ret = probe_current_pmu(pmu); - } - - if (ret) { - pr_notice("[perf] failed to probe PMU!\n"); - goto out_free; - } - - cpu_pmu = pmu; - cpu_pmu->plat_device = pdev; - cpu_pmu_init(cpu_pmu); - ret = nds32_pmu_register(cpu_pmu, PERF_TYPE_RAW); - - if (!ret) - return 0; - -out_free: - pr_notice("[perf] failed to register PMU devices!\n"); - kfree(pmu); - return ret; -} - -static struct platform_driver cpu_pmu_driver = { - .driver = { - .name = "nds32-pfm", - .of_match_table = cpu_pmu_of_device_ids, - }, - .probe = cpu_pmu_device_probe, - .id_table = cpu_pmu_plat_device_ids, -}; - -static int __init register_pmu_driver(void) -{ - int err = 0; - - err = platform_driver_register(&cpu_pmu_driver); - if (err) - pr_notice("[perf] PMU initialization failed\n"); - else - pr_notice("[perf] PMU initialization done\n"); - - return err; -} - -device_initcall(register_pmu_driver); - -/* - * References: arch/nds32/kernel/traps.c:__dump() - * You will need to know the NDS ABI first. - */ -static int unwind_frame_kernel(struct stackframe *frame) -{ - int graph = 0; -#ifdef CONFIG_FRAME_POINTER - /* 0x3 means misalignment */ - if (!kstack_end((void *)frame->fp) && - !((unsigned long)frame->fp & 0x3) && - ((unsigned long)frame->fp >= TASK_SIZE)) { - /* - * The array index is based on the ABI, the below graph - * illustrate the reasons. - * Function call procedure: "smw" and "lmw" will always - * update SP and FP for you automatically. - * - * Stack Relative Address - * | | 0 - * ---- - * |LP| <-- SP(before smw) <-- FP(after smw) -1 - * ---- - * |FP| -2 - * ---- - * | | <-- SP(after smw) -3 - */ - frame->lp = ((unsigned long *)frame->fp)[-1]; - frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET]; - /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */ - if (__kernel_text_address(frame->lp)) - frame->lp = ftrace_graph_ret_addr - (NULL, &graph, frame->lp, NULL); - - return 0; - } else { - return -EPERM; - } -#else - /* - * You can refer to arch/nds32/kernel/traps.c:__dump() - * Treat "sp" as "fp", but the "sp" is one frame ahead of "fp". - * And, the "sp" is not always correct. - * - * Stack Relative Address - * | | 0 - * ---- - * |LP| <-- SP(before smw) -1 - * ---- - * | | <-- SP(after smw) -2 - * ---- - */ - if (!kstack_end((void *)frame->sp)) { - frame->lp = ((unsigned long *)frame->sp)[1]; - /* TODO: How to deal with the value in first - * "sp" is not correct? - */ - if (__kernel_text_address(frame->lp)) - frame->lp = ftrace_graph_ret_addr - (tsk, &graph, frame->lp, NULL); - - frame->sp = ((unsigned long *)frame->sp) + 1; - - return 0; - } else { - return -EPERM; - } -#endif -} - -static void notrace -walk_stackframe(struct stackframe *frame, - int (*fn_record)(struct stackframe *, void *), - void *data) -{ - while (1) { - int ret; - - if (fn_record(frame, data)) - break; - - ret = unwind_frame_kernel(frame); - if (ret < 0) - break; - } -} - -/* - * Gets called by walk_stackframe() for every stackframe. This will be called - * whist unwinding the stackframe and is like a subroutine return so we use - * the PC. - */ -static int callchain_trace(struct stackframe *fr, void *data) -{ - struct perf_callchain_entry_ctx *entry = data; - - perf_callchain_store(entry, fr->lp); - return 0; -} - -/* - * Get the return address for a single stackframe and return a pointer to the - * next frame tail. - */ -static unsigned long -user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp) -{ - struct frame_tail buftail; - unsigned long lp = 0; - unsigned long *user_frame_tail = - (unsigned long *)(fp - (unsigned long)sizeof(buftail)); - - /* Check accessibility of one struct frame_tail beyond */ - if (!access_ok(user_frame_tail, sizeof(buftail))) - return 0; - if (__copy_from_user_inatomic - (&buftail, user_frame_tail, sizeof(buftail))) - return 0; - - /* - * Refer to unwind_frame_kernel() for more illurstration - */ - lp = buftail.stack_lp; /* ((unsigned long *)fp)[-1] */ - fp = buftail.stack_fp; /* ((unsigned long *)fp)[FP_OFFSET] */ - perf_callchain_store(entry, lp); - return fp; -} - -static unsigned long -user_backtrace_opt_size(struct perf_callchain_entry_ctx *entry, - unsigned long fp) -{ - struct frame_tail_opt_size buftail; - unsigned long lp = 0; - - unsigned long *user_frame_tail = - (unsigned long *)(fp - (unsigned long)sizeof(buftail)); - - /* Check accessibility of one struct frame_tail beyond */ - if (!access_ok(user_frame_tail, sizeof(buftail))) - return 0; - if (__copy_from_user_inatomic - (&buftail, user_frame_tail, sizeof(buftail))) - return 0; - - /* - * Refer to unwind_frame_kernel() for more illurstration - */ - lp = buftail.stack_lp; /* ((unsigned long *)fp)[-1] */ - fp = buftail.stack_fp; /* ((unsigned long *)fp)[FP_OFFSET] */ - - perf_callchain_store(entry, lp); - return fp; -} - -/* - * This will be called when the target is in user mode - * This function will only be called when we use - * "PERF_SAMPLE_CALLCHAIN" in - * kernel/events/core.c:perf_prepare_sample() - * - * How to trigger perf_callchain_[user/kernel] : - * $ perf record -e cpu-clock --call-graph fp ./program - * $ perf report --call-graph - */ -unsigned long leaf_fp; -void -perf_callchain_user(struct perf_callchain_entry_ctx *entry, - struct pt_regs *regs) -{ - unsigned long fp = 0; - unsigned long gp = 0; - unsigned long lp = 0; - unsigned long sp = 0; - unsigned long *user_frame_tail; - - leaf_fp = 0; - - perf_callchain_store(entry, regs->ipc); - fp = regs->fp; - gp = regs->gp; - lp = regs->lp; - sp = regs->sp; - if (entry->nr < PERF_MAX_STACK_DEPTH && - (unsigned long)fp && !((unsigned long)fp & 0x7) && fp > sp) { - user_frame_tail = - (unsigned long *)(fp - (unsigned long)sizeof(fp)); - - if (!access_ok(user_frame_tail, sizeof(fp))) - return; - - if (__copy_from_user_inatomic - (&leaf_fp, user_frame_tail, sizeof(fp))) - return; - - if (leaf_fp == lp) { - /* - * Maybe this is non leaf function - * with optimize for size, - * or maybe this is the function - * with optimize for size - */ - struct frame_tail buftail; - - user_frame_tail = - (unsigned long *)(fp - - (unsigned long)sizeof(buftail)); - - if (!access_ok(user_frame_tail, sizeof(buftail))) - return; - - if (__copy_from_user_inatomic - (&buftail, user_frame_tail, sizeof(buftail))) - return; - - if (buftail.stack_fp == gp) { - /* non leaf function with optimize - * for size condition - */ - struct frame_tail_opt_size buftail_opt_size; - - user_frame_tail = - (unsigned long *)(fp - (unsigned long) - sizeof(buftail_opt_size)); - - if (!access_ok(user_frame_tail, - sizeof(buftail_opt_size))) - return; - - if (__copy_from_user_inatomic - (&buftail_opt_size, user_frame_tail, - sizeof(buftail_opt_size))) - return; - - perf_callchain_store(entry, lp); - fp = buftail_opt_size.stack_fp; - - while ((entry->nr < PERF_MAX_STACK_DEPTH) && - (unsigned long)fp && - !((unsigned long)fp & 0x7) && - fp > sp) { - sp = fp; - fp = user_backtrace_opt_size(entry, fp); - } - - } else { - /* this is the function - * without optimize for size - */ - fp = buftail.stack_fp; - perf_callchain_store(entry, lp); - while ((entry->nr < PERF_MAX_STACK_DEPTH) && - (unsigned long)fp && - !((unsigned long)fp & 0x7) && - fp > sp) { - sp = fp; - fp = user_backtrace(entry, fp); - } - } - } else { - /* this is leaf function */ - fp = leaf_fp; - perf_callchain_store(entry, lp); - - /* previous function callcahin */ - while ((entry->nr < PERF_MAX_STACK_DEPTH) && - (unsigned long)fp && - !((unsigned long)fp & 0x7) && fp > sp) { - sp = fp; - fp = user_backtrace(entry, fp); - } - } - return; - } -} - -/* This will be called when the target is in kernel mode */ -void -perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, - struct pt_regs *regs) -{ - struct stackframe fr; - - fr.fp = regs->fp; - fr.lp = regs->lp; - fr.sp = regs->sp; - walk_stackframe(&fr, callchain_trace, entry); -} - -unsigned long perf_instruction_pointer(struct pt_regs *regs) -{ - return instruction_pointer(regs); -} - -unsigned long perf_misc_flags(struct pt_regs *regs) -{ - int misc = 0; - - if (user_mode(regs)) - misc |= PERF_RECORD_MISC_USER; - else - misc |= PERF_RECORD_MISC_KERNEL; - - return misc; -} |