summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/nds32/Kconfig1
-rw-r--r--arch/nds32/boot/dts/ae3xx.dts5
-rw-r--r--arch/nds32/include/asm/Kbuild1
-rw-r--r--arch/nds32/include/asm/perf_event.h16
-rw-r--r--arch/nds32/include/asm/pmu.h386
-rw-r--r--arch/nds32/include/asm/stacktrace.h39
-rw-r--r--arch/nds32/kernel/Makefile3
-rw-r--r--arch/nds32/kernel/perf_event_cpu.c1223
-rw-r--r--arch/nds32/mm/fault.c13
-rw-r--r--tools/include/asm/barrier.h2
-rw-r--r--tools/perf/arch/nds32/Build1
-rw-r--r--tools/perf/arch/nds32/util/Build1
-rw-r--r--tools/perf/arch/nds32/util/header.c29
-rw-r--r--tools/perf/pmu-events/arch/nds32/mapfile.csv15
-rw-r--r--tools/perf/pmu-events/arch/nds32/n13/atcpmu.json290
15 files changed, 2019 insertions, 6 deletions
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 7a04adacb2f0..97786a865023 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -30,6 +30,7 @@ config NDS32
select HAVE_ARCH_TRACEHOOK
select HAVE_DEBUG_KMEMLEAK
select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_PERF_EVENTS
select IRQ_DOMAIN
select LOCKDEP_SUPPORT
select MODULES_USE_ELF_RELA
diff --git a/arch/nds32/boot/dts/ae3xx.dts b/arch/nds32/boot/dts/ae3xx.dts
index bb39749a6673..16a9f54a805e 100644
--- a/arch/nds32/boot/dts/ae3xx.dts
+++ b/arch/nds32/boot/dts/ae3xx.dts
@@ -82,4 +82,9 @@
interrupts = <18>;
};
};
+
+ pmu {
+ compatible = "andestech,nds32v3-pmu";
+ interrupts= <13>;
+ };
};
diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild
index dbc4e5422550..f81b633d5379 100644
--- a/arch/nds32/include/asm/Kbuild
+++ b/arch/nds32/include/asm/Kbuild
@@ -36,6 +36,7 @@ generic-y += kprobes.h
generic-y += kvm_para.h
generic-y += limits.h
generic-y += local.h
+generic-y += local64.h
generic-y += mm-arch-hooks.h
generic-y += mman.h
generic-y += parport.h
diff --git a/arch/nds32/include/asm/perf_event.h b/arch/nds32/include/asm/perf_event.h
new file mode 100644
index 000000000000..fcdff02acc14
--- /dev/null
+++ b/arch/nds32/include/asm/perf_event.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2008-2018 Andes Technology Corporation */
+
+#ifndef __ASM_PERF_EVENT_H
+#define __ASM_PERF_EVENT_H
+
+/*
+ * This file is request by Perf,
+ * please refer to tools/perf/design.txt for more details
+ */
+struct pt_regs;
+unsigned long perf_instruction_pointer(struct pt_regs *regs);
+unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs) perf_misc_flags(regs)
+
+#endif
diff --git a/arch/nds32/include/asm/pmu.h b/arch/nds32/include/asm/pmu.h
new file mode 100644
index 000000000000..e1ac0b0b8bcf
--- /dev/null
+++ b/arch/nds32/include/asm/pmu.h
@@ -0,0 +1,386 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2008-2018 Andes Technology Corporation */
+
+#ifndef __ASM_PMU_H
+#define __ASM_PMU_H
+
+#include <linux/interrupt.h>
+#include <linux/perf_event.h>
+#include <asm/unistd.h>
+#include <asm/bitfield.h>
+
+/* Has special meaning for perf core implementation */
+#define HW_OP_UNSUPPORTED 0x0
+#define C(_x) PERF_COUNT_HW_CACHE_##_x
+#define CACHE_OP_UNSUPPORTED 0x0
+
+/* Enough for both software and hardware defined events */
+#define SOFTWARE_EVENT_MASK 0xFF
+
+#define PFM_OFFSET_MAGIC_0 2 /* DO NOT START FROM 0 */
+#define PFM_OFFSET_MAGIC_1 (PFM_OFFSET_MAGIC_0 + 36)
+#define PFM_OFFSET_MAGIC_2 (PFM_OFFSET_MAGIC_1 + 36)
+
+enum { PFMC0, PFMC1, PFMC2, MAX_COUNTERS };
+
+u32 PFM_CTL_OVF[3] = { PFM_CTL_mskOVF0, PFM_CTL_mskOVF1,
+ PFM_CTL_mskOVF2 };
+u32 PFM_CTL_EN[3] = { PFM_CTL_mskEN0, PFM_CTL_mskEN1,
+ PFM_CTL_mskEN2 };
+u32 PFM_CTL_OFFSEL[3] = { PFM_CTL_offSEL0, PFM_CTL_offSEL1,
+ PFM_CTL_offSEL2 };
+u32 PFM_CTL_IE[3] = { PFM_CTL_mskIE0, PFM_CTL_mskIE1, PFM_CTL_mskIE2 };
+u32 PFM_CTL_KS[3] = { PFM_CTL_mskKS0, PFM_CTL_mskKS1, PFM_CTL_mskKS2 };
+u32 PFM_CTL_KU[3] = { PFM_CTL_mskKU0, PFM_CTL_mskKU1, PFM_CTL_mskKU2 };
+u32 PFM_CTL_SEL[3] = { PFM_CTL_mskSEL0, PFM_CTL_mskSEL1, PFM_CTL_mskSEL2 };
+/*
+ * Perf Events' indices
+ */
+#define NDS32_IDX_CYCLE_COUNTER 0
+#define NDS32_IDX_COUNTER0 1
+#define NDS32_IDX_COUNTER1 2
+
+/* The events for a given PMU register set. */
+struct pmu_hw_events {
+ /*
+ * The events that are active on the PMU for the given index.
+ */
+ struct perf_event *events[MAX_COUNTERS];
+
+ /*
+ * A 1 bit for an index indicates that the counter is being used for
+ * an event. A 0 means that the counter can be used.
+ */
+ unsigned long used_mask[BITS_TO_LONGS(MAX_COUNTERS)];
+
+ /*
+ * Hardware lock to serialize accesses to PMU registers. Needed for the
+ * read/modify/write sequences.
+ */
+ raw_spinlock_t pmu_lock;
+};
+
+struct nds32_pmu {
+ struct pmu pmu;
+ cpumask_t active_irqs;
+ char *name;
+ irqreturn_t (*handle_irq)(int irq_num, void *dev);
+ void (*enable)(struct perf_event *event);
+ void (*disable)(struct perf_event *event);
+ int (*get_event_idx)(struct pmu_hw_events *hw_events,
+ struct perf_event *event);
+ int (*set_event_filter)(struct hw_perf_event *evt,
+ struct perf_event_attr *attr);
+ u32 (*read_counter)(struct perf_event *event);
+ void (*write_counter)(struct perf_event *event, u32 val);
+ void (*start)(struct nds32_pmu *nds32_pmu);
+ void (*stop)(struct nds32_pmu *nds32_pmu);
+ void (*reset)(void *data);
+ int (*request_irq)(struct nds32_pmu *nds32_pmu, irq_handler_t handler);
+ void (*free_irq)(struct nds32_pmu *nds32_pmu);
+ int (*map_event)(struct perf_event *event);
+ int num_events;
+ atomic_t active_events;
+ u64 max_period;
+ struct platform_device *plat_device;
+ struct pmu_hw_events *(*get_hw_events)(void);
+};
+
+#define to_nds32_pmu(p) (container_of(p, struct nds32_pmu, pmu))
+
+int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type);
+
+u64 nds32_pmu_event_update(struct perf_event *event);
+
+int nds32_pmu_event_set_period(struct perf_event *event);
+
+/*
+ * Common NDS32 SPAv3 event types
+ *
+ * Note: An implementation may not be able to count all of these events
+ * but the encodings are considered to be `reserved' in the case that
+ * they are not available.
+ *
+ * SEL_TOTAL_CYCLES will add an offset is due to ZERO is defined as
+ * NOT_SUPPORTED EVENT mapping in generic perf code.
+ * You will need to deal it in the event writing implementation.
+ */
+enum spav3_counter_0_perf_types {
+ SPAV3_0_SEL_BASE = -1 + PFM_OFFSET_MAGIC_0, /* counting symbol */
+ SPAV3_0_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_0,
+ SPAV3_0_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_0,
+ SPAV3_0_SEL_LAST /* counting symbol */
+};
+
+enum spav3_counter_1_perf_types {
+ SPAV3_1_SEL_BASE = -1 + PFM_OFFSET_MAGIC_1, /* counting symbol */
+ SPAV3_1_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_CONDITIONAL_BRANCH = 2 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_TAKEN_CONDITIONAL_BRANCH = 3 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_PREFETCH_INSTRUCTION = 4 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_RET_INST = 5 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_JR_INST = 6 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_JAL_JRAL_INST = 7 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_NOP_INST = 8 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_SCW_INST = 9 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_ISB_DSB_INST = 10 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_CCTL_INST = 11 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_TAKEN_INTERRUPTS = 12 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_LOADS_COMPLETED = 13 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_UITLB_ACCESS = 14 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_UDTLB_ACCESS = 15 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_MTLB_ACCESS = 16 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_CODE_CACHE_ACCESS = 17 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_DATA_DEPENDENCY_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_DATA_CACHE_MISS_STALL_CYCLES = 19 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_DATA_CACHE_ACCESS = 20 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS = 22 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS = 23 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_ILM_ACCESS = 24 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_LSU_BIU_CYCLES = 25 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_HPTWK_BIU_CYCLES = 26 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_DMA_BIU_CYCLES = 27 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_CODE_CACHE_FILL_BIU_CYCLES = 28 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_LEGAL_UNALIGN_DCACHE_ACCESS = 29 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_PUSH25 = 30 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_SYSCALLS_INST = 31 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_LAST /* counting symbol */
+};
+
+enum spav3_counter_2_perf_types {
+ SPAV3_2_SEL_BASE = -1 + PFM_OFFSET_MAGIC_2, /* counting symbol */
+ SPAV3_2_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_CONDITIONAL_BRANCH_MISPREDICT = 2 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_TAKEN_CONDITIONAL_BRANCH_MISPREDICT =
+ 3 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_PREFETCH_INSTRUCTION_CACHE_HIT = 4 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_RET_MISPREDICT = 5 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_IMMEDIATE_J_INST = 6 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_MULTIPLY_INST = 7 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_16_BIT_INST = 8 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_FAILED_SCW_INST = 9 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_LD_AFTER_ST_CONFLICT_REPLAYS = 10 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_TAKEN_EXCEPTIONS = 12 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_STORES_COMPLETED = 13 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_UITLB_MISS = 14 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_UDTLB_MISS = 15 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_MTLB_MISS = 16 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_CODE_CACHE_MISS = 17 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_EMPTY_INST_QUEUE_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_DATA_WRITE_BACK = 19 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_LOAD_DATA_CACHE_MISS = 22 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_STORE_DATA_CACHE_MISS = 23 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_DLM_ACCESS = 24 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_LSU_BIU_REQUEST = 25 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_HPTWK_BIU_REQUEST = 26 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_DMA_BIU_REQUEST = 27 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_CODE_CACHE_FILL_BIU_REQUEST = 28 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_EXTERNAL_EVENTS = 29 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_POP25 = 30 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_LAST /* counting symbol */
+};
+
+/* Get converted event counter index */
+static inline int get_converted_event_idx(unsigned long event)
+{
+ int idx;
+
+ if ((event) > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST) {
+ idx = 0;
+ } else if ((event) > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST) {
+ idx = 1;
+ } else if ((event) > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST) {
+ idx = 2;
+ } else {
+ pr_err("GET_CONVERTED_EVENT_IDX PFM counter range error\n");
+ return -EPERM;
+ }
+
+ return idx;
+}
+
+/* Get converted hardware event number */
+static inline u32 get_converted_evet_hw_num(u32 event)
+{
+ if (event > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST)
+ event -= PFM_OFFSET_MAGIC_0;
+ else if (event > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST)
+ event -= PFM_OFFSET_MAGIC_1;
+ else if (event > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST)
+ event -= PFM_OFFSET_MAGIC_2;
+ else if (event != 0)
+ pr_err("GET_CONVERTED_EVENT_HW_NUM PFM counter range error\n");
+
+ return event;
+}
+
+/*
+ * NDS32 HW events mapping
+ *
+ * The hardware events that we support. We do support cache operations but
+ * we have harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned int nds32_pfm_perf_map[PERF_COUNT_HW_MAX] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = SPAV3_0_SEL_TOTAL_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = SPAV3_1_SEL_COMPLETED_INSTRUCTION,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = SPAV3_1_SEL_DATA_CACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = SPAV3_2_SEL_DATA_CACHE_MISS,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED,
+ [PERF_COUNT_HW_BRANCH_MISSES] = HW_OP_UNSUPPORTED,
+ [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
+ [PERF_COUNT_HW_REF_CPU_CYCLES] = HW_OP_UNSUPPORTED
+};
+
+static const unsigned int nds32_pfm_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ [C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] =
+ SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS,
+ [C(RESULT_MISS)] =
+ SPAV3_2_SEL_LOAD_DATA_CACHE_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] =
+ SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS,
+ [C(RESULT_MISS)] =
+ SPAV3_2_SEL_STORE_DATA_CACHE_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] =
+ CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] =
+ SPAV3_1_SEL_CODE_CACHE_ACCESS,
+ [C(RESULT_MISS)] =
+ SPAV3_2_SEL_CODE_CACHE_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] =
+ SPAV3_1_SEL_CODE_CACHE_ACCESS,
+ [C(RESULT_MISS)] =
+ SPAV3_2_SEL_CODE_CACHE_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ /* TODO: L2CC */
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ /* NDS32 PMU does not support TLB read/write hit/miss,
+ * However, it can count access/miss, which mixed with read and write.
+ * Therefore, only READ counter will use it.
+ * We do as possible as we can.
+ */
+ [C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] =
+ SPAV3_1_SEL_UDTLB_ACCESS,
+ [C(RESULT_MISS)] =
+ SPAV3_2_SEL_UDTLB_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] =
+ CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] =
+ SPAV3_1_SEL_UITLB_ACCESS,
+ [C(RESULT_MISS)] =
+ SPAV3_2_SEL_UITLB_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] =
+ CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] =
+ CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(BPU)] = { /* What is BPU? */
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] =
+ CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] =
+ CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] =
+ CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(NODE)] = { /* What is NODE? */
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] =
+ CACHE_OP_UNSUPPORTED,
+ },
+ },
+};
+
+int nds32_pmu_map_event(struct perf_event *event,
+ const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
+ const unsigned int (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask);
+
+#endif /* __ASM_PMU_H */
diff --git a/arch/nds32/include/asm/stacktrace.h b/arch/nds32/include/asm/stacktrace.h
new file mode 100644
index 000000000000..6bf7c777bda4
--- /dev/null
+++ b/arch/nds32/include/asm/stacktrace.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2008-2018 Andes Technology Corporation */
+
+#ifndef __ASM_STACKTRACE_H
+#define __ASM_STACKTRACE_H
+
+/* Kernel callchain */
+struct stackframe {
+ unsigned long fp;
+ unsigned long sp;
+ unsigned long lp;
+};
+
+/*
+ * struct frame_tail: User callchain
+ * IMPORTANT:
+ * This struct is used for call-stack walking,
+ * the order and types matters.
+ * Do not use array, it only stores sizeof(pointer)
+ *
+ * The details can refer to arch/arm/kernel/perf_event.c
+ */
+struct frame_tail {
+ unsigned long stack_fp;
+ unsigned long stack_lp;
+};
+
+/* For User callchain with optimize for size */
+struct frame_tail_opt_size {
+ unsigned long stack_r6;
+ unsigned long stack_fp;
+ unsigned long stack_gp;
+ unsigned long stack_lp;
+};
+
+extern void
+get_real_ret_addr(unsigned long *addr, struct task_struct *tsk, int *graph);
+
+#endif /* __ASM_STACKTRACE_H */
diff --git a/arch/nds32/kernel/Makefile b/arch/nds32/kernel/Makefile
index 27cded39fa66..f52bd2744f50 100644
--- a/arch/nds32/kernel/Makefile
+++ b/arch/nds32/kernel/Makefile
@@ -4,7 +4,6 @@
CPPFLAGS_vmlinux.lds := -DTEXTADDR=$(TEXTADDR)
AFLAGS_head.o := -DTEXTADDR=$(TEXTADDR)
-
# Object file lists.
obj-y := ex-entry.o ex-exit.o ex-scall.o irq.o \
@@ -16,10 +15,10 @@ obj-$(CONFIG_MODULES) += nds32_ksyms.o module.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_OF) += devtree.o
obj-$(CONFIG_CACHE_L2) += atl2c.o
+obj-$(CONFIG_PERF_EVENTS) += perf_event_cpu.o
extra-y := head.o vmlinux.lds
-
obj-y += vdso/
obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o
diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c
new file mode 100644
index 000000000000..a6e723d0fdbc
--- /dev/null
+++ b/arch/nds32/kernel/perf_event_cpu.c
@@ -0,0 +1,1223 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2008-2017 Andes Technology Corporation
+ *
+ * Reference ARMv7: Jean Pihet <jpihet@mvista.com>
+ * 2010 (c) MontaVista Software, LLC.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/bitmap.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/pm_runtime.h>
+#include <linux/ftrace.h>
+#include <linux/uaccess.h>
+#include <linux/sched/clock.h>
+#include <linux/percpu-defs.h>
+
+#include <asm/pmu.h>
+#include <asm/irq_regs.h>
+#include <asm/nds32.h>
+#include <asm/stacktrace.h>
+#include <asm/perf_event.h>
+#include <nds32_intrinsic.h>
+
+/* Set at runtime when we know what CPU type we are. */
+static struct nds32_pmu *cpu_pmu;
+
+static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
+static void nds32_pmu_start(struct nds32_pmu *cpu_pmu);
+static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu);
+static struct platform_device_id cpu_pmu_plat_device_ids[] = {
+ {.name = "nds32-pfm"},
+ {},
+};
+
+static int nds32_pmu_map_cache_event(const unsigned int (*cache_map)
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX], u64 config)
+{
+ unsigned int cache_type, cache_op, cache_result, ret;
+
+ cache_type = (config >> 0) & 0xff;
+ if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+ return -EINVAL;
+
+ cache_op = (config >> 8) & 0xff;
+ if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+ return -EINVAL;
+
+ cache_result = (config >> 16) & 0xff;
+ if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+ return -EINVAL;
+
+ ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
+
+ if (ret == CACHE_OP_UNSUPPORTED)
+ return -ENOENT;
+
+ return ret;
+}
+
+static int
+nds32_pmu_map_hw_event(const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
+ u64 config)
+{
+ int mapping;
+
+ if (config >= PERF_COUNT_HW_MAX)
+ return -ENOENT;
+
+ mapping = (*event_map)[config];
+ return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
+}
+
+static int nds32_pmu_map_raw_event(u32 raw_event_mask, u64 config)
+{
+ int ev_type = (int)(config & raw_event_mask);
+ int idx = config >> 8;
+
+ switch (idx) {
+ case 0:
+ ev_type = PFM_OFFSET_MAGIC_0 + ev_type;
+ if (ev_type >= SPAV3_0_SEL_LAST || ev_type <= SPAV3_0_SEL_BASE)
+ return -ENOENT;
+ break;
+ case 1:
+ ev_type = PFM_OFFSET_MAGIC_1 + ev_type;
+ if (ev_type >= SPAV3_1_SEL_LAST || ev_type <= SPAV3_1_SEL_BASE)
+ return -ENOENT;
+ break;
+ case 2:
+ ev_type = PFM_OFFSET_MAGIC_2 + ev_type;
+ if (ev_type >= SPAV3_2_SEL_LAST || ev_type <= SPAV3_2_SEL_BASE)
+ return -ENOENT;
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ return ev_type;
+}
+
+int
+nds32_pmu_map_event(struct perf_event *event,
+ const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
+ const unsigned int (*cache_map)
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask)
+{
+ u64 config = event->attr.config;
+
+ switch (event->attr.type) {
+ case PERF_TYPE_HARDWARE:
+ return nds32_pmu_map_hw_event(event_map, config);
+ case PERF_TYPE_HW_CACHE:
+ return nds32_pmu_map_cache_event(cache_map, config);
+ case PERF_TYPE_RAW:
+ return nds32_pmu_map_raw_event(raw_event_mask, config);
+ }
+
+ return -ENOENT;
+}
+
+static int nds32_spav3_map_event(struct perf_event *event)
+{
+ return nds32_pmu_map_event(event, &nds32_pfm_perf_map,
+ &nds32_pfm_perf_cache_map, SOFTWARE_EVENT_MASK);
+}
+
+static inline u32 nds32_pfm_getreset_flags(void)
+{
+ /* Read overflow status */
+ u32 val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+ u32 old_val = val;
+
+ /* Write overflow bit to clear status, and others keep it 0 */
+ u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
+
+ __nds32__mtsr(val | ov_flag, NDS32_SR_PFM_CTL);
+
+ return old_val;
+}
+
+static inline int nds32_pfm_has_overflowed(u32 pfm)
+{
+ u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
+
+ return pfm & ov_flag;
+}
+
+static inline int nds32_pfm_counter_has_overflowed(u32 pfm, int idx)
+{
+ u32 mask = 0;
+
+ switch (idx) {
+ case 0:
+ mask = PFM_CTL_OVF[0];
+ break;
+ case 1:
+ mask = PFM_CTL_OVF[1];
+ break;
+ case 2:
+ mask = PFM_CTL_OVF[2];
+ break;
+ default:
+ pr_err("%s index wrong\n", __func__);
+ break;
+ }
+ return pfm & mask;
+}
+
+/*
+ * Set the next IRQ period, based on the hwc->period_left value.
+ * To be called with the event disabled in hw:
+ */
+int nds32_pmu_event_set_period(struct perf_event *event)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ s64 left = local64_read(&hwc->period_left);
+ s64 period = hwc->sample_period;
+ int ret = 0;
+
+ /* The period may have been changed by PERF_EVENT_IOC_PERIOD */
+ if (unlikely(period != hwc->last_period))
+ left = period - (hwc->last_period - left);
+
+ if (unlikely(left <= -period)) {
+ left = period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ ret = 1;
+ }
+
+ if (unlikely(left <= 0)) {
+ left += period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ ret = 1;
+ }
+
+ if (left > (s64)nds32_pmu->max_period)
+ left = nds32_pmu->max_period;
+
+ /*
+ * The hw event starts counting from this event offset,
+ * mark it to be able to extract future "deltas":
+ */
+ local64_set(&hwc->prev_count, (u64)(-left));
+
+ nds32_pmu->write_counter(event, (u64)(-left) & nds32_pmu->max_period);
+
+ perf_event_update_userpage(event);
+
+ return ret;
+}
+
+static irqreturn_t nds32_pmu_handle_irq(int irq_num, void *dev)
+{
+ u32 pfm;
+ struct perf_sample_data data;
+ struct nds32_pmu *cpu_pmu = (struct nds32_pmu *)dev;
+ struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
+ struct pt_regs *regs;
+ int idx;
+ /*
+ * Get and reset the IRQ flags
+ */
+ pfm = nds32_pfm_getreset_flags();
+
+ /*
+ * Did an overflow occur?
+ */
+ if (!nds32_pfm_has_overflowed(pfm))
+ return IRQ_NONE;
+
+ /*
+ * Handle the counter(s) overflow(s)
+ */
+ regs = get_irq_regs();
+
+ nds32_pmu_stop(cpu_pmu);
+ for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
+ struct perf_event *event = cpuc->events[idx];
+ struct hw_perf_event *hwc;
+
+ /* Ignore if we don't have an event. */
+ if (!event)
+ continue;
+
+ /*
+ * We have a single interrupt for all counters. Check that
+ * each counter has overflowed before we process it.
+ */
+ if (!nds32_pfm_counter_has_overflowed(pfm, idx))
+ continue;
+
+ hwc = &event->hw;
+ nds32_pmu_event_update(event);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!nds32_pmu_event_set_period(event))
+ continue;
+
+ if (perf_event_overflow(event, &data, regs))
+ cpu_pmu->disable(event);
+ }
+ nds32_pmu_start(cpu_pmu);
+ /*
+ * Handle the pending perf events.
+ *
+ * Note: this call *must* be run with interrupts disabled. For
+ * platforms that can have the PMU interrupts raised as an NMI, this
+ * will not work.
+ */
+ irq_work_run();
+
+ return IRQ_HANDLED;
+}
+
+static inline int nds32_pfm_counter_valid(struct nds32_pmu *cpu_pmu, int idx)
+{
+ return ((idx >= 0) && (idx < cpu_pmu->num_events));
+}
+
+static inline int nds32_pfm_disable_counter(int idx)
+{
+ unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+ u32 mask = 0;
+
+ mask = PFM_CTL_EN[idx];
+ val &= ~mask;
+ val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+ __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+ return idx;
+}
+
+/*
+ * Add an event filter to a given event.
+ */
+static int nds32_pmu_set_event_filter(struct hw_perf_event *event,
+ struct perf_event_attr *attr)
+{
+ unsigned long config_base = 0;
+ int idx = event->idx;
+ unsigned long no_kernel_tracing = 0;
+ unsigned long no_user_tracing = 0;
+ /* If index is -1, do not do anything */
+ if (idx == -1)
+ return 0;
+
+ no_kernel_tracing = PFM_CTL_KS[idx];
+ no_user_tracing = PFM_CTL_KU[idx];
+ /*
+ * Default: enable both kernel and user mode tracing.
+ */
+ if (attr->exclude_user)
+ config_base |= no_user_tracing;
+
+ if (attr->exclude_kernel)
+ config_base |= no_kernel_tracing;
+
+ /*
+ * Install the filter into config_base as this is used to
+ * construct the event type.
+ */
+ event->config_base |= config_base;
+ return 0;
+}
+
+static inline void nds32_pfm_write_evtsel(int idx, u32 evnum)
+{
+ u32 offset = 0;
+ u32 ori_val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+ u32 ev_mask = 0;
+ u32 no_kernel_mask = 0;
+ u32 no_user_mask = 0;
+ u32 val;
+
+ offset = PFM_CTL_OFFSEL[idx];
+ /* Clear previous mode selection, and write new one */
+ no_kernel_mask = PFM_CTL_KS[idx];
+ no_user_mask = PFM_CTL_KU[idx];
+ ori_val &= ~no_kernel_mask;
+ ori_val &= ~no_user_mask;
+ if (evnum & no_kernel_mask)
+ ori_val |= no_kernel_mask;
+
+ if (evnum & no_user_mask)
+ ori_val |= no_user_mask;
+
+ /* Clear previous event selection */
+ ev_mask = PFM_CTL_SEL[idx];
+ ori_val &= ~ev_mask;
+ evnum &= SOFTWARE_EVENT_MASK;
+
+ /* undo the linear mapping */
+ evnum = get_converted_evet_hw_num(evnum);
+ val = ori_val | (evnum << offset);
+ val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+ __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+}
+
+static inline int nds32_pfm_enable_counter(int idx)
+{
+ unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+ u32 mask = 0;
+
+ mask = PFM_CTL_EN[idx];
+ val |= mask;
+ val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+ __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+ return idx;
+}
+
+static inline int nds32_pfm_enable_intens(int idx)
+{
+ unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+ u32 mask = 0;
+
+ mask = PFM_CTL_IE[idx];
+ val |= mask;
+ val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+ __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+ return idx;
+}
+
+static inline int nds32_pfm_disable_intens(int idx)
+{
+ unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+ u32 mask = 0;
+
+ mask = PFM_CTL_IE[idx];
+ val &= ~mask;
+ val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+ __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+ return idx;
+}
+
+static int event_requires_mode_exclusion(struct perf_event_attr *attr)
+{
+ /* Other modes NDS32 does not support */
+ return attr->exclude_user || attr->exclude_kernel;
+}
+
+static void nds32_pmu_enable_event(struct perf_event *event)
+{
+ unsigned long flags;
+ unsigned int evnum = 0;
+ struct hw_perf_event *hwc = &event->hw;
+ struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ int idx = hwc->idx;
+
+ if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU enabling wrong pfm counter IRQ enable\n");
+ return;
+ }
+
+ /*
+ * Enable counter and interrupt, and set the counter to count
+ * the event that we're interested in.
+ */
+ raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+ /*
+ * Disable counter
+ */
+ nds32_pfm_disable_counter(idx);
+
+ /*
+ * Check whether we need to exclude the counter from certain modes.
+ */
+ if ((!cpu_pmu->set_event_filter ||
+ cpu_pmu->set_event_filter(hwc, &event->attr)) &&
+ event_requires_mode_exclusion(&event->attr)) {
+ pr_notice
+ ("NDS32 performance counters do not support mode exclusion\n");
+ hwc->config_base = 0;
+ }
+ /* Write event */
+ evnum = hwc->config_base;
+ nds32_pfm_write_evtsel(idx, evnum);
+
+ /*
+ * Enable interrupt for this counter
+ */
+ nds32_pfm_enable_intens(idx);
+
+ /*
+ * Enable counter
+ */
+ nds32_pfm_enable_counter(idx);
+
+ raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void nds32_pmu_disable_event(struct perf_event *event)
+{
+ unsigned long flags;
+ struct hw_perf_event *hwc = &event->hw;
+ struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ int idx = hwc->idx;
+
+ if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU disabling wrong pfm counter IRQ enable %d\n", idx);
+ return;
+ }
+
+ /*
+ * Disable counter and interrupt
+ */
+ raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+ /*
+ * Disable counter
+ */
+ nds32_pfm_disable_counter(idx);
+
+ /*
+ * Disable interrupt for this counter
+ */
+ nds32_pfm_disable_intens(idx);
+
+ raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static inline u32 nds32_pmu_read_counter(struct perf_event *event)
+{
+ struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+ u32 count = 0;
+
+ if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU reading wrong counter %d\n", idx);
+ } else {
+ switch (idx) {
+ case PFMC0:
+ count = __nds32__mfsr(NDS32_SR_PFMC0);
+ break;
+ case PFMC1:
+ count = __nds32__mfsr(NDS32_SR_PFMC1);
+ break;
+ case PFMC2:
+ count = __nds32__mfsr(NDS32_SR_PFMC2);
+ break;
+ default:
+ pr_err
+ ("%s: CPU has no performance counters %d\n",
+ __func__, idx);
+ }
+ }
+ return count;
+}
+
+static inline void nds32_pmu_write_counter(struct perf_event *event, u32 value)
+{
+ struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU writing wrong counter %d\n", idx);
+ } else {
+ switch (idx) {
+ case PFMC0:
+ __nds32__mtsr_isb(value, NDS32_SR_PFMC0);
+ break;
+ case PFMC1:
+ __nds32__mtsr_isb(value, NDS32_SR_PFMC1);
+ break;
+ case PFMC2:
+ __nds32__mtsr_isb(value, NDS32_SR_PFMC2);
+ break;
+ default:
+ pr_err
+ ("%s: CPU has no performance counters %d\n",
+ __func__, idx);
+ }
+ }
+}
+
+static int nds32_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int idx;
+ struct hw_perf_event *hwc = &event->hw;
+ /*
+ * Current implementation maps cycles, instruction count and cache-miss
+ * to specific counter.
+ * However, multiple of the 3 counters are able to count these events.
+ *
+ *
+ * SOFTWARE_EVENT_MASK mask for getting event num ,
+ * This is defined by Jia-Rung, you can change the polocies.
+ * However, do not exceed 8 bits. This is hardware specific.
+ * The last number is SPAv3_2_SEL_LAST.
+ */
+ unsigned long evtype = hwc->config_base & SOFTWARE_EVENT_MASK;
+
+ idx = get_converted_event_idx(evtype);
+ /*
+ * Try to get the counter for correpsonding event
+ */
+ if (evtype == SPAV3_0_SEL_TOTAL_CYCLES) {
+ if (!test_and_set_bit(idx, cpuc->used_mask))
+ return idx;
+ if (!test_and_set_bit(NDS32_IDX_COUNTER0, cpuc->used_mask))
+ return NDS32_IDX_COUNTER0;
+ if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
+ return NDS32_IDX_COUNTER1;
+ } else if (evtype == SPAV3_1_SEL_COMPLETED_INSTRUCTION) {
+ if (!test_and_set_bit(idx, cpuc->used_mask))
+ return idx;
+ else if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
+ return NDS32_IDX_COUNTER1;
+ else if (!test_and_set_bit
+ (NDS32_IDX_CYCLE_COUNTER, cpuc->used_mask))
+ return NDS32_IDX_CYCLE_COUNTER;
+ } else {
+ if (!test_and_set_bit(idx, cpuc->used_mask))
+ return idx;
+ }
+ return -EAGAIN;
+}
+
+static void nds32_pmu_start(struct nds32_pmu *cpu_pmu)
+{
+ unsigned long flags;
+ unsigned int val;
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+
+ raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+ /* Enable all counters , NDS PFM has 3 counters */
+ val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+ val |= (PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]);
+ val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+ __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+
+ raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu)
+{
+ unsigned long flags;
+ unsigned int val;
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+
+ raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+ /* Disable all counters , NDS PFM has 3 counters */
+ val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+ val &= ~(PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]);
+ val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+ __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+
+ raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void nds32_pmu_reset(void *info)
+{
+ u32 val = 0;
+
+ val |= (PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+ __nds32__mtsr(val, NDS32_SR_PFM_CTL);
+ __nds32__mtsr(0, NDS32_SR_PFM_CTL);
+ __nds32__mtsr(0, NDS32_SR_PFMC0);
+ __nds32__mtsr(0, NDS32_SR_PFMC1);
+ __nds32__mtsr(0, NDS32_SR_PFMC2);
+}
+
+static void nds32_pmu_init(struct nds32_pmu *cpu_pmu)
+{
+ cpu_pmu->handle_irq = nds32_pmu_handle_irq;
+ cpu_pmu->enable = nds32_pmu_enable_event;
+ cpu_pmu->disable = nds32_pmu_disable_event;
+ cpu_pmu->read_counter = nds32_pmu_read_counter;
+ cpu_pmu->write_counter = nds32_pmu_write_counter;
+ cpu_pmu->get_event_idx = nds32_pmu_get_event_idx;
+ cpu_pmu->start = nds32_pmu_start;
+ cpu_pmu->stop = nds32_pmu_stop;
+ cpu_pmu->reset = nds32_pmu_reset;
+ cpu_pmu->max_period = 0xFFFFFFFF; /* Maximum counts */
+};
+
+static u32 nds32_read_num_pfm_events(void)
+{
+ /* NDS32 SPAv3 PMU support 3 counter */
+ return 3;
+}
+
+static int device_pmu_init(struct nds32_pmu *cpu_pmu)
+{
+ nds32_pmu_init(cpu_pmu);
+ /*
+ * This name should be devive-specific name, whatever you like :)
+ * I think "PMU" will be a good generic name.
+ */
+ cpu_pmu->name = "nds32v3-pmu";
+ cpu_pmu->map_event = nds32_spav3_map_event;
+ cpu_pmu->num_events = nds32_read_num_pfm_events();
+ cpu_pmu->set_event_filter = nds32_pmu_set_event_filter;
+ return 0;
+}
+
+/*
+ * CPU PMU identification and probing.
+ */
+static int probe_current_pmu(struct nds32_pmu *pmu)
+{
+ int ret;
+
+ get_cpu();
+ ret = -ENODEV;
+ /*
+ * If ther are various CPU types with its own PMU, initialize with
+ *
+ * the corresponding one
+ */
+ device_pmu_init(pmu);
+ put_cpu();
+ return ret;
+}
+
+static void nds32_pmu_enable(struct pmu *pmu)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);
+ struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
+ int enabled = bitmap_weight(hw_events->used_mask,
+ nds32_pmu->num_events);
+
+ if (enabled)
+ nds32_pmu->start(nds32_pmu);
+}
+
+static void nds32_pmu_disable(struct pmu *pmu)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);
+
+ nds32_pmu->stop(nds32_pmu);
+}
+
+static void nds32_pmu_release_hardware(struct nds32_pmu *nds32_pmu)
+{
+ nds32_pmu->free_irq(nds32_pmu);
+ pm_runtime_put_sync(&nds32_pmu->plat_device->dev);
+}
+
+static irqreturn_t nds32_pmu_dispatch_irq(int irq, void *dev)
+{
+ struct nds32_pmu *nds32_pmu = (struct nds32_pmu *)dev;
+ int ret;
+ u64 start_clock, finish_clock;
+
+ start_clock = local_clock();
+ ret = nds32_pmu->handle_irq(irq, dev);
+ finish_clock = local_clock();
+
+ perf_sample_event_took(finish_clock - start_clock);
+ return ret;
+}
+
+static int nds32_pmu_reserve_hardware(struct nds32_pmu *nds32_pmu)
+{
+ int err;
+ struct platform_device *pmu_device = nds32_pmu->plat_device;
+
+ if (!pmu_device)
+ return -ENODEV;
+
+ pm_runtime_get_sync(&pmu_device->dev);
+ err = nds32_pmu->request_irq(nds32_pmu, nds32_pmu_dispatch_irq);
+ if (err) {
+ nds32_pmu_release_hardware(nds32_pmu);
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
+ struct perf_event *event)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+
+ if (is_software_event(event))
+ return 1;
+
+ if (event->pmu != pmu)
+ return 0;
+
+ if (event->state < PERF_EVENT_STATE_OFF)
+ return 1;
+
+ if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
+ return 1;
+
+ return nds32_pmu->get_event_idx(hw_events, event) >= 0;
+}
+
+static int validate_group(struct perf_event *event)
+{
+ struct perf_event *sibling, *leader = event->group_leader;
+ struct pmu_hw_events fake_pmu;
+ DECLARE_BITMAP(fake_used_mask, MAX_COUNTERS);
+ /*
+ * Initialize the fake PMU. We only need to populate the
+ * used_mask for the purposes of validation.
+ */
+ memset(fake_used_mask, 0, sizeof(fake_used_mask));
+
+ if (!validate_event(event->pmu, &fake_pmu, leader))
+ return -EINVAL;
+
+ for_each_sibling_event(sibling, leader) {
+ if (!validate_event(event->pmu, &fake_pmu, sibling))
+ return -EINVAL;
+ }
+
+ if (!validate_event(event->pmu, &fake_pmu, event))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int mapping;
+
+ mapping = nds32_pmu->map_event(event);
+
+ if (mapping < 0) {
+ pr_debug("event %x:%llx not supported\n", event->attr.type,
+ event->attr.config);
+ return mapping;
+ }
+
+ /*
+ * We don't assign an index until we actually place the event onto
+ * hardware. Use -1 to signify that we haven't decided where to put it
+ * yet. For SMP systems, each core has it's own PMU so we can't do any
+ * clever allocation or constraints checking at this point.
+ */
+ hwc->idx = -1;
+ hwc->config_base = 0;
+ hwc->config = 0;
+ hwc->event_base = 0;
+
+ /*
+ * Check whether we need to exclude the counter from certain modes.
+ */
+ if ((!nds32_pmu->set_event_filter ||
+ nds32_pmu->set_event_filter(hwc, &event->attr)) &&
+ event_requires_mode_exclusion(&event->attr)) {
+ pr_debug
+ ("NDS performance counters do not support mode exclusion\n");
+ return -EOPNOTSUPP;
+ }
+
+ /*
+ * Store the event encoding into the config_base field.
+ */
+ hwc->config_base |= (unsigned long)mapping;
+
+ if (!hwc->sample_period) {
+ /*
+ * For non-sampling runs, limit the sample_period to half
+ * of the counter width. That way, the new counter value
+ * is far less likely to overtake the previous one unless
+ * you have some serious IRQ latency issues.
+ */
+ hwc->sample_period = nds32_pmu->max_period >> 1;
+ hwc->last_period = hwc->sample_period;
+ local64_set(&hwc->period_left, hwc->sample_period);
+ }
+
+ if (event->group_leader != event) {
+ if (validate_group(event) != 0)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int nds32_pmu_event_init(struct perf_event *event)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+ int err = 0;
+ atomic_t *active_events = &nds32_pmu->active_events;
+
+ /* does not support taken branch sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
+ if (nds32_pmu->map_event(event) == -ENOENT)
+ return -ENOENT;
+
+ if (!atomic_inc_not_zero(active_events)) {
+ if (atomic_read(active_events) == 0) {
+ /* Register irq handler */
+ err = nds32_pmu_reserve_hardware(nds32_pmu);
+ }
+
+ if (!err)
+ atomic_inc(active_events);
+ }
+
+ if (err)
+ return err;
+
+ err = __hw_perf_event_init(event);
+
+ return err;
+}
+
+static void nds32_start(struct perf_event *event, int flags)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ /*
+ * NDS pmu always has to reprogram the period, so ignore
+ * PERF_EF_RELOAD, see the comment below.
+ */
+ if (flags & PERF_EF_RELOAD)
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+ hwc->state = 0;
+ /* Set the period for the event. */
+ nds32_pmu_event_set_period(event);
+
+ nds32_pmu->enable(event);
+}
+
+static int nds32_pmu_add(struct perf_event *event, int flags)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+ struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
+ struct hw_perf_event *hwc = &event->hw;
+ int idx;
+ int err = 0;
+
+ perf_pmu_disable(event->pmu);
+
+ /* If we don't have a space for the counter then finish early. */
+ idx = nds32_pmu->get_event_idx(hw_events, event);
+ if (idx < 0) {
+ err = idx;
+ goto out;
+ }
+
+ /*
+ * If there is an event in the counter we are going to use then make
+ * sure it is disabled.
+ */
+ event->hw.idx = idx;
+ nds32_pmu->disable(event);
+ hw_events->events[idx] = event;
+
+ hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+ if (flags & PERF_EF_START)
+ nds32_start(event, PERF_EF_RELOAD);
+
+ /* Propagate our changes to the userspace mapping. */
+ perf_event_update_userpage(event);
+
+out:
+ perf_pmu_enable(event->pmu);
+ return err;
+}
+
+u64 nds32_pmu_event_update(struct perf_event *event)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 delta, prev_raw_count, new_raw_count;
+
+again:
+ prev_raw_count = local64_read(&hwc->prev_count);
+ new_raw_count = nds32_pmu->read_counter(event);
+
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count) {
+ goto again;
+ }
+ /*
+ * Whether overflow or not, "unsigned substraction"
+ * will always get their delta
+ */
+ delta = (new_raw_count - prev_raw_count) & nds32_pmu->max_period;
+
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
+
+ return new_raw_count;
+}
+
+static void nds32_stop(struct perf_event *event, int flags)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ /*
+ * NDS pmu always has to update the counter, so ignore
+ * PERF_EF_UPDATE, see comments in nds32_start().
+ */
+ if (!(hwc->state & PERF_HES_STOPPED)) {
+ nds32_pmu->disable(event);
+ nds32_pmu_event_update(event);
+ hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+ }
+}
+
+static void nds32_pmu_del(struct perf_event *event, int flags)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+ struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ nds32_stop(event, PERF_EF_UPDATE);
+ hw_events->events[idx] = NULL;
+ clear_bit(idx, hw_events->used_mask);
+
+ perf_event_update_userpage(event);
+}
+
+static void nds32_pmu_read(struct perf_event *event)
+{
+ nds32_pmu_event_update(event);
+}
+
+/* Please refer to SPAv3 for more hardware specific details */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *nds32_arch_formats_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group nds32_pmu_format_group = {
+ .name = "format",
+ .attrs = nds32_arch_formats_attr,
+};
+
+static ssize_t nds32_pmu_cpumask_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return 0;
+}
+
+static DEVICE_ATTR(cpus, 0444, nds32_pmu_cpumask_show, NULL);
+
+static struct attribute *nds32_pmu_common_attrs[] = {
+ &dev_attr_cpus.attr,
+ NULL,
+};
+
+static struct attribute_group nds32_pmu_common_group = {
+ .attrs = nds32_pmu_common_attrs,
+};
+
+static const struct attribute_group *nds32_pmu_attr_groups[] = {
+ &nds32_pmu_format_group,
+ &nds32_pmu_common_group,
+ NULL,
+};
+
+static void nds32_init(struct nds32_pmu *nds32_pmu)
+{
+ atomic_set(&nds32_pmu->active_events, 0);
+
+ nds32_pmu->pmu = (struct pmu) {
+ .pmu_enable = nds32_pmu_enable,
+ .pmu_disable = nds32_pmu_disable,
+ .attr_groups = nds32_pmu_attr_groups,
+ .event_init = nds32_pmu_event_init,
+ .add = nds32_pmu_add,
+ .del = nds32_pmu_del,
+ .start = nds32_start,
+ .stop = nds32_stop,
+ .read = nds32_pmu_read,
+ };
+}
+
+int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type)
+{
+ nds32_init(nds32_pmu);
+ pm_runtime_enable(&nds32_pmu->plat_device->dev);
+ pr_info("enabled with %s PMU driver, %d counters available\n",
+ nds32_pmu->name, nds32_pmu->num_events);
+ return perf_pmu_register(&nds32_pmu->pmu, nds32_pmu->name, type);
+}
+
+static struct pmu_hw_events *cpu_pmu_get_cpu_events(void)
+{
+ return this_cpu_ptr(&cpu_hw_events);
+}
+
+static int cpu_pmu_request_irq(struct nds32_pmu *cpu_pmu, irq_handler_t handler)
+{
+ int err, irq, irqs;
+ struct platform_device *pmu_device = cpu_pmu->plat_device;
+
+ if (!pmu_device)
+ return -ENODEV;
+
+ irqs = min(pmu_device->num_resources, num_possible_cpus());
+ if (irqs < 1) {
+ pr_err("no irqs for PMUs defined\n");
+ return -ENODEV;
+ }
+
+ irq = platform_get_irq(pmu_device, 0);
+ err = request_irq(irq, handler, IRQF_NOBALANCING, "nds32-pfm",
+ cpu_pmu);
+ if (err) {
+ pr_err("unable to request IRQ%d for NDS PMU counters\n",
+ irq);
+ return err;
+ }
+ return 0;
+}
+
+static void cpu_pmu_free_irq(struct nds32_pmu *cpu_pmu)
+{
+ int irq;
+ struct platform_device *pmu_device = cpu_pmu->plat_device;
+
+ irq = platform_get_irq(pmu_device, 0);
+ if (irq >= 0)
+ free_irq(irq, cpu_pmu);
+}
+
+static void cpu_pmu_init(struct nds32_pmu *cpu_pmu)
+{
+ int cpu;
+ struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
+
+ raw_spin_lock_init(&events->pmu_lock);
+
+ cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events;
+ cpu_pmu->request_irq = cpu_pmu_request_irq;
+ cpu_pmu->free_irq = cpu_pmu_free_irq;
+
+ /* Ensure the PMU has sane values out of reset. */
+ if (cpu_pmu->reset)
+ on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
+}
+
+const static struct of_device_id cpu_pmu_of_device_ids[] = {
+ {.compatible = "andestech,nds32v3-pmu",
+ .data = device_pmu_init},
+ {},
+};
+
+static int cpu_pmu_device_probe(struct platform_device *pdev)
+{
+ const struct of_device_id *of_id;
+ int (*init_fn)(struct nds32_pmu *nds32_pmu);
+ struct device_node *node = pdev->dev.of_node;
+ struct nds32_pmu *pmu;
+ int ret = -ENODEV;
+
+ if (cpu_pmu) {
+ pr_notice("[perf] attempt to register multiple PMU devices!\n");
+ return -ENOSPC;
+ }
+
+ pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
+ if (!pmu)
+ return -ENOMEM;
+
+ of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node);
+ if (node && of_id) {
+ init_fn = of_id->data;
+ ret = init_fn(pmu);
+ } else {
+ ret = probe_current_pmu(pmu);
+ }
+
+ if (ret) {
+ pr_notice("[perf] failed to probe PMU!\n");
+ goto out_free;
+ }
+
+ cpu_pmu = pmu;
+ cpu_pmu->plat_device = pdev;
+ cpu_pmu_init(cpu_pmu);
+ ret = nds32_pmu_register(cpu_pmu, PERF_TYPE_RAW);
+
+ if (!ret)
+ return 0;
+
+out_free:
+ pr_notice("[perf] failed to register PMU devices!\n");
+ kfree(pmu);
+ return ret;
+}
+
+static struct platform_driver cpu_pmu_driver = {
+ .driver = {
+ .name = "nds32-pfm",
+ .of_match_table = cpu_pmu_of_device_ids,
+ },
+ .probe = cpu_pmu_device_probe,
+ .id_table = cpu_pmu_plat_device_ids,
+};
+
+static int __init register_pmu_driver(void)
+{
+ int err = 0;
+
+ err = platform_driver_register(&cpu_pmu_driver);
+ if (err)
+ pr_notice("[perf] PMU initialization failed\n");
+ else
+ pr_notice("[perf] PMU initialization done\n");
+
+ return err;
+}
+
+device_initcall(register_pmu_driver);
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+ /* However, NDS32 does not support virtualization */
+ if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+ return perf_guest_cbs->get_guest_ip();
+
+ return instruction_pointer(regs);
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+ int misc = 0;
+
+ /* However, NDS32 does not support virtualization */
+ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ if (perf_guest_cbs->is_user_mode())
+ misc |= PERF_RECORD_MISC_GUEST_USER;
+ else
+ misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+ } else {
+ if (user_mode(regs))
+ misc |= PERF_RECORD_MISC_USER;
+ else
+ misc |= PERF_RECORD_MISC_KERNEL;
+ }
+
+ return misc;
+}
diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c
index b740534b152c..68d5f2a27f38 100644
--- a/arch/nds32/mm/fault.c
+++ b/arch/nds32/mm/fault.c
@@ -9,6 +9,7 @@
#include <linux/init.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
+#include <linux/perf_event.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -169,8 +170,6 @@ good_area:
mask = VM_EXEC;
else {
mask = VM_READ | VM_WRITE;
- if (vma->vm_flags & VM_WRITE)
- flags |= FAULT_FLAG_WRITE;
}
} else if (entry == ENTRY_TLB_MISC) {
switch (error_code & ITYPE_mskETYPE) {
@@ -231,11 +230,17 @@ good_area:
* attempt. If we go through a retry, it is extremely likely that the
* page will be found in page cache at that point.
*/
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR)
+ if (fault & VM_FAULT_MAJOR) {
tsk->maj_flt++;
- else
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
+ 1, regs, addr);
+ } else {
tsk->min_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
+ 1, regs, addr);
+ }
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
flags |= FAULT_FLAG_TRIED;
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
index 8d378c57cb01..dc696c151e1c 100644
--- a/tools/include/asm/barrier.h
+++ b/tools/include/asm/barrier.h
@@ -24,6 +24,8 @@
#include "../../arch/ia64/include/asm/barrier.h"
#elif defined(__xtensa__)
#include "../../arch/xtensa/include/asm/barrier.h"
+#elif defined(__nds32__)
+#include "../../arch/nds32/include/asm/barrier.h"
#else
#include <asm-generic/barrier.h>
#endif
diff --git a/tools/perf/arch/nds32/Build b/tools/perf/arch/nds32/Build
new file mode 100644
index 000000000000..54afe4a467e7
--- /dev/null
+++ b/tools/perf/arch/nds32/Build
@@ -0,0 +1 @@
+libperf-y += util/
diff --git a/tools/perf/arch/nds32/util/Build b/tools/perf/arch/nds32/util/Build
new file mode 100644
index 000000000000..ca623bbf993c
--- /dev/null
+++ b/tools/perf/arch/nds32/util/Build
@@ -0,0 +1 @@
+libperf-y += header.o
diff --git a/tools/perf/arch/nds32/util/header.c b/tools/perf/arch/nds32/util/header.c
new file mode 100644
index 000000000000..ef9dbdbe7968
--- /dev/null
+++ b/tools/perf/arch/nds32/util/header.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2017 Andes Technology Corporation
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <api/fs/fs.h>
+#include "header.h"
+
+#define STR_LEN 1024
+
+char *get_cpuid_str(struct perf_pmu *pmu)
+{
+ /* In nds32, we only have one cpu */
+ char *buf = NULL;
+ struct cpu_map *cpus;
+ const char *sysfs = sysfs__mountpoint();
+
+ if (!sysfs || !pmu || !pmu->cpus)
+ return NULL;
+
+ buf = malloc(STR_LEN);
+ if (!buf)
+ return NULL;
+
+ cpus = cpu_map__get(pmu->cpus);
+ sprintf(buf, "0x%x", cpus->nr - 1);
+ cpu_map__put(cpus);
+ return buf;
+}
diff --git a/tools/perf/pmu-events/arch/nds32/mapfile.csv b/tools/perf/pmu-events/arch/nds32/mapfile.csv
new file mode 100644
index 000000000000..efb395f26883
--- /dev/null
+++ b/tools/perf/pmu-events/arch/nds32/mapfile.csv
@@ -0,0 +1,15 @@
+# Format:
+# MIDR,Version,JSON/file/pathname,Type
+#
+# where
+# MIDR Processor version
+# Variant[23:20] and Revision [3:0] should be zero.
+# Version could be used to track version of of JSON file
+# but currently unused.
+# JSON/file/pathname is the path to JSON file, relative
+# to tools/perf/pmu-events/arch/arm64/.
+# Type is core, uncore etc
+#
+#
+#Family-model,Version,Filename,EventType
+0x0,v3,n13,core
diff --git a/tools/perf/pmu-events/arch/nds32/n13/atcpmu.json b/tools/perf/pmu-events/arch/nds32/n13/atcpmu.json
new file mode 100644
index 000000000000..5347350c360c
--- /dev/null
+++ b/tools/perf/pmu-events/arch/nds32/n13/atcpmu.json
@@ -0,0 +1,290 @@
+[
+ {
+ "PublicDescription": "Conditional branch",
+ "EventCode": "0x102",
+ "EventName": "cond_br",
+ "BriefDescription": "V3 Conditional branch"
+ },
+ {
+ "PublicDescription": "Taken conditional branches",
+ "EventCode": "0x103",
+ "EventName": "taken_cond_br",
+ "BriefDescription": "V3 Taken Conditional branch"
+ },
+ {
+ "PublicDescription": "Prefetch Instruction",
+ "EventCode": "0x104",
+ "EventName": "prefetch_inst",
+ "BriefDescription": "V3 Prefetch Instruction"
+ },
+ {
+ "PublicDescription": "RET Inst",
+ "EventCode": "0x105",
+ "EventName": "ret_inst",
+ "BriefDescription": "V3 RET Inst"
+ },
+ {
+ "PublicDescription": "JR(non-RET) instructions",
+ "EventCode": "0x106",
+ "EventName": "jr_inst",
+ "BriefDescription": "V3 JR(non-RET) instructions"
+ },
+ {
+ "PublicDescription": "JAL/JRAL instructions",
+ "EventCode": "0x107",
+ "EventName": "jal_jral_inst",
+ "BriefDescription": "V3 JAL/JRAL instructions"
+ },
+ {
+ "PublicDescription": "NOP instructions",
+ "EventCode": "0x108",
+ "EventName": "nop_inst",
+ "BriefDescription": "V3 NOP instructions"
+ },
+ {
+ "PublicDescription": "SCW instructions",
+ "EventCode": "0x109",
+ "EventName": "scw_inst",
+ "BriefDescription": "V3 SCW instructions"
+ },
+ {
+ "PublicDescription": "ISB/DSB instructions",
+ "EventCode": "0x10a",
+ "EventName": "isb_dsb_inst",
+ "BriefDescription": "V3 ISB/DSB instructions"
+ },
+ {
+ "PublicDescription": "CCTL instructions",
+ "EventCode": "0x10b",
+ "EventName": "cctl_inst",
+ "BriefDescription": "V3 CCTL instructions"
+ },
+ {
+ "PublicDescription": "Taken Interrupts",
+ "EventCode": "0x10c",
+ "EventName": "taken_interrupts",
+ "BriefDescription": "V3 Taken Interrupts"
+ },
+ {
+ "PublicDescription": "Loads Completed",
+ "EventCode": "0x10d",
+ "EventName": "load_completed",
+ "BriefDescription": "V3 Loads Completed"
+ },
+ {
+ "PublicDescription": "uITLB accesses",
+ "EventCode": "0x10e",
+ "EventName": "uitlb_access",
+ "BriefDescription": "V3 uITLB accesses"
+ },
+ {
+ "PublicDescription": "uDTLB accesses",
+ "EventCode": "0x10f",
+ "EventName": "udtlb_access",
+ "BriefDescription": "V3 uDTLB accesses"
+ },
+ {
+ "PublicDescription": "MTLB accesses",
+ "EventCode": "0x110",
+ "EventName": "mtlb_access",
+ "BriefDescription": "V3 MTLB accesses"
+ },
+ {
+ "PublicDescription": "DATA_DEPENDENCY_STALL_CYCLES",
+ "EventCode": "0x112",
+ "EventName": "data_dependency_stall",
+ "BriefDescription": "V3 DATA_DEPENDENCY_STALL_CYCLES"
+ },
+ {
+ "PublicDescription": "DATA_CACHE_MISS_STALL_CYCLES",
+ "EventCode": "0x113",
+ "EventName": "dcache_miss_stall",
+ "BriefDescription": "V3 DATA_CACHE_MISS_STALL_CYCLES"
+ },
+ {
+ "PublicDescription": "ILM access",
+ "EventCode": "0x118",
+ "EventName": "ilm_access",
+ "BriefDescription": "V3 ILM accesses"
+ },
+ {
+ "PublicDescription": "LSU BIU CYCLES",
+ "EventCode": "0x119",
+ "EventName": "lsu_biu_cycles",
+ "BriefDescription": "V3 LSU BIU CYCLES"
+ },
+ {
+ "PublicDescription": "HPTWK BIU CYCLES",
+ "EventCode": "0x11a",
+ "EventName": "hptwk_biu_cycles",
+ "BriefDescription": "V3 HPTWK BIU CYCLES"
+ },
+ {
+ "PublicDescription": "DMA BIU CYCLES",
+ "EventCode": "0x11b",
+ "EventName": "dma_biu_cycles",
+ "BriefDescription": "V3 DMA BIU CYCLES"
+ },
+ {
+ "PublicDescription": "CODE CACHE FILL BIU CYCLES",
+ "EventCode": "0x11c",
+ "EventName": "icache_fill_biu_cycles",
+ "BriefDescription": "V3 CODE CACHE FILL BIU CYCLES"
+ },
+ {
+ "PublicDescription": "LEAGAL UNALIGN DCACHE ACCESS",
+ "EventCode": "0x11d",
+ "EventName": "legal_unalined_dcache_access",
+ "BriefDescription": "V3 LEAGAL UNALIGN DCACHE ACCESS"
+ },
+ {
+ "PublicDescription": "PUSH25 instructions",
+ "EventCode": "0x11e",
+ "EventName": "push25_inst",
+ "BriefDescription": "V3 PUSH25 instructions"
+ },
+ {
+ "PublicDescription": "SYSCALL instructions",
+ "EventCode": "0x11f",
+ "EventName": "syscall_inst",
+ "BriefDescription": "V3 SYSCALL instructions"
+ },
+ {
+ "PublicDescription": "conditional branch miss",
+ "EventCode": "0x202",
+ "EventName": "cond_br_miss",
+ "BriefDescription": "V3 conditional branch miss"
+ },
+ {
+ "PublicDescription": "taken conditional branch miss",
+ "EventCode": "0x203",
+ "EventName": "taken_cond_br_miss",
+ "BriefDescription": "V3 taken conditional branch miss"
+ },
+ {
+ "PublicDescription": "Prefetch Instructions with cache hit",
+ "EventCode": "0x204",
+ "EventName": "prefetch_icache_hit",
+ "BriefDescription": "V3 Prefetch Instructions with cache hit"
+ },
+ {
+ "PublicDescription": "RET mispredict",
+ "EventCode": "0x205",
+ "EventName": "ret_mispredict",
+ "BriefDescription": "V3 RET mispredict"
+ },
+ {
+ "PublicDescription": "Immediate J instructions",
+ "EventCode": "0x206",
+ "EventName": "imm_j_inst",
+ "BriefDescription": "V3 Immediate J instructions"
+ },
+ {
+ "PublicDescription": "Multiply instructions",
+ "EventCode": "0x207",
+ "EventName": "mul_inst",
+ "BriefDescription": "V3 Multiply instructions"
+ },
+ {
+ "PublicDescription": "16 bits instructions",
+ "EventCode": "0x208",
+ "EventName": "sixteen_bits_inst",
+ "BriefDescription": "V3 16 bits instructions"
+ },
+ {
+ "PublicDescription": "Failed SCW instructions",
+ "EventCode": "0x209",
+ "EventName": "fail_scw_inst",
+ "BriefDescription": "V3 Failed SCW instructions"
+ },
+ {
+ "PublicDescription": "ld-after-st conflict replays",
+ "EventCode": "0x20a",
+ "EventName": "ld_af_st_conflict",
+ "BriefDescription": "V3 ld-after-st conflict replays"
+ },
+ {
+ "PublicDescription": "Exception taken",
+ "EventCode": "0x20c",
+ "EventName": "exception_taken",
+ "BriefDescription": "V3 Exception taken"
+ },
+ {
+ "PublicDescription": "Stores completed",
+ "EventCode": "0x20d",
+ "EventName": "store_completed",
+ "BriefDescription": "V3 Stores completed"
+ },
+ {
+ "PublicDescription": "uITLB miss",
+ "EventCode": "0x20e",
+ "EventName": "uitlb_miss",
+ "BriefDescription": "V3 uITLB miss"
+ },
+ {
+ "PublicDescription": "uDTLB miss",
+ "EventCode": "0x20f",
+ "EventName": "udtlb_miss",
+ "BriefDescription": "V3 uDTLB miss"
+ },
+ {
+ "PublicDescription": "MTLB miss",
+ "EventCode": "0x210",
+ "EventName": "mtlb_miss",
+ "BriefDescription": "V3 MTLB miss"
+ },
+ {
+ "PublicDescription": "Empty instructions queue stall cycles",
+ "EventCode": "0x212",
+ "EventName": "empty_inst_q_stall",
+ "BriefDescription": "V3 Empty instructions queue stall cycles"
+ },
+ {
+ "PublicDescription": "Data write back",
+ "EventCode": "0x213",
+ "EventName": "data_wb",
+ "BriefDescription": "V3 Data write back"
+ },
+ {
+ "PublicDescription": "DLM access",
+ "EventCode": "0x218",
+ "EventName": "dlm_access",
+ "BriefDescription": "V3 DLM access"
+ },
+ {
+ "PublicDescription": "LSU BIU request",
+ "EventCode": "0x219",
+ "EventName": "lsu_biu_req",
+ "BriefDescription": "V3 LSU BIU request"
+ },
+ {
+ "PublicDescription": "HPTWK BIU request",
+ "EventCode": "0x21a",
+ "EventName": "hptwk_biu_req",
+ "BriefDescription": "V3 HPTWK BIU request"
+ },
+ {
+ "PublicDescription": "DMA BIU request",
+ "EventCode": "0x21b",
+ "EventName": "dma_biu_req",
+ "BriefDescription": "V3 DMA BIU request"
+ },
+ {
+ "PublicDescription": "Icache fill BIU request",
+ "EventCode": "0x21c",
+ "EventName": "icache_fill_biu_req",
+ "BriefDescription": "V3 Icache fill BIU request"
+ },
+ {
+ "PublicDescription": "External events",
+ "EventCode": "0x21d",
+ "EventName": "external_events",
+ "BriefDescription": "V3 External events"
+ },
+ {
+ "PublicDescription": "POP25 instructions",
+ "EventCode": "0x21e",
+ "EventName": "pop25_inst",
+ "BriefDescription": "V3 POP25 instructions"
+ },
+]