summaryrefslogtreecommitdiffstats
path: root/arch/nds32/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-12-29 09:37:03 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-12-29 09:37:03 -0800
commit889bb74302e5aba85d987b4093344150984d7cda (patch)
treea81f49ee3b866e13a623e77090bbc153210d0091 /arch/nds32/include
parent903b77c631673eeec9e9114e9524171cdf9a2646 (diff)
parente2f3f8b4a497d26bdcd55a53246ec2e613ae0fd4 (diff)
downloadlinux-889bb74302e5aba85d987b4093344150984d7cda.tar.gz
linux-889bb74302e5aba85d987b4093344150984d7cda.tar.bz2
linux-889bb74302e5aba85d987b4093344150984d7cda.zip
Merge tag 'nds32-for-linus-4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/greentime/linux
Pull nds32 updates from Greentime Hu: - Perf support - Power management support - FPU support - Hardware prefetcher support - Build error fixed - Performance enhancement * tag 'nds32-for-linus-4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/greentime/linux: nds32: support hardware prefetcher nds32: Fix the items of hwcap_str ordering issue. math-emu/soft-fp.h: (_FP_ROUND_ZERO) cast 0 to void to fix warning math-emu/op-2.h: Use statement expressions to prevent negative constant shift nds32: support denormalized result through FP emulator nds32: Support FP emulation nds32: nds32 FPU port nds32: Remove duplicated include from pm.c nds32: Power management for nds32 nds32: Add document for NDS32 PMU. nds32: Add perf call-graph support. nds32: Perf porting nds32: Fix bug in bitfield.h nds32: Fix gcc 8.0 compiler option incompatible. nds32: Fill all TLB entries with kernel image mapping nds32: Remove the redundant assignment
Diffstat (limited to 'arch/nds32/include')
-rw-r--r--arch/nds32/include/asm/Kbuild1
-rw-r--r--arch/nds32/include/asm/bitfield.h25
-rw-r--r--arch/nds32/include/asm/elf.h11
-rw-r--r--arch/nds32/include/asm/fpu.h126
-rw-r--r--arch/nds32/include/asm/fpuemu.h32
-rw-r--r--arch/nds32/include/asm/nds32_fpu_inst.h109
-rw-r--r--arch/nds32/include/asm/perf_event.h16
-rw-r--r--arch/nds32/include/asm/pmu.h386
-rw-r--r--arch/nds32/include/asm/processor.h7
-rw-r--r--arch/nds32/include/asm/sfp-machine.h158
-rw-r--r--arch/nds32/include/asm/stacktrace.h39
-rw-r--r--arch/nds32/include/asm/suspend.h11
-rw-r--r--arch/nds32/include/asm/syscalls.h1
-rw-r--r--arch/nds32/include/uapi/asm/auxvec.h7
-rw-r--r--arch/nds32/include/uapi/asm/sigcontext.h14
-rw-r--r--arch/nds32/include/uapi/asm/udftrap.h13
-rw-r--r--arch/nds32/include/uapi/asm/unistd.h2
17 files changed, 956 insertions, 2 deletions
diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild
index dbc4e5422550..f81b633d5379 100644
--- a/arch/nds32/include/asm/Kbuild
+++ b/arch/nds32/include/asm/Kbuild
@@ -36,6 +36,7 @@ generic-y += kprobes.h
generic-y += kvm_para.h
generic-y += limits.h
generic-y += local.h
+generic-y += local64.h
generic-y += mm-arch-hooks.h
generic-y += mman.h
generic-y += parport.h
diff --git a/arch/nds32/include/asm/bitfield.h b/arch/nds32/include/asm/bitfield.h
index 8e84fc385b94..7414fcbbab4e 100644
--- a/arch/nds32/include/asm/bitfield.h
+++ b/arch/nds32/include/asm/bitfield.h
@@ -251,6 +251,11 @@
#define ITYPE_mskSTYPE ( 0xF << ITYPE_offSTYPE )
#define ITYPE_mskCPID ( 0x3 << ITYPE_offCPID )
+/* Additional definitions of ITYPE register for FPU */
+#define FPU_DISABLE_EXCEPTION (0x1 << ITYPE_offSTYPE)
+#define FPU_EXCEPTION (0x2 << ITYPE_offSTYPE)
+#define FPU_CPID 0 /* FPU Co-Processor ID is 0 */
+
#define NDS32_VECTOR_mskNONEXCEPTION 0x78
#define NDS32_VECTOR_offEXCEPTION 8
#define NDS32_VECTOR_offINTERRUPT 9
@@ -692,8 +697,8 @@
#define PFM_CTL_offKU1 13 /* Enable user mode event counting for PFMC1 */
#define PFM_CTL_offKU2 14 /* Enable user mode event counting for PFMC2 */
#define PFM_CTL_offSEL0 15 /* The event selection for PFMC0 */
-#define PFM_CTL_offSEL1 21 /* The event selection for PFMC1 */
-#define PFM_CTL_offSEL2 27 /* The event selection for PFMC2 */
+#define PFM_CTL_offSEL1 16 /* The event selection for PFMC1 */
+#define PFM_CTL_offSEL2 22 /* The event selection for PFMC2 */
/* bit 28:31 reserved */
#define PFM_CTL_mskEN0 ( 0x01 << PFM_CTL_offEN0 )
@@ -735,14 +740,20 @@
#define N13MISC_CTL_offRTP 1 /* Disable Return Target Predictor */
#define N13MISC_CTL_offPTEPF 2 /* Disable HPTWK L2 PTE pefetch */
#define N13MISC_CTL_offSP_SHADOW_EN 4 /* Enable shadow stack pointers */
+#define MISC_CTL_offHWPRE 11 /* Enable HardWare PREFETCH */
/* bit 6, 9:31 reserved */
#define N13MISC_CTL_makBTB ( 0x1 << N13MISC_CTL_offBTB )
#define N13MISC_CTL_makRTP ( 0x1 << N13MISC_CTL_offRTP )
#define N13MISC_CTL_makPTEPF ( 0x1 << N13MISC_CTL_offPTEPF )
#define N13MISC_CTL_makSP_SHADOW_EN ( 0x1 << N13MISC_CTL_offSP_SHADOW_EN )
+#define MISC_CTL_makHWPRE_EN ( 0x1 << MISC_CTL_offHWPRE )
+#ifdef CONFIG_HW_PRE
+#define MISC_init (N13MISC_CTL_makBTB|N13MISC_CTL_makRTP|N13MISC_CTL_makSP_SHADOW_EN|MISC_CTL_makHWPRE_EN)
+#else
#define MISC_init (N13MISC_CTL_makBTB|N13MISC_CTL_makRTP|N13MISC_CTL_makSP_SHADOW_EN)
+#endif
/******************************************************************************
* PRUSR_ACC_CTL (Privileged Resource User Access Control Registers)
@@ -926,6 +937,7 @@
#define FPCSR_mskDNIT ( 0x1 << FPCSR_offDNIT )
#define FPCSR_mskRIT ( 0x1 << FPCSR_offRIT )
#define FPCSR_mskALL (FPCSR_mskIVO | FPCSR_mskDBZ | FPCSR_mskOVF | FPCSR_mskUDF | FPCSR_mskIEX)
+#define FPCSR_mskALLE_NO_UDFE (FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE | FPCSR_mskIEXE)
#define FPCSR_mskALLE (FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE | FPCSR_mskUDFE | FPCSR_mskIEXE)
#define FPCSR_mskALLT (FPCSR_mskIVOT | FPCSR_mskDBZT | FPCSR_mskOVFT | FPCSR_mskUDFT | FPCSR_mskIEXT |FPCSR_mskDNIT | FPCSR_mskRIT)
@@ -946,6 +958,15 @@
#define FPCFG_mskIMVER ( 0x1F << FPCFG_offIMVER )
#define FPCFG_mskAVER ( 0x1F << FPCFG_offAVER )
+/* 8 Single precision or 4 double precision registers are available */
+#define SP8_DP4_reg 0
+/* 16 Single precision or 8 double precision registers are available */
+#define SP16_DP8_reg 1
+/* 32 Single precision or 16 double precision registers are available */
+#define SP32_DP16_reg 2
+/* 32 Single precision or 32 double precision registers are available */
+#define SP32_DP32_reg 3
+
/******************************************************************************
* fucpr: FUCOP_CTL (FPU and Coprocessor Enable Control Register)
*****************************************************************************/
diff --git a/arch/nds32/include/asm/elf.h b/arch/nds32/include/asm/elf.h
index f5f9cf7e0544..95f3ea253e4c 100644
--- a/arch/nds32/include/asm/elf.h
+++ b/arch/nds32/include/asm/elf.h
@@ -9,6 +9,7 @@
*/
#include <asm/ptrace.h>
+#include <asm/fpu.h>
typedef unsigned long elf_greg_t;
typedef unsigned long elf_freg_t[3];
@@ -159,8 +160,18 @@ struct elf32_hdr;
#endif
+
+#if IS_ENABLED(CONFIG_FPU)
+#define FPU_AUX_ENT NEW_AUX_ENT(AT_FPUCW, FPCSR_INIT)
+#else
+#define FPU_AUX_ENT NEW_AUX_ENT(AT_IGNORE, 0)
+#endif
+
#define ARCH_DLINFO \
do { \
+ /* Optional FPU initialization */ \
+ FPU_AUX_ENT; \
+ \
NEW_AUX_ENT(AT_SYSINFO_EHDR, \
(elf_addr_t)current->mm->context.vdso); \
} while (0)
diff --git a/arch/nds32/include/asm/fpu.h b/arch/nds32/include/asm/fpu.h
new file mode 100644
index 000000000000..019f1bcfc5ee
--- /dev/null
+++ b/arch/nds32/include/asm/fpu.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2005-2018 Andes Technology Corporation */
+
+#ifndef __ASM_NDS32_FPU_H
+#define __ASM_NDS32_FPU_H
+
+#if IS_ENABLED(CONFIG_FPU)
+#ifndef __ASSEMBLY__
+#include <linux/sched/task_stack.h>
+#include <linux/preempt.h>
+#include <asm/ptrace.h>
+
+extern bool has_fpu;
+
+extern void save_fpu(struct task_struct *__tsk);
+extern void load_fpu(const struct fpu_struct *fpregs);
+extern bool do_fpu_exception(unsigned int subtype, struct pt_regs *regs);
+extern int do_fpuemu(struct pt_regs *regs, struct fpu_struct *fpu);
+
+#define test_tsk_fpu(regs) (regs->fucop_ctl & FUCOP_CTL_mskCP0EN)
+
+/*
+ * Initially load the FPU with signalling NANS. This bit pattern
+ * has the property that no matter whether considered as single or as
+ * double precision, it still represents a signalling NAN.
+ */
+
+#define sNAN64 0xFFFFFFFFFFFFFFFFULL
+#define sNAN32 0xFFFFFFFFUL
+
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+/*
+ * Denormalized number is unsupported by nds32 FPU. Hence the operation
+ * is treated as underflow cases when the final result is a denormalized
+ * number. To enhance precision, underflow exception trap should be
+ * enabled by default and kerenl will re-execute it by fpu emulator
+ * when getting underflow exception.
+ */
+#define FPCSR_INIT FPCSR_mskUDFE
+#else
+#define FPCSR_INIT 0x0UL
+#endif
+
+extern const struct fpu_struct init_fpuregs;
+
+static inline void disable_ptreg_fpu(struct pt_regs *regs)
+{
+ regs->fucop_ctl &= ~FUCOP_CTL_mskCP0EN;
+}
+
+static inline void enable_ptreg_fpu(struct pt_regs *regs)
+{
+ regs->fucop_ctl |= FUCOP_CTL_mskCP0EN;
+}
+
+static inline void enable_fpu(void)
+{
+ unsigned long fucop_ctl;
+
+ fucop_ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL) | FUCOP_CTL_mskCP0EN;
+ __nds32__mtsr(fucop_ctl, NDS32_SR_FUCOP_CTL);
+ __nds32__isb();
+}
+
+static inline void disable_fpu(void)
+{
+ unsigned long fucop_ctl;
+
+ fucop_ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL) & ~FUCOP_CTL_mskCP0EN;
+ __nds32__mtsr(fucop_ctl, NDS32_SR_FUCOP_CTL);
+ __nds32__isb();
+}
+
+static inline void lose_fpu(void)
+{
+ preempt_disable();
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+ if (last_task_used_math == current) {
+ last_task_used_math = NULL;
+#else
+ if (test_tsk_fpu(task_pt_regs(current))) {
+#endif
+ save_fpu(current);
+ }
+ disable_ptreg_fpu(task_pt_regs(current));
+ preempt_enable();
+}
+
+static inline void own_fpu(void)
+{
+ preempt_disable();
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+ if (last_task_used_math != current) {
+ if (last_task_used_math != NULL)
+ save_fpu(last_task_used_math);
+ load_fpu(&current->thread.fpu);
+ last_task_used_math = current;
+ }
+#else
+ if (!test_tsk_fpu(task_pt_regs(current))) {
+ load_fpu(&current->thread.fpu);
+ }
+#endif
+ enable_ptreg_fpu(task_pt_regs(current));
+ preempt_enable();
+}
+
+#if !IS_ENABLED(CONFIG_LAZY_FPU)
+static inline void unlazy_fpu(struct task_struct *tsk)
+{
+ preempt_disable();
+ if (test_tsk_fpu(task_pt_regs(tsk)))
+ save_fpu(tsk);
+ preempt_enable();
+}
+#endif /* !CONFIG_LAZY_FPU */
+static inline void clear_fpu(struct pt_regs *regs)
+{
+ preempt_disable();
+ if (test_tsk_fpu(regs))
+ disable_ptreg_fpu(regs);
+ preempt_enable();
+}
+#endif /* CONFIG_FPU */
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_NDS32_FPU_H */
diff --git a/arch/nds32/include/asm/fpuemu.h b/arch/nds32/include/asm/fpuemu.h
new file mode 100644
index 000000000000..c4bd0c7faa75
--- /dev/null
+++ b/arch/nds32/include/asm/fpuemu.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2005-2018 Andes Technology Corporation */
+
+#ifndef __ARCH_NDS32_FPUEMU_H
+#define __ARCH_NDS32_FPUEMU_H
+
+/*
+ * single precision
+ */
+
+void fadds(void *ft, void *fa, void *fb);
+void fsubs(void *ft, void *fa, void *fb);
+void fmuls(void *ft, void *fa, void *fb);
+void fdivs(void *ft, void *fa, void *fb);
+void fs2d(void *ft, void *fa);
+void fsqrts(void *ft, void *fa);
+void fnegs(void *ft, void *fa);
+int fcmps(void *ft, void *fa, void *fb, int cop);
+
+/*
+ * double precision
+ */
+void faddd(void *ft, void *fa, void *fb);
+void fsubd(void *ft, void *fa, void *fb);
+void fmuld(void *ft, void *fa, void *fb);
+void fdivd(void *ft, void *fa, void *fb);
+void fsqrtd(void *ft, void *fa);
+void fd2s(void *ft, void *fa);
+void fnegd(void *ft, void *fa);
+int fcmpd(void *ft, void *fa, void *fb, int cop);
+
+#endif /* __ARCH_NDS32_FPUEMU_H */
diff --git a/arch/nds32/include/asm/nds32_fpu_inst.h b/arch/nds32/include/asm/nds32_fpu_inst.h
new file mode 100644
index 000000000000..1e4b86a90a48
--- /dev/null
+++ b/arch/nds32/include/asm/nds32_fpu_inst.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2005-2018 Andes Technology Corporation */
+
+#ifndef __NDS32_FPU_INST_H
+#define __NDS32_FPU_INST_H
+
+#define cop0_op 0x35
+
+/*
+ * COP0 field of opcodes.
+ */
+#define fs1_op 0x0
+#define fs2_op 0x4
+#define fd1_op 0x8
+#define fd2_op 0xc
+
+/*
+ * FS1 opcode.
+ */
+enum fs1 {
+ fadds_op, fsubs_op, fcpynss_op, fcpyss_op,
+ fmadds_op, fmsubs_op, fcmovns_op, fcmovzs_op,
+ fnmadds_op, fnmsubs_op,
+ fmuls_op = 0xc, fdivs_op,
+ fs1_f2op_op = 0xf
+};
+
+/*
+ * FS1/F2OP opcode.
+ */
+enum fs1_f2 {
+ fs2d_op, fsqrts_op,
+ fui2s_op = 0x8, fsi2s_op = 0xc,
+ fs2ui_op = 0x10, fs2ui_z_op = 0x14,
+ fs2si_op = 0x18, fs2si_z_op = 0x1c
+};
+
+/*
+ * FS2 opcode.
+ */
+enum fs2 {
+ fcmpeqs_op, fcmpeqs_e_op, fcmplts_op, fcmplts_e_op,
+ fcmples_op, fcmples_e_op, fcmpuns_op, fcmpuns_e_op
+};
+
+/*
+ * FD1 opcode.
+ */
+enum fd1 {
+ faddd_op, fsubd_op, fcpynsd_op, fcpysd_op,
+ fmaddd_op, fmsubd_op, fcmovnd_op, fcmovzd_op,
+ fnmaddd_op, fnmsubd_op,
+ fmuld_op = 0xc, fdivd_op, fd1_f2op_op = 0xf
+};
+
+/*
+ * FD1/F2OP opcode.
+ */
+enum fd1_f2 {
+ fd2s_op, fsqrtd_op,
+ fui2d_op = 0x8, fsi2d_op = 0xc,
+ fd2ui_op = 0x10, fd2ui_z_op = 0x14,
+ fd2si_op = 0x18, fd2si_z_op = 0x1c
+};
+
+/*
+ * FD2 opcode.
+ */
+enum fd2 {
+ fcmpeqd_op, fcmpeqd_e_op, fcmpltd_op, fcmpltd_e_op,
+ fcmpled_op, fcmpled_e_op, fcmpund_op, fcmpund_e_op
+};
+
+#define NDS32Insn(x) x
+
+#define I_OPCODE_off 25
+#define NDS32Insn_OPCODE(x) (NDS32Insn(x) >> I_OPCODE_off)
+
+#define I_OPCODE_offRt 20
+#define I_OPCODE_mskRt (0x1fUL << I_OPCODE_offRt)
+#define NDS32Insn_OPCODE_Rt(x) \
+ ((NDS32Insn(x) & I_OPCODE_mskRt) >> I_OPCODE_offRt)
+
+#define I_OPCODE_offRa 15
+#define I_OPCODE_mskRa (0x1fUL << I_OPCODE_offRa)
+#define NDS32Insn_OPCODE_Ra(x) \
+ ((NDS32Insn(x) & I_OPCODE_mskRa) >> I_OPCODE_offRa)
+
+#define I_OPCODE_offRb 10
+#define I_OPCODE_mskRb (0x1fUL << I_OPCODE_offRb)
+#define NDS32Insn_OPCODE_Rb(x) \
+ ((NDS32Insn(x) & I_OPCODE_mskRb) >> I_OPCODE_offRb)
+
+#define I_OPCODE_offbit1014 10
+#define I_OPCODE_mskbit1014 (0x1fUL << I_OPCODE_offbit1014)
+#define NDS32Insn_OPCODE_BIT1014(x) \
+ ((NDS32Insn(x) & I_OPCODE_mskbit1014) >> I_OPCODE_offbit1014)
+
+#define I_OPCODE_offbit69 6
+#define I_OPCODE_mskbit69 (0xfUL << I_OPCODE_offbit69)
+#define NDS32Insn_OPCODE_BIT69(x) \
+ ((NDS32Insn(x) & I_OPCODE_mskbit69) >> I_OPCODE_offbit69)
+
+#define I_OPCODE_offCOP0 0
+#define I_OPCODE_mskCOP0 (0x3fUL << I_OPCODE_offCOP0)
+#define NDS32Insn_OPCODE_COP0(x) \
+ ((NDS32Insn(x) & I_OPCODE_mskCOP0) >> I_OPCODE_offCOP0)
+
+#endif /* __NDS32_FPU_INST_H */
diff --git a/arch/nds32/include/asm/perf_event.h b/arch/nds32/include/asm/perf_event.h
new file mode 100644
index 000000000000..fcdff02acc14
--- /dev/null
+++ b/arch/nds32/include/asm/perf_event.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2008-2018 Andes Technology Corporation */
+
+#ifndef __ASM_PERF_EVENT_H
+#define __ASM_PERF_EVENT_H
+
+/*
+ * This file is request by Perf,
+ * please refer to tools/perf/design.txt for more details
+ */
+struct pt_regs;
+unsigned long perf_instruction_pointer(struct pt_regs *regs);
+unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs) perf_misc_flags(regs)
+
+#endif
diff --git a/arch/nds32/include/asm/pmu.h b/arch/nds32/include/asm/pmu.h
new file mode 100644
index 000000000000..e1ac0b0b8bcf
--- /dev/null
+++ b/arch/nds32/include/asm/pmu.h
@@ -0,0 +1,386 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2008-2018 Andes Technology Corporation */
+
+#ifndef __ASM_PMU_H
+#define __ASM_PMU_H
+
+#include <linux/interrupt.h>
+#include <linux/perf_event.h>
+#include <asm/unistd.h>
+#include <asm/bitfield.h>
+
+/* Has special meaning for perf core implementation */
+#define HW_OP_UNSUPPORTED 0x0
+#define C(_x) PERF_COUNT_HW_CACHE_##_x
+#define CACHE_OP_UNSUPPORTED 0x0
+
+/* Enough for both software and hardware defined events */
+#define SOFTWARE_EVENT_MASK 0xFF
+
+#define PFM_OFFSET_MAGIC_0 2 /* DO NOT START FROM 0 */
+#define PFM_OFFSET_MAGIC_1 (PFM_OFFSET_MAGIC_0 + 36)
+#define PFM_OFFSET_MAGIC_2 (PFM_OFFSET_MAGIC_1 + 36)
+
+enum { PFMC0, PFMC1, PFMC2, MAX_COUNTERS };
+
+u32 PFM_CTL_OVF[3] = { PFM_CTL_mskOVF0, PFM_CTL_mskOVF1,
+ PFM_CTL_mskOVF2 };
+u32 PFM_CTL_EN[3] = { PFM_CTL_mskEN0, PFM_CTL_mskEN1,
+ PFM_CTL_mskEN2 };
+u32 PFM_CTL_OFFSEL[3] = { PFM_CTL_offSEL0, PFM_CTL_offSEL1,
+ PFM_CTL_offSEL2 };
+u32 PFM_CTL_IE[3] = { PFM_CTL_mskIE0, PFM_CTL_mskIE1, PFM_CTL_mskIE2 };
+u32 PFM_CTL_KS[3] = { PFM_CTL_mskKS0, PFM_CTL_mskKS1, PFM_CTL_mskKS2 };
+u32 PFM_CTL_KU[3] = { PFM_CTL_mskKU0, PFM_CTL_mskKU1, PFM_CTL_mskKU2 };
+u32 PFM_CTL_SEL[3] = { PFM_CTL_mskSEL0, PFM_CTL_mskSEL1, PFM_CTL_mskSEL2 };
+/*
+ * Perf Events' indices
+ */
+#define NDS32_IDX_CYCLE_COUNTER 0
+#define NDS32_IDX_COUNTER0 1
+#define NDS32_IDX_COUNTER1 2
+
+/* The events for a given PMU register set. */
+struct pmu_hw_events {
+ /*
+ * The events that are active on the PMU for the given index.
+ */
+ struct perf_event *events[MAX_COUNTERS];
+
+ /*
+ * A 1 bit for an index indicates that the counter is being used for
+ * an event. A 0 means that the counter can be used.
+ */
+ unsigned long used_mask[BITS_TO_LONGS(MAX_COUNTERS)];
+
+ /*
+ * Hardware lock to serialize accesses to PMU registers. Needed for the
+ * read/modify/write sequences.
+ */
+ raw_spinlock_t pmu_lock;
+};
+
+struct nds32_pmu {
+ struct pmu pmu;
+ cpumask_t active_irqs;
+ char *name;
+ irqreturn_t (*handle_irq)(int irq_num, void *dev);
+ void (*enable)(struct perf_event *event);
+ void (*disable)(struct perf_event *event);
+ int (*get_event_idx)(struct pmu_hw_events *hw_events,
+ struct perf_event *event);
+ int (*set_event_filter)(struct hw_perf_event *evt,
+ struct perf_event_attr *attr);
+ u32 (*read_counter)(struct perf_event *event);
+ void (*write_counter)(struct perf_event *event, u32 val);
+ void (*start)(struct nds32_pmu *nds32_pmu);
+ void (*stop)(struct nds32_pmu *nds32_pmu);
+ void (*reset)(void *data);
+ int (*request_irq)(struct nds32_pmu *nds32_pmu, irq_handler_t handler);
+ void (*free_irq)(struct nds32_pmu *nds32_pmu);
+ int (*map_event)(struct perf_event *event);
+ int num_events;
+ atomic_t active_events;
+ u64 max_period;
+ struct platform_device *plat_device;
+ struct pmu_hw_events *(*get_hw_events)(void);
+};
+
+#define to_nds32_pmu(p) (container_of(p, struct nds32_pmu, pmu))
+
+int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type);
+
+u64 nds32_pmu_event_update(struct perf_event *event);
+
+int nds32_pmu_event_set_period(struct perf_event *event);
+
+/*
+ * Common NDS32 SPAv3 event types
+ *
+ * Note: An implementation may not be able to count all of these events
+ * but the encodings are considered to be `reserved' in the case that
+ * they are not available.
+ *
+ * SEL_TOTAL_CYCLES will add an offset is due to ZERO is defined as
+ * NOT_SUPPORTED EVENT mapping in generic perf code.
+ * You will need to deal it in the event writing implementation.
+ */
+enum spav3_counter_0_perf_types {
+ SPAV3_0_SEL_BASE = -1 + PFM_OFFSET_MAGIC_0, /* counting symbol */
+ SPAV3_0_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_0,
+ SPAV3_0_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_0,
+ SPAV3_0_SEL_LAST /* counting symbol */
+};
+
+enum spav3_counter_1_perf_types {
+ SPAV3_1_SEL_BASE = -1 + PFM_OFFSET_MAGIC_1, /* counting symbol */
+ SPAV3_1_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_CONDITIONAL_BRANCH = 2 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_TAKEN_CONDITIONAL_BRANCH = 3 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_PREFETCH_INSTRUCTION = 4 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_RET_INST = 5 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_JR_INST = 6 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_JAL_JRAL_INST = 7 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_NOP_INST = 8 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_SCW_INST = 9 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_ISB_DSB_INST = 10 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_CCTL_INST = 11 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_TAKEN_INTERRUPTS = 12 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_LOADS_COMPLETED = 13 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_UITLB_ACCESS = 14 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_UDTLB_ACCESS = 15 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_MTLB_ACCESS = 16 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_CODE_CACHE_ACCESS = 17 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_DATA_DEPENDENCY_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_DATA_CACHE_MISS_STALL_CYCLES = 19 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_DATA_CACHE_ACCESS = 20 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS = 22 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS = 23 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_ILM_ACCESS = 24 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_LSU_BIU_CYCLES = 25 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_HPTWK_BIU_CYCLES = 26 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_DMA_BIU_CYCLES = 27 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_CODE_CACHE_FILL_BIU_CYCLES = 28 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_LEGAL_UNALIGN_DCACHE_ACCESS = 29 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_PUSH25 = 30 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_SYSCALLS_INST = 31 + PFM_OFFSET_MAGIC_1,
+ SPAV3_1_SEL_LAST /* counting symbol */
+};
+
+enum spav3_counter_2_perf_types {
+ SPAV3_2_SEL_BASE = -1 + PFM_OFFSET_MAGIC_2, /* counting symbol */
+ SPAV3_2_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_CONDITIONAL_BRANCH_MISPREDICT = 2 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_TAKEN_CONDITIONAL_BRANCH_MISPREDICT =
+ 3 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_PREFETCH_INSTRUCTION_CACHE_HIT = 4 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_RET_MISPREDICT = 5 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_IMMEDIATE_J_INST = 6 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_MULTIPLY_INST = 7 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_16_BIT_INST = 8 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_FAILED_SCW_INST = 9 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_LD_AFTER_ST_CONFLICT_REPLAYS = 10 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_TAKEN_EXCEPTIONS = 12 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_STORES_COMPLETED = 13 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_UITLB_MISS = 14 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_UDTLB_MISS = 15 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_MTLB_MISS = 16 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_CODE_CACHE_MISS = 17 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_EMPTY_INST_QUEUE_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_DATA_WRITE_BACK = 19 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_LOAD_DATA_CACHE_MISS = 22 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_STORE_DATA_CACHE_MISS = 23 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_DLM_ACCESS = 24 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_LSU_BIU_REQUEST = 25 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_HPTWK_BIU_REQUEST = 26 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_DMA_BIU_REQUEST = 27 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_CODE_CACHE_FILL_BIU_REQUEST = 28 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_EXTERNAL_EVENTS = 29 + PFM_OFFSET_MAGIC_2,
+ SPAV3_1_SEL_POP25 = 30 + PFM_OFFSET_MAGIC_2,
+ SPAV3_2_SEL_LAST /* counting symbol */
+};
+
+/* Get converted event counter index */
+static inline int get_converted_event_idx(unsigned long event)
+{
+ int idx;
+
+ if ((event) > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST) {
+ idx = 0;
+ } else if ((event) > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST) {
+ idx = 1;
+ } else if ((event) > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST) {
+ idx = 2;
+ } else {
+ pr_err("GET_CONVERTED_EVENT_IDX PFM counter range error\n");
+ return -EPERM;
+ }
+
+ return idx;
+}
+
+/* Get converted hardware event number */
+static inline u32 get_converted_evet_hw_num(u32 event)
+{
+ if (event > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST)
+ event -= PFM_OFFSET_MAGIC_0;
+ else if (event > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST)
+ event -= PFM_OFFSET_MAGIC_1;
+ else if (event > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST)
+ event -= PFM_OFFSET_MAGIC_2;
+ else if (event != 0)
+ pr_err("GET_CONVERTED_EVENT_HW_NUM PFM counter range error\n");
+
+ return event;
+}
+
+/*
+ * NDS32 HW events mapping
+ *
+ * The hardware events that we support. We do support cache operations but
+ * we have harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned int nds32_pfm_perf_map[PERF_COUNT_HW_MAX] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = SPAV3_0_SEL_TOTAL_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = SPAV3_1_SEL_COMPLETED_INSTRUCTION,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = SPAV3_1_SEL_DATA_CACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = SPAV3_2_SEL_DATA_CACHE_MISS,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED,
+ [PERF_COUNT_HW_BRANCH_MISSES] = HW_OP_UNSUPPORTED,
+ [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
+ [PERF_COUNT_HW_REF_CPU_CYCLES] = HW_OP_UNSUPPORTED
+};
+
+static const unsigned int nds32_pfm_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ [C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] =
+ SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS,
+ [C(RESULT_MISS)] =
+ SPAV3_2_SEL_LOAD_DATA_CACHE_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] =
+ SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS,
+ [C(RESULT_MISS)] =
+ SPAV3_2_SEL_STORE_DATA_CACHE_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] =
+ CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] =
+ SPAV3_1_SEL_CODE_CACHE_ACCESS,
+ [C(RESULT_MISS)] =
+ SPAV3_2_SEL_CODE_CACHE_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] =
+ SPAV3_1_SEL_CODE_CACHE_ACCESS,
+ [C(RESULT_MISS)] =
+ SPAV3_2_SEL_CODE_CACHE_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ /* TODO: L2CC */
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ /* NDS32 PMU does not support TLB read/write hit/miss,
+ * However, it can count access/miss, which mixed with read and write.
+ * Therefore, only READ counter will use it.
+ * We do as possible as we can.
+ */
+ [C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] =
+ SPAV3_1_SEL_UDTLB_ACCESS,
+ [C(RESULT_MISS)] =
+ SPAV3_2_SEL_UDTLB_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] =
+ CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] =
+ SPAV3_1_SEL_UITLB_ACCESS,
+ [C(RESULT_MISS)] =
+ SPAV3_2_SEL_UITLB_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] =
+ CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] =
+ CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(BPU)] = { /* What is BPU? */
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] =
+ CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] =
+ CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] =
+ CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(NODE)] = { /* What is NODE? */
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] =
+ CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] =
+ CACHE_OP_UNSUPPORTED,
+ },
+ },
+};
+
+int nds32_pmu_map_event(struct perf_event *event,
+ const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
+ const unsigned int (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask);
+
+#endif /* __ASM_PMU_H */
diff --git a/arch/nds32/include/asm/processor.h b/arch/nds32/include/asm/processor.h
index c2660f566bac..72024f8bc129 100644
--- a/arch/nds32/include/asm/processor.h
+++ b/arch/nds32/include/asm/processor.h
@@ -35,6 +35,8 @@ struct thread_struct {
unsigned long address;
unsigned long trap_no;
unsigned long error_code;
+
+ struct fpu_struct fpu;
};
#define INIT_THREAD { }
@@ -72,6 +74,11 @@ struct task_struct;
/* Free all resources held by a thread. */
#define release_thread(thread) do { } while(0)
+#if IS_ENABLED(CONFIG_FPU)
+#if !IS_ENABLED(CONFIG_UNLAZU_FPU)
+extern struct task_struct *last_task_used_math;
+#endif
+#endif
/* Prepare to copy thread state - unlazy all lazy status */
#define prepare_to_copy(tsk) do { } while (0)
diff --git a/arch/nds32/include/asm/sfp-machine.h b/arch/nds32/include/asm/sfp-machine.h
new file mode 100644
index 000000000000..b1a5caa332b5
--- /dev/null
+++ b/arch/nds32/include/asm/sfp-machine.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2005-2018 Andes Technology Corporation */
+
+#include <asm/bitfield.h>
+
+#define _FP_W_TYPE_SIZE 32
+#define _FP_W_TYPE unsigned long
+#define _FP_WS_TYPE signed long
+#define _FP_I_TYPE long
+
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+#define _FP_MUL_MEAT_S(R, X, Y) \
+ _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S, R, X, Y, umul_ppmm)
+#define _FP_MUL_MEAT_D(R, X, Y) \
+ _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D, R, X, Y, umul_ppmm)
+#define _FP_MUL_MEAT_Q(R, X, Y) \
+ _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q, R, X, Y, umul_ppmm)
+
+#define _FP_MUL_MEAT_DW_S(R, X, Y) \
+ _FP_MUL_MEAT_DW_1_wide(_FP_WFRACBITS_S, R, X, Y, umul_ppmm)
+#define _FP_MUL_MEAT_DW_D(R, X, Y) \
+ _FP_MUL_MEAT_DW_2_wide(_FP_WFRACBITS_D, R, X, Y, umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R, X, Y) _FP_DIV_MEAT_1_udiv_norm(S, R, X, Y)
+#define _FP_DIV_MEAT_D(R, X, Y) _FP_DIV_MEAT_2_udiv(D, R, X, Y)
+
+#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1
+#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
+#define _FP_NANSIGN_S 0
+#define _FP_NANSIGN_D 0
+#define _FP_NANSIGN_Q 0
+
+#define _FP_KEEPNANFRACP 1
+#define _FP_QNANNEGATEDP 0
+
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
+do { \
+ if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \
+ && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) { \
+ R##_s = Y##_s; \
+ _FP_FRAC_COPY_##wc(R, Y); \
+ } else { \
+ R##_s = X##_s; \
+ _FP_FRAC_COPY_##wc(R, X); \
+ } \
+ R##_c = FP_CLS_NAN; \
+} while (0)
+
+#define __FPU_FPCSR (current->thread.fpu.fpcsr)
+
+/* Obtain the current rounding mode. */
+#define FP_ROUNDMODE \
+({ \
+ __FPU_FPCSR & FPCSR_mskRM; \
+})
+
+#define FP_RND_NEAREST 0
+#define FP_RND_PINF 1
+#define FP_RND_MINF 2
+#define FP_RND_ZERO 3
+
+#define FP_EX_INVALID FPCSR_mskIVO
+#define FP_EX_DIVZERO FPCSR_mskDBZ
+#define FP_EX_OVERFLOW FPCSR_mskOVF
+#define FP_EX_UNDERFLOW FPCSR_mskUDF
+#define FP_EX_INEXACT FPCSR_mskIEX
+
+#define SF_CEQ 2
+#define SF_CLT 1
+#define SF_CGT 3
+#define SF_CUN 4
+
+#include <asm/byteorder.h>
+
+#ifdef __BIG_ENDIAN__
+#define __BYTE_ORDER __BIG_ENDIAN
+#define __LITTLE_ENDIAN 0
+#else
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#define __BIG_ENDIAN 0
+#endif
+
+#define abort() do { } while (0)
+#define umul_ppmm(w1, w0, u, v) \
+do { \
+ UWtype __x0, __x1, __x2, __x3; \
+ UHWtype __ul, __vl, __uh, __vh; \
+ \
+ __ul = __ll_lowpart(u); \
+ __uh = __ll_highpart(u); \
+ __vl = __ll_lowpart(v); \
+ __vh = __ll_highpart(v); \
+ \
+ __x0 = (UWtype) __ul * __vl; \
+ __x1 = (UWtype) __ul * __vh; \
+ __x2 = (UWtype) __uh * __vl; \
+ __x3 = (UWtype) __uh * __vh; \
+ \
+ __x1 += __ll_highpart(__x0); \
+ __x1 += __x2; \
+ if (__x1 < __x2) \
+ __x3 += __ll_B; \
+ \
+ (w1) = __x3 + __ll_highpart(__x1); \
+ (w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0); \
+} while (0)
+
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+do { \
+ UWtype __x; \
+ __x = (al) + (bl); \
+ (sh) = (ah) + (bh) + (__x < (al)); \
+ (sl) = __x; \
+} while (0)
+
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+do { \
+ UWtype __x; \
+ __x = (al) - (bl); \
+ (sh) = (ah) - (bh) - (__x > (al)); \
+ (sl) = __x; \
+} while (0)
+
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { \
+ UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
+ __d1 = __ll_highpart(d); \
+ __d0 = __ll_lowpart(d); \
+ \
+ __r1 = (n1) % __d1; \
+ __q1 = (n1) / __d1; \
+ __m = (UWtype) __q1 * __d0; \
+ __r1 = __r1 * __ll_B | __ll_highpart(n0); \
+ if (__r1 < __m) { \
+ __q1--, __r1 += (d); \
+ if (__r1 >= (d)) \
+ if (__r1 < __m) \
+ __q1--, __r1 += (d); \
+ } \
+ __r1 -= __m; \
+ __r0 = __r1 % __d1; \
+ __q0 = __r1 / __d1; \
+ __m = (UWtype) __q0 * __d0; \
+ __r0 = __r0 * __ll_B | __ll_lowpart(n0); \
+ if (__r0 < __m) { \
+ __q0--, __r0 += (d); \
+ if (__r0 >= (d)) \
+ if (__r0 < __m) \
+ __q0--, __r0 += (d); \
+ } \
+ __r0 -= __m; \
+ (q) = (UWtype) __q1 * __ll_B | __q0; \
+ (r) = __r0; \
+} while (0)
diff --git a/arch/nds32/include/asm/stacktrace.h b/arch/nds32/include/asm/stacktrace.h
new file mode 100644
index 000000000000..6bf7c777bda4
--- /dev/null
+++ b/arch/nds32/include/asm/stacktrace.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2008-2018 Andes Technology Corporation */
+
+#ifndef __ASM_STACKTRACE_H
+#define __ASM_STACKTRACE_H
+
+/* Kernel callchain */
+struct stackframe {
+ unsigned long fp;
+ unsigned long sp;
+ unsigned long lp;
+};
+
+/*
+ * struct frame_tail: User callchain
+ * IMPORTANT:
+ * This struct is used for call-stack walking,
+ * the order and types matters.
+ * Do not use array, it only stores sizeof(pointer)
+ *
+ * The details can refer to arch/arm/kernel/perf_event.c
+ */
+struct frame_tail {
+ unsigned long stack_fp;
+ unsigned long stack_lp;
+};
+
+/* For User callchain with optimize for size */
+struct frame_tail_opt_size {
+ unsigned long stack_r6;
+ unsigned long stack_fp;
+ unsigned long stack_gp;
+ unsigned long stack_lp;
+};
+
+extern void
+get_real_ret_addr(unsigned long *addr, struct task_struct *tsk, int *graph);
+
+#endif /* __ASM_STACKTRACE_H */
diff --git a/arch/nds32/include/asm/suspend.h b/arch/nds32/include/asm/suspend.h
new file mode 100644
index 000000000000..6ed2418af1ac
--- /dev/null
+++ b/arch/nds32/include/asm/suspend.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2008-2017 Andes Technology Corporation
+
+#ifndef __ASM_NDS32_SUSPEND_H
+#define __ASM_NDS32_SUSPEND_H
+
+extern void suspend2ram(void);
+extern void cpu_resume(void);
+extern unsigned long wake_mask;
+
+#endif
diff --git a/arch/nds32/include/asm/syscalls.h b/arch/nds32/include/asm/syscalls.h
index 78778ecff60c..da32101b455d 100644
--- a/arch/nds32/include/asm/syscalls.h
+++ b/arch/nds32/include/asm/syscalls.h
@@ -7,6 +7,7 @@
asmlinkage long sys_cacheflush(unsigned long addr, unsigned long len, unsigned int op);
asmlinkage long sys_fadvise64_64_wrapper(int fd, int advice, loff_t offset, loff_t len);
asmlinkage long sys_rt_sigreturn_wrapper(void);
+asmlinkage long sys_udftrap(int option);
#include <asm-generic/syscalls.h>
diff --git a/arch/nds32/include/uapi/asm/auxvec.h b/arch/nds32/include/uapi/asm/auxvec.h
index 56043ce4972f..2d3213f5e595 100644
--- a/arch/nds32/include/uapi/asm/auxvec.h
+++ b/arch/nds32/include/uapi/asm/auxvec.h
@@ -4,6 +4,13 @@
#ifndef __ASM_AUXVEC_H
#define __ASM_AUXVEC_H
+/*
+ * This entry gives some information about the FPU initialization
+ * performed by the kernel.
+ */
+#define AT_FPUCW 18 /* Used FPU control word. */
+
+
/* VDSO location */
#define AT_SYSINFO_EHDR 33
diff --git a/arch/nds32/include/uapi/asm/sigcontext.h b/arch/nds32/include/uapi/asm/sigcontext.h
index 00567b237b0c..58afc416473e 100644
--- a/arch/nds32/include/uapi/asm/sigcontext.h
+++ b/arch/nds32/include/uapi/asm/sigcontext.h
@@ -9,6 +9,19 @@
* before the signal handler was invoked. Note: only add new entries
* to the end of the structure.
*/
+struct fpu_struct {
+ unsigned long long fd_regs[32];
+ unsigned long fpcsr;
+ /*
+ * UDF_trap is used to recognize whether underflow trap is enabled
+ * or not. When UDF_trap == 1, this process will be traped and then
+ * get a SIGFPE signal when encountering an underflow exception.
+ * UDF_trap is only modified through setfputrap syscall. Therefore,
+ * UDF_trap needn't be saved or loaded to context in each context
+ * switch.
+ */
+ unsigned long UDF_trap;
+};
struct zol_struct {
unsigned long nds32_lc; /* $LC */
@@ -54,6 +67,7 @@ struct sigcontext {
unsigned long fault_address;
unsigned long used_math_flag;
/* FPU Registers */
+ struct fpu_struct fpu;
struct zol_struct zol;
};
diff --git a/arch/nds32/include/uapi/asm/udftrap.h b/arch/nds32/include/uapi/asm/udftrap.h
new file mode 100644
index 000000000000..433f79d679c0
--- /dev/null
+++ b/arch/nds32/include/uapi/asm/udftrap.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2005-2018 Andes Technology Corporation */
+#ifndef _ASM_SETFPUTRAP
+#define _ASM_SETFPUTRAP
+
+/*
+ * Options for setfputrap system call
+ */
+#define DISABLE_UDFTRAP 0 /* disable underflow exception trap */
+#define ENABLE_UDFTRAP 1 /* enable undeflos exception trap */
+#define GET_UDFTRAP 2 /* only get undeflos exception trap status */
+
+#endif /* _ASM_CACHECTL */
diff --git a/arch/nds32/include/uapi/asm/unistd.h b/arch/nds32/include/uapi/asm/unistd.h
index 603e826e0449..c2c3a3e34083 100644
--- a/arch/nds32/include/uapi/asm/unistd.h
+++ b/arch/nds32/include/uapi/asm/unistd.h
@@ -9,4 +9,6 @@
/* Additional NDS32 specific syscalls. */
#define __NR_cacheflush (__NR_arch_specific_syscall)
+#define __NR_udftrap (__NR_arch_specific_syscall + 1)
__SYSCALL(__NR_cacheflush, sys_cacheflush)
+__SYSCALL(__NR_udftrap, sys_udftrap)