diff options
-rw-r--r-- | arch/loongarch/Kconfig | 51 | ||||
-rw-r--r-- | arch/loongarch/include/asm/asmmacro.h | 393 | ||||
-rw-r--r-- | arch/loongarch/include/asm/fpu.h | 185 | ||||
-rw-r--r-- | arch/loongarch/include/uapi/asm/ptrace.h | 16 | ||||
-rw-r--r-- | arch/loongarch/include/uapi/asm/sigcontext.h | 18 | ||||
-rw-r--r-- | arch/loongarch/kernel/cpu-probe.c | 12 | ||||
-rw-r--r-- | arch/loongarch/kernel/fpu.S | 270 | ||||
-rw-r--r-- | arch/loongarch/kernel/process.c | 10 | ||||
-rw-r--r-- | arch/loongarch/kernel/ptrace.c | 110 | ||||
-rw-r--r-- | arch/loongarch/kernel/signal.c | 326 | ||||
-rw-r--r-- | arch/loongarch/kernel/traps.c | 84 |
11 files changed, 1452 insertions, 23 deletions
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 1944bae2f31c..72b614429c37 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -164,14 +164,6 @@ config 32BIT config 64BIT def_bool y -config CPU_HAS_FPU - bool - default y - -config CPU_HAS_PREFETCH - bool - default y - config GENERIC_BUG def_bool y depends on BUG @@ -247,6 +239,12 @@ config AS_HAS_EXPLICIT_RELOCS config AS_HAS_FCSR_CLASS def_bool $(as-instr,movfcsr2gr \$t0$(comma)\$fcsr0) +config AS_HAS_LSX_EXTENSION + def_bool $(as-instr,vld \$vr0$(comma)\$a0$(comma)0) + +config AS_HAS_LASX_EXTENSION + def_bool $(as-instr,xvld \$xr0$(comma)\$a0$(comma)0) + menu "Kernel type and options" source "kernel/Kconfig.hz" @@ -487,6 +485,43 @@ config ARCH_STRICT_ALIGN to run kernel only on systems with h/w unaligned access support in order to optimise for performance. +config CPU_HAS_FPU + bool + default y + +config CPU_HAS_LSX + bool "Support for the Loongson SIMD Extension" + depends on AS_HAS_LSX_EXTENSION + help + Loongson SIMD Extension (LSX) introduces 128 bit wide vector registers + and a set of SIMD instructions to operate on them. When this option + is enabled the kernel will support allocating & switching LSX + vector register contexts. If you know that your kernel will only be + running on CPUs which do not support LSX or that your userland will + not be making use of it then you may wish to say N here to reduce + the size & complexity of your kernel. + + If unsure, say Y. + +config CPU_HAS_LASX + bool "Support for the Loongson Advanced SIMD Extension" + depends on CPU_HAS_LSX + depends on AS_HAS_LASX_EXTENSION + help + Loongson Advanced SIMD Extension (LASX) introduces 256 bit wide vector + registers and a set of SIMD instructions to operate on them. When this + option is enabled the kernel will support allocating & switching LASX + vector register contexts. If you know that your kernel will only be + running on CPUs which do not support LASX or that your userland will + not be making use of it then you may wish to say N here to reduce + the size & complexity of your kernel. + + If unsure, say Y. + +config CPU_HAS_PREFETCH + bool + default y + config KEXEC bool "Kexec system call" select KEXEC_CORE diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h index c51a1b43acb4..79e1d53fea89 100644 --- a/arch/loongarch/include/asm/asmmacro.h +++ b/arch/loongarch/include/asm/asmmacro.h @@ -270,6 +270,399 @@ fld.d $f31, \tmp, THREAD_FPR31 - THREAD_FPR0 .endm + .macro lsx_save_data thread tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + vst $vr0, \tmp, THREAD_FPR0 - THREAD_FPR0 + vst $vr1, \tmp, THREAD_FPR1 - THREAD_FPR0 + vst $vr2, \tmp, THREAD_FPR2 - THREAD_FPR0 + vst $vr3, \tmp, THREAD_FPR3 - THREAD_FPR0 + vst $vr4, \tmp, THREAD_FPR4 - THREAD_FPR0 + vst $vr5, \tmp, THREAD_FPR5 - THREAD_FPR0 + vst $vr6, \tmp, THREAD_FPR6 - THREAD_FPR0 + vst $vr7, \tmp, THREAD_FPR7 - THREAD_FPR0 + vst $vr8, \tmp, THREAD_FPR8 - THREAD_FPR0 + vst $vr9, \tmp, THREAD_FPR9 - THREAD_FPR0 + vst $vr10, \tmp, THREAD_FPR10 - THREAD_FPR0 + vst $vr11, \tmp, THREAD_FPR11 - THREAD_FPR0 + vst $vr12, \tmp, THREAD_FPR12 - THREAD_FPR0 + vst $vr13, \tmp, THREAD_FPR13 - THREAD_FPR0 + vst $vr14, \tmp, THREAD_FPR14 - THREAD_FPR0 + vst $vr15, \tmp, THREAD_FPR15 - THREAD_FPR0 + vst $vr16, \tmp, THREAD_FPR16 - THREAD_FPR0 + vst $vr17, \tmp, THREAD_FPR17 - THREAD_FPR0 + vst $vr18, \tmp, THREAD_FPR18 - THREAD_FPR0 + vst $vr19, \tmp, THREAD_FPR19 - THREAD_FPR0 + vst $vr20, \tmp, THREAD_FPR20 - THREAD_FPR0 + vst $vr21, \tmp, THREAD_FPR21 - THREAD_FPR0 + vst $vr22, \tmp, THREAD_FPR22 - THREAD_FPR0 + vst $vr23, \tmp, THREAD_FPR23 - THREAD_FPR0 + vst $vr24, \tmp, THREAD_FPR24 - THREAD_FPR0 + vst $vr25, \tmp, THREAD_FPR25 - THREAD_FPR0 + vst $vr26, \tmp, THREAD_FPR26 - THREAD_FPR0 + vst $vr27, \tmp, THREAD_FPR27 - THREAD_FPR0 + vst $vr28, \tmp, THREAD_FPR28 - THREAD_FPR0 + vst $vr29, \tmp, THREAD_FPR29 - THREAD_FPR0 + vst $vr30, \tmp, THREAD_FPR30 - THREAD_FPR0 + vst $vr31, \tmp, THREAD_FPR31 - THREAD_FPR0 + .endm + + .macro lsx_restore_data thread tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + vld $vr0, \tmp, THREAD_FPR0 - THREAD_FPR0 + vld $vr1, \tmp, THREAD_FPR1 - THREAD_FPR0 + vld $vr2, \tmp, THREAD_FPR2 - THREAD_FPR0 + vld $vr3, \tmp, THREAD_FPR3 - THREAD_FPR0 + vld $vr4, \tmp, THREAD_FPR4 - THREAD_FPR0 + vld $vr5, \tmp, THREAD_FPR5 - THREAD_FPR0 + vld $vr6, \tmp, THREAD_FPR6 - THREAD_FPR0 + vld $vr7, \tmp, THREAD_FPR7 - THREAD_FPR0 + vld $vr8, \tmp, THREAD_FPR8 - THREAD_FPR0 + vld $vr9, \tmp, THREAD_FPR9 - THREAD_FPR0 + vld $vr10, \tmp, THREAD_FPR10 - THREAD_FPR0 + vld $vr11, \tmp, THREAD_FPR11 - THREAD_FPR0 + vld $vr12, \tmp, THREAD_FPR12 - THREAD_FPR0 + vld $vr13, \tmp, THREAD_FPR13 - THREAD_FPR0 + vld $vr14, \tmp, THREAD_FPR14 - THREAD_FPR0 + vld $vr15, \tmp, THREAD_FPR15 - THREAD_FPR0 + vld $vr16, \tmp, THREAD_FPR16 - THREAD_FPR0 + vld $vr17, \tmp, THREAD_FPR17 - THREAD_FPR0 + vld $vr18, \tmp, THREAD_FPR18 - THREAD_FPR0 + vld $vr19, \tmp, THREAD_FPR19 - THREAD_FPR0 + vld $vr20, \tmp, THREAD_FPR20 - THREAD_FPR0 + vld $vr21, \tmp, THREAD_FPR21 - THREAD_FPR0 + vld $vr22, \tmp, THREAD_FPR22 - THREAD_FPR0 + vld $vr23, \tmp, THREAD_FPR23 - THREAD_FPR0 + vld $vr24, \tmp, THREAD_FPR24 - THREAD_FPR0 + vld $vr25, \tmp, THREAD_FPR25 - THREAD_FPR0 + vld $vr26, \tmp, THREAD_FPR26 - THREAD_FPR0 + vld $vr27, \tmp, THREAD_FPR27 - THREAD_FPR0 + vld $vr28, \tmp, THREAD_FPR28 - THREAD_FPR0 + vld $vr29, \tmp, THREAD_FPR29 - THREAD_FPR0 + vld $vr30, \tmp, THREAD_FPR30 - THREAD_FPR0 + vld $vr31, \tmp, THREAD_FPR31 - THREAD_FPR0 + .endm + + .macro lsx_save_all thread tmp0 tmp1 + fpu_save_cc \thread, \tmp0, \tmp1 + fpu_save_csr \thread, \tmp0 + lsx_save_data \thread, \tmp0 + .endm + + .macro lsx_restore_all thread tmp0 tmp1 + lsx_restore_data \thread, \tmp0 + fpu_restore_cc \thread, \tmp0, \tmp1 + fpu_restore_csr \thread, \tmp0 + .endm + + .macro lsx_save_upper vd base tmp off + vpickve2gr.d \tmp, \vd, 1 + st.d \tmp, \base, (\off+8) + .endm + + .macro lsx_save_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \base, \thread, \tmp + lsx_save_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0) + lsx_save_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0) + lsx_save_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0) + lsx_save_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0) + lsx_save_upper $vr4, \base, \tmp, (THREAD_FPR4-THREAD_FPR0) + lsx_save_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0) + lsx_save_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0) + lsx_save_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0) + lsx_save_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0) + lsx_save_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0) + lsx_save_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0) + lsx_save_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0) + lsx_save_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0) + lsx_save_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0) + lsx_save_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0) + lsx_save_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0) + lsx_save_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0) + lsx_save_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0) + lsx_save_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0) + lsx_save_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0) + lsx_save_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0) + lsx_save_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0) + lsx_save_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0) + lsx_save_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0) + lsx_save_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0) + lsx_save_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0) + lsx_save_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0) + lsx_save_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0) + lsx_save_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0) + lsx_save_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0) + lsx_save_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0) + lsx_save_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0) + .endm + + .macro lsx_restore_upper vd base tmp off + ld.d \tmp, \base, (\off+8) + vinsgr2vr.d \vd, \tmp, 1 + .endm + + .macro lsx_restore_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \base, \thread, \tmp + lsx_restore_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0) + lsx_restore_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0) + lsx_restore_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0) + lsx_restore_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0) + lsx_restore_upper $vr4, \base, \tmp, (THREAD_FPR4-THREAD_FPR0) + lsx_restore_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0) + lsx_restore_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0) + lsx_restore_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0) + lsx_restore_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0) + lsx_restore_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0) + lsx_restore_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0) + lsx_restore_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0) + lsx_restore_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0) + lsx_restore_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0) + lsx_restore_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0) + lsx_restore_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0) + lsx_restore_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0) + lsx_restore_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0) + lsx_restore_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0) + lsx_restore_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0) + lsx_restore_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0) + lsx_restore_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0) + lsx_restore_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0) + lsx_restore_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0) + lsx_restore_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0) + lsx_restore_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0) + lsx_restore_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0) + lsx_restore_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0) + lsx_restore_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0) + lsx_restore_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0) + lsx_restore_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0) + lsx_restore_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0) + .endm + + .macro lsx_init_upper vd tmp + vinsgr2vr.d \vd, \tmp, 1 + .endm + + .macro lsx_init_all_upper tmp + not \tmp, zero + lsx_init_upper $vr0 \tmp + lsx_init_upper $vr1 \tmp + lsx_init_upper $vr2 \tmp + lsx_init_upper $vr3 \tmp + lsx_init_upper $vr4 \tmp + lsx_init_upper $vr5 \tmp + lsx_init_upper $vr6 \tmp + lsx_init_upper $vr7 \tmp + lsx_init_upper $vr8 \tmp + lsx_init_upper $vr9 \tmp + lsx_init_upper $vr10 \tmp + lsx_init_upper $vr11 \tmp + lsx_init_upper $vr12 \tmp + lsx_init_upper $vr13 \tmp + lsx_init_upper $vr14 \tmp + lsx_init_upper $vr15 \tmp + lsx_init_upper $vr16 \tmp + lsx_init_upper $vr17 \tmp + lsx_init_upper $vr18 \tmp + lsx_init_upper $vr19 \tmp + lsx_init_upper $vr20 \tmp + lsx_init_upper $vr21 \tmp + lsx_init_upper $vr22 \tmp + lsx_init_upper $vr23 \tmp + lsx_init_upper $vr24 \tmp + lsx_init_upper $vr25 \tmp + lsx_init_upper $vr26 \tmp + lsx_init_upper $vr27 \tmp + lsx_init_upper $vr28 \tmp + lsx_init_upper $vr29 \tmp + lsx_init_upper $vr30 \tmp + lsx_init_upper $vr31 \tmp + .endm + + .macro lasx_save_data thread tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + xvst $xr0, \tmp, THREAD_FPR0 - THREAD_FPR0 + xvst $xr1, \tmp, THREAD_FPR1 - THREAD_FPR0 + xvst $xr2, \tmp, THREAD_FPR2 - THREAD_FPR0 + xvst $xr3, \tmp, THREAD_FPR3 - THREAD_FPR0 + xvst $xr4, \tmp, THREAD_FPR4 - THREAD_FPR0 + xvst $xr5, \tmp, THREAD_FPR5 - THREAD_FPR0 + xvst $xr6, \tmp, THREAD_FPR6 - THREAD_FPR0 + xvst $xr7, \tmp, THREAD_FPR7 - THREAD_FPR0 + xvst $xr8, \tmp, THREAD_FPR8 - THREAD_FPR0 + xvst $xr9, \tmp, THREAD_FPR9 - THREAD_FPR0 + xvst $xr10, \tmp, THREAD_FPR10 - THREAD_FPR0 + xvst $xr11, \tmp, THREAD_FPR11 - THREAD_FPR0 + xvst $xr12, \tmp, THREAD_FPR12 - THREAD_FPR0 + xvst $xr13, \tmp, THREAD_FPR13 - THREAD_FPR0 + xvst $xr14, \tmp, THREAD_FPR14 - THREAD_FPR0 + xvst $xr15, \tmp, THREAD_FPR15 - THREAD_FPR0 + xvst $xr16, \tmp, THREAD_FPR16 - THREAD_FPR0 + xvst $xr17, \tmp, THREAD_FPR17 - THREAD_FPR0 + xvst $xr18, \tmp, THREAD_FPR18 - THREAD_FPR0 + xvst $xr19, \tmp, THREAD_FPR19 - THREAD_FPR0 + xvst $xr20, \tmp, THREAD_FPR20 - THREAD_FPR0 + xvst $xr21, \tmp, THREAD_FPR21 - THREAD_FPR0 + xvst $xr22, \tmp, THREAD_FPR22 - THREAD_FPR0 + xvst $xr23, \tmp, THREAD_FPR23 - THREAD_FPR0 + xvst $xr24, \tmp, THREAD_FPR24 - THREAD_FPR0 + xvst $xr25, \tmp, THREAD_FPR25 - THREAD_FPR0 + xvst $xr26, \tmp, THREAD_FPR26 - THREAD_FPR0 + xvst $xr27, \tmp, THREAD_FPR27 - THREAD_FPR0 + xvst $xr28, \tmp, THREAD_FPR28 - THREAD_FPR0 + xvst $xr29, \tmp, THREAD_FPR29 - THREAD_FPR0 + xvst $xr30, \tmp, THREAD_FPR30 - THREAD_FPR0 + xvst $xr31, \tmp, THREAD_FPR31 - THREAD_FPR0 + .endm + + .macro lasx_restore_data thread tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + xvld $xr0, \tmp, THREAD_FPR0 - THREAD_FPR0 + xvld $xr1, \tmp, THREAD_FPR1 - THREAD_FPR0 + xvld $xr2, \tmp, THREAD_FPR2 - THREAD_FPR0 + xvld $xr3, \tmp, THREAD_FPR3 - THREAD_FPR0 + xvld $xr4, \tmp, THREAD_FPR4 - THREAD_FPR0 + xvld $xr5, \tmp, THREAD_FPR5 - THREAD_FPR0 + xvld $xr6, \tmp, THREAD_FPR6 - THREAD_FPR0 + xvld $xr7, \tmp, THREAD_FPR7 - THREAD_FPR0 + xvld $xr8, \tmp, THREAD_FPR8 - THREAD_FPR0 + xvld $xr9, \tmp, THREAD_FPR9 - THREAD_FPR0 + xvld $xr10, \tmp, THREAD_FPR10 - THREAD_FPR0 + xvld $xr11, \tmp, THREAD_FPR11 - THREAD_FPR0 + xvld $xr12, \tmp, THREAD_FPR12 - THREAD_FPR0 + xvld $xr13, \tmp, THREAD_FPR13 - THREAD_FPR0 + xvld $xr14, \tmp, THREAD_FPR14 - THREAD_FPR0 + xvld $xr15, \tmp, THREAD_FPR15 - THREAD_FPR0 + xvld $xr16, \tmp, THREAD_FPR16 - THREAD_FPR0 + xvld $xr17, \tmp, THREAD_FPR17 - THREAD_FPR0 + xvld $xr18, \tmp, THREAD_FPR18 - THREAD_FPR0 + xvld $xr19, \tmp, THREAD_FPR19 - THREAD_FPR0 + xvld $xr20, \tmp, THREAD_FPR20 - THREAD_FPR0 + xvld $xr21, \tmp, THREAD_FPR21 - THREAD_FPR0 + xvld $xr22, \tmp, THREAD_FPR22 - THREAD_FPR0 + xvld $xr23, \tmp, THREAD_FPR23 - THREAD_FPR0 + xvld $xr24, \tmp, THREAD_FPR24 - THREAD_FPR0 + xvld $xr25, \tmp, THREAD_FPR25 - THREAD_FPR0 + xvld $xr26, \tmp, THREAD_FPR26 - THREAD_FPR0 + xvld $xr27, \tmp, THREAD_FPR27 - THREAD_FPR0 + xvld $xr28, \tmp, THREAD_FPR28 - THREAD_FPR0 + xvld $xr29, \tmp, THREAD_FPR29 - THREAD_FPR0 + xvld $xr30, \tmp, THREAD_FPR30 - THREAD_FPR0 + xvld $xr31, \tmp, THREAD_FPR31 - THREAD_FPR0 + .endm + + .macro lasx_save_all thread tmp0 tmp1 + fpu_save_cc \thread, \tmp0, \tmp1 + fpu_save_csr \thread, \tmp0 + lasx_save_data \thread, \tmp0 + .endm + + .macro lasx_restore_all thread tmp0 tmp1 + lasx_restore_data \thread, \tmp0 + fpu_restore_cc \thread, \tmp0, \tmp1 + fpu_restore_csr \thread, \tmp0 + .endm + + .macro lasx_save_upper xd base tmp off + /* Nothing */ + .endm + + .macro lasx_save_all_upper thread base tmp + /* Nothing */ + .endm + + .macro lasx_restore_upper xd base tmp0 tmp1 off + vld \tmp0, \base, (\off+16) + xvpermi.q \xd, \tmp1, 0x2 + .endm + + .macro lasx_restore_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \base, \thread, \tmp + /* Save $vr31 ($xr31 lower bits) with xvpickve2gr */ + xvpickve2gr.d $r17, $xr31, 0 + xvpickve2gr.d $r18, $xr31, 1 + lasx_restore_upper $xr0, \base, $vr31, $xr31, (THREAD_FPR0-THREAD_FPR0) + lasx_restore_upper $xr1, \base, $vr31, $xr31, (THREAD_FPR1-THREAD_FPR0) + lasx_restore_upper $xr2, \base, $vr31, $xr31, (THREAD_FPR2-THREAD_FPR0) + lasx_restore_upper $xr3, \base, $vr31, $xr31, (THREAD_FPR3-THREAD_FPR0) + lasx_restore_upper $xr4, \base, $vr31, $xr31, (THREAD_FPR4-THREAD_FPR0) + lasx_restore_upper $xr5, \base, $vr31, $xr31, (THREAD_FPR5-THREAD_FPR0) + lasx_restore_upper $xr6, \base, $vr31, $xr31, (THREAD_FPR6-THREAD_FPR0) + lasx_restore_upper $xr7, \base, $vr31, $xr31, (THREAD_FPR7-THREAD_FPR0) + lasx_restore_upper $xr8, \base, $vr31, $xr31, (THREAD_FPR8-THREAD_FPR0) + lasx_restore_upper $xr9, \base, $vr31, $xr31, (THREAD_FPR9-THREAD_FPR0) + lasx_restore_upper $xr10, \base, $vr31, $xr31, (THREAD_FPR10-THREAD_FPR0) + lasx_restore_upper $xr11, \base, $vr31, $xr31, (THREAD_FPR11-THREAD_FPR0) + lasx_restore_upper $xr12, \base, $vr31, $xr31, (THREAD_FPR12-THREAD_FPR0) + lasx_restore_upper $xr13, \base, $vr31, $xr31, (THREAD_FPR13-THREAD_FPR0) + lasx_restore_upper $xr14, \base, $vr31, $xr31, (THREAD_FPR14-THREAD_FPR0) + lasx_restore_upper $xr15, \base, $vr31, $xr31, (THREAD_FPR15-THREAD_FPR0) + lasx_restore_upper $xr16, \base, $vr31, $xr31, (THREAD_FPR16-THREAD_FPR0) + lasx_restore_upper $xr17, \base, $vr31, $xr31, (THREAD_FPR17-THREAD_FPR0) + lasx_restore_upper $xr18, \base, $vr31, $xr31, (THREAD_FPR18-THREAD_FPR0) + lasx_restore_upper $xr19, \base, $vr31, $xr31, (THREAD_FPR19-THREAD_FPR0) + lasx_restore_upper $xr20, \base, $vr31, $xr31, (THREAD_FPR20-THREAD_FPR0) + lasx_restore_upper $xr21, \base, $vr31, $xr31, (THREAD_FPR21-THREAD_FPR0) + lasx_restore_upper $xr22, \base, $vr31, $xr31, (THREAD_FPR22-THREAD_FPR0) + lasx_restore_upper $xr23, \base, $vr31, $xr31, (THREAD_FPR23-THREAD_FPR0) + lasx_restore_upper $xr24, \base, $vr31, $xr31, (THREAD_FPR24-THREAD_FPR0) + lasx_restore_upper $xr25, \base, $vr31, $xr31, (THREAD_FPR25-THREAD_FPR0) + lasx_restore_upper $xr26, \base, $vr31, $xr31, (THREAD_FPR26-THREAD_FPR0) + lasx_restore_upper $xr27, \base, $vr31, $xr31, (THREAD_FPR27-THREAD_FPR0) + lasx_restore_upper $xr28, \base, $vr31, $xr31, (THREAD_FPR28-THREAD_FPR0) + lasx_restore_upper $xr29, \base, $vr31, $xr31, (THREAD_FPR29-THREAD_FPR0) + lasx_restore_upper $xr30, \base, $vr31, $xr31, (THREAD_FPR30-THREAD_FPR0) + lasx_restore_upper $xr31, \base, $vr31, $xr31, (THREAD_FPR31-THREAD_FPR0) + /* Restore $vr31 ($xr31 lower bits) with xvinsgr2vr */ + xvinsgr2vr.d $xr31, $r17, 0 + xvinsgr2vr.d $xr31, $r18, 1 + .endm + + .macro lasx_init_upper xd tmp + xvinsgr2vr.d \xd, \tmp, 2 + xvinsgr2vr.d \xd, \tmp, 3 + .endm + + .macro lasx_init_all_upper tmp + not \tmp, zero + lasx_init_upper $xr0 \tmp + lasx_init_upper $xr1 \tmp + lasx_init_upper $xr2 \tmp + lasx_init_upper $xr3 \tmp + lasx_init_upper $xr4 \tmp + lasx_init_upper $xr5 \tmp + lasx_init_upper $xr6 \tmp + lasx_init_upper $xr7 \tmp + lasx_init_upper $xr8 \tmp + lasx_init_upper $xr9 \tmp + lasx_init_upper $xr10 \tmp + lasx_init_upper $xr11 \tmp + lasx_init_upper $xr12 \tmp + lasx_init_upper $xr13 \tmp + lasx_init_upper $xr14 \tmp + lasx_init_upper $xr15 \tmp + lasx_init_upper $xr16 \tmp + lasx_init_upper $xr17 \tmp + lasx_init_upper $xr18 \tmp + lasx_init_upper $xr19 \tmp + lasx_init_upper $xr20 \tmp + lasx_init_upper $xr21 \tmp + lasx_init_upper $xr22 \tmp + lasx_init_upper $xr23 \tmp + lasx_init_upper $xr24 \tmp + lasx_init_upper $xr25 \tmp + lasx_init_upper $xr26 \tmp + lasx_init_upper $xr27 \tmp + lasx_init_upper $xr28 \tmp + lasx_init_upper $xr29 \tmp + lasx_init_upper $xr30 \tmp + lasx_init_upper $xr31 \tmp + .endm + .macro not dst src nor \dst, \src, zero .endm diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h index 192f8e35d912..e4193d637f66 100644 --- a/arch/loongarch/include/asm/fpu.h +++ b/arch/loongarch/include/asm/fpu.h @@ -28,6 +28,26 @@ extern void _init_fpu(unsigned int); extern void _save_fp(struct loongarch_fpu *); extern void _restore_fp(struct loongarch_fpu *); +extern void _save_lsx(struct loongarch_fpu *fpu); +extern void _restore_lsx(struct loongarch_fpu *fpu); +extern void _init_lsx_upper(void); +extern void _restore_lsx_upper(struct loongarch_fpu *fpu); + +extern void _save_lasx(struct loongarch_fpu *fpu); +extern void _restore_lasx(struct loongarch_fpu *fpu); +extern void _init_lasx_upper(void); +extern void _restore_lasx_upper(struct loongarch_fpu *fpu); + +static inline void enable_lsx(void); +static inline void disable_lsx(void); +static inline void save_lsx(struct task_struct *t); +static inline void restore_lsx(struct task_struct *t); + +static inline void enable_lasx(void); +static inline void disable_lasx(void); +static inline void save_lasx(struct task_struct *t); +static inline void restore_lasx(struct task_struct *t); + /* * Mask the FCSR Cause bits according to the Enable bits, observing * that Unimplemented is always enabled. @@ -44,6 +64,29 @@ static inline int is_fp_enabled(void) 1 : 0; } +static inline int is_lsx_enabled(void) +{ + if (!cpu_has_lsx) + return 0; + + return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LSXEN) ? + 1 : 0; +} + +static inline int is_lasx_enabled(void) +{ + if (!cpu_has_lasx) + return 0; + + return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LASXEN) ? + 1 : 0; +} + +static inline int is_simd_enabled(void) +{ + return is_lsx_enabled() | is_lasx_enabled(); +} + #define enable_fpu() set_csr_euen(CSR_EUEN_FPEN) #define disable_fpu() clear_csr_euen(CSR_EUEN_FPEN) @@ -81,9 +124,22 @@ static inline void own_fpu(int restore) static inline void lose_fpu_inatomic(int save, struct task_struct *tsk) { if (is_fpu_owner()) { - if (save) - _save_fp(&tsk->thread.fpu); - disable_fpu(); + if (!is_simd_enabled()) { + if (save) + _save_fp(&tsk->thread.fpu); + disable_fpu(); + } else { + if (save) { + if (!is_lasx_enabled()) + save_lsx(tsk); + else + save_lasx(tsk); + } + disable_fpu(); + disable_lsx(); + disable_lasx(); + clear_tsk_thread_flag(tsk, TIF_USEDSIMD); + } clear_tsk_thread_flag(tsk, TIF_USEDFPU); } KSTK_EUEN(tsk) &= ~(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN); @@ -129,4 +185,127 @@ static inline union fpureg *get_fpu_regs(struct task_struct *tsk) return tsk->thread.fpu.fpr; } +static inline int is_simd_owner(void) +{ + return test_thread_flag(TIF_USEDSIMD); +} + +#ifdef CONFIG_CPU_HAS_LSX + +static inline void enable_lsx(void) +{ + if (cpu_has_lsx) + csr_xchg32(CSR_EUEN_LSXEN, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN); +} + +static inline void disable_lsx(void) +{ + if (cpu_has_lsx) + csr_xchg32(0, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN); +} + +static inline void save_lsx(struct task_struct *t) +{ + if (cpu_has_lsx) + _save_lsx(&t->thread.fpu); +} + +static inline void restore_lsx(struct task_struct *t) +{ + if (cpu_has_lsx) + _restore_lsx(&t->thread.fpu); +} + +static inline void init_lsx_upper(void) +{ + /* + * Check cpu_has_lsx only if it's a constant. This will allow the + * compiler to optimise out code for CPUs without LSX without adding + * an extra redundant check for CPUs with LSX. + */ + if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx) + return; + + _init_lsx_upper(); +} + +static inline void restore_lsx_upper(struct task_struct *t) +{ + if (cpu_has_lsx) + _restore_lsx_upper(&t->thread.fpu); +} + +#else +static inline void enable_lsx(void) {} +static inline void disable_lsx(void) {} +static inline void save_lsx(struct task_struct *t) {} +static inline void restore_lsx(struct task_struct *t) {} +static inline void init_lsx_upper(void) {} +static inline void restore_lsx_upper(struct task_struct *t) {} +#endif + +#ifdef CONFIG_CPU_HAS_LASX + +static inline void enable_lasx(void) +{ + + if (cpu_has_lasx) + csr_xchg32(CSR_EUEN_LASXEN, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN); +} + +static inline void disable_lasx(void) +{ + if (cpu_has_lasx) + csr_xchg32(0, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN); +} + +static inline void save_lasx(struct task_struct *t) +{ + if (cpu_has_lasx) + _save_lasx(&t->thread.fpu); +} + +static inline void restore_lasx(struct task_struct *t) +{ + if (cpu_has_lasx) + _restore_lasx(&t->thread.fpu); +} + +static inline void init_lasx_upper(void) +{ + if (cpu_has_lasx) + _init_lasx_upper(); +} + +static inline void restore_lasx_upper(struct task_struct *t) +{ + if (cpu_has_lasx) + _restore_lasx_upper(&t->thread.fpu); +} + +#else +static inline void enable_lasx(void) {} +static inline void disable_lasx(void) {} +static inline void save_lasx(struct task_struct *t) {} +static inline void restore_lasx(struct task_struct *t) {} +static inline void init_lasx_upper(void) {} +static inline void restore_lasx_upper(struct task_struct *t) {} +#endif + +static inline int thread_lsx_context_live(void) +{ + if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx) + return 0; + + return test_thread_flag(TIF_LSX_CTX_LIVE); +} + +static inline int thread_lasx_context_live(void) +{ + if (__builtin_constant_p(cpu_has_lasx) && !cpu_has_lasx) + return 0; + + return test_thread_flag(TIF_LASX_CTX_LIVE); +} + #endif /* _ASM_FPU_H */ diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h index 82d811b5c6e9..06e3be52cb04 100644 --- a/arch/loongarch/include/uapi/asm/ptrace.h +++ b/arch/loongarch/include/uapi/asm/ptrace.h @@ -41,9 +41,19 @@ struct user_pt_regs { } __attribute__((aligned(8))); struct user_fp_state { - uint64_t fpr[32]; - uint64_t fcc; - uint32_t fcsr; + uint64_t fpr[32]; + uint64_t fcc; + uint32_t fcsr; +}; + +struct user_lsx_state { + /* 32 registers, 128 bits width per register. */ + uint64_t vregs[32*2]; +}; + +struct user_lasx_state { + /* 32 registers, 256 bits width per register. */ + uint64_t vregs[32*4]; }; struct user_watch_state { diff --git a/arch/loongarch/include/uapi/asm/sigcontext.h b/arch/loongarch/include/uapi/asm/sigcontext.h index 52e49b8bf4be..4cd7d16f7037 100644 --- a/arch/loongarch/include/uapi/asm/sigcontext.h +++ b/arch/loongarch/include/uapi/asm/sigcontext.h @@ -41,4 +41,22 @@ struct fpu_context { __u32 fcsr; }; +/* LSX context */ +#define LSX_CTX_MAGIC 0x53580001 +#define LSX_CTX_ALIGN 16 +struct lsx_context { + __u64 regs[2*32]; + __u64 fcc; + __u32 fcsr; +}; + +/* LASX context */ +#define LASX_CTX_MAGIC 0x41535801 +#define LASX_CTX_ALIGN 32 +struct lasx_context { + __u64 regs[4*32]; + __u64 fcc; + __u32 fcsr; +}; + #endif /* _UAPI_ASM_SIGCONTEXT_H */ diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 5adf0f736c6d..f42acc6c8df6 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -116,6 +116,18 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_FPU; elf_hwcap |= HWCAP_LOONGARCH_FPU; } +#ifdef CONFIG_CPU_HAS_LSX + if (config & CPUCFG2_LSX) { + c->options |= LOONGARCH_CPU_LSX; + elf_hwcap |= HWCAP_LOONGARCH_LSX; + } +#endif +#ifdef CONFIG_CPU_HAS_LASX + if (config & CPUCFG2_LASX) { + c->options |= LOONGARCH_CPU_LASX; + elf_hwcap |= HWCAP_LOONGARCH_LASX; + } +#endif if (config & CPUCFG2_COMPLEX) { c->options |= LOONGARCH_CPU_COMPLEX; elf_hwcap |= HWCAP_LOONGARCH_COMPLEX; diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index ccde94140c89..f3df5f0a4509 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -145,6 +145,154 @@ movgr2fcsr fcsr0, \tmp0 .endm + .macro sc_save_lsx base +#ifdef CONFIG_CPU_HAS_LSX + EX vst $vr0, \base, (0 * LSX_REG_WIDTH) + EX vst $vr1, \base, (1 * LSX_REG_WIDTH) + EX vst $vr2, \base, (2 * LSX_REG_WIDTH) + EX vst $vr3, \base, (3 * LSX_REG_WIDTH) + EX vst $vr4, \base, (4 * LSX_REG_WIDTH) + EX vst $vr5, \base, (5 * LSX_REG_WIDTH) + EX vst $vr6, \base, (6 * LSX_REG_WIDTH) + EX vst $vr7, \base, (7 * LSX_REG_WIDTH) + EX vst $vr8, \base, (8 * LSX_REG_WIDTH) + EX vst $vr9, \base, (9 * LSX_REG_WIDTH) + EX vst $vr10, \base, (10 * LSX_REG_WIDTH) + EX vst $vr11, \base, (11 * LSX_REG_WIDTH) + EX vst $vr12, \base, (12 * LSX_REG_WIDTH) + EX vst $vr13, \base, (13 * LSX_REG_WIDTH) + EX vst $vr14, \base, (14 * LSX_REG_WIDTH) + EX vst $vr15, \base, (15 * LSX_REG_WIDTH) + EX vst $vr16, \base, (16 * LSX_REG_WIDTH) + EX vst $vr17, \base, (17 * LSX_REG_WIDTH) + EX vst $vr18, \base, (18 * LSX_REG_WIDTH) + EX vst $vr19, \base, (19 * LSX_REG_WIDTH) + EX vst $vr20, \base, (20 * LSX_REG_WIDTH) + EX vst $vr21, \base, (21 * LSX_REG_WIDTH) + EX vst $vr22, \base, (22 * LSX_REG_WIDTH) + EX vst $vr23, \base, (23 * LSX_REG_WIDTH) + EX vst $vr24, \base, (24 * LSX_REG_WIDTH) + EX vst $vr25, \base, (25 * LSX_REG_WIDTH) + EX vst $vr26, \base, (26 * LSX_REG_WIDTH) + EX vst $vr27, \base, (27 * LSX_REG_WIDTH) + EX vst $vr28, \base, (28 * LSX_REG_WIDTH) + EX vst $vr29, \base, (29 * LSX_REG_WIDTH) + EX vst $vr30, \base, (30 * LSX_REG_WIDTH) + EX vst $vr31, \base, (31 * LSX_REG_WIDTH) +#endif + .endm + + .macro sc_restore_lsx base +#ifdef CONFIG_CPU_HAS_LSX + EX vld $vr0, \base, (0 * LSX_REG_WIDTH) + EX vld $vr1, \base, (1 * LSX_REG_WIDTH) + EX vld $vr2, \base, (2 * LSX_REG_WIDTH) + EX vld $vr3, \base, (3 * LSX_REG_WIDTH) + EX vld $vr4, \base, (4 * LSX_REG_WIDTH) + EX vld $vr5, \base, (5 * LSX_REG_WIDTH) + EX vld $vr6, \base, (6 * LSX_REG_WIDTH) + EX vld $vr7, \base, (7 * LSX_REG_WIDTH) + EX vld $vr8, \base, (8 * LSX_REG_WIDTH) + EX vld $vr9, \base, (9 * LSX_REG_WIDTH) + EX vld $vr10, \base, (10 * LSX_REG_WIDTH) + EX vld $vr11, \base, (11 * LSX_REG_WIDTH) + EX vld $vr12, \base, (12 * LSX_REG_WIDTH) + EX vld $vr13, \base, (13 * LSX_REG_WIDTH) + EX vld $vr14, \base, (14 * LSX_REG_WIDTH) + EX vld $vr15, \base, (15 * LSX_REG_WIDTH) + EX vld $vr16, \base, (16 * LSX_REG_WIDTH) + EX vld $vr17, \base, (17 * LSX_REG_WIDTH) + EX vld $vr18, \base, (18 * LSX_REG_WIDTH) + EX vld $vr19, \base, (19 * LSX_REG_WIDTH) + EX vld $vr20, \base, (20 * LSX_REG_WIDTH) + EX vld $vr21, \base, (21 * LSX_REG_WIDTH) + EX vld $vr22, \base, (22 * LSX_REG_WIDTH) + EX vld $vr23, \base, (23 * LSX_REG_WIDTH) + EX vld $vr24, \base, (24 * LSX_REG_WIDTH) + EX vld $vr25, \base, (25 * LSX_REG_WIDTH) + EX vld $vr26, \base, (26 * LSX_REG_WIDTH) + EX vld $vr27, \base, (27 * LSX_REG_WIDTH) + EX vld $vr28, \base, (28 * LSX_REG_WIDTH) + EX vld $vr29, \base, (29 * LSX_REG_WIDTH) + EX vld $vr30, \base, (30 * LSX_REG_WIDTH) + EX vld $vr31, \base, (31 * LSX_REG_WIDTH) +#endif + .endm + + .macro sc_save_lasx base +#ifdef CONFIG_CPU_HAS_LASX + EX xvst $xr0, \base, (0 * LASX_REG_WIDTH) + EX xvst $xr1, \base, (1 * LASX_REG_WIDTH) + EX xvst $xr2, \base, (2 * LASX_REG_WIDTH) + EX xvst $xr3, \base, (3 * LASX_REG_WIDTH) + EX xvst $xr4, \base, (4 * LASX_REG_WIDTH) + EX xvst $xr5, \base, (5 * LASX_REG_WIDTH) + EX xvst $xr6, \base, (6 * LASX_REG_WIDTH) + EX xvst $xr7, \base, (7 * LASX_REG_WIDTH) + EX xvst $xr8, \base, (8 * LASX_REG_WIDTH) + EX xvst $xr9, \base, (9 * LASX_REG_WIDTH) + EX xvst $xr10, \base, (10 * LASX_REG_WIDTH) + EX xvst $xr11, \base, (11 * LASX_REG_WIDTH) + EX xvst $xr12, \base, (12 * LASX_REG_WIDTH) + EX xvst $xr13, \base, (13 * LASX_REG_WIDTH) + EX xvst $xr14, \base, (14 * LASX_REG_WIDTH) + EX xvst $xr15, \base, (15 * LASX_REG_WIDTH) + EX xvst $xr16, \base, (16 * LASX_REG_WIDTH) + EX xvst $xr17, \base, (17 * LASX_REG_WIDTH) + EX xvst $xr18, \base, (18 * LASX_REG_WIDTH) + EX xvst $xr19, \base, (19 * LASX_REG_WIDTH) + EX xvst $xr20, \base, (20 * LASX_REG_WIDTH) + EX xvst $xr21, \base, (21 * LASX_REG_WIDTH) + EX xvst $xr22, \base, (22 * LASX_REG_WIDTH) + EX xvst $xr23, \base, (23 * LASX_REG_WIDTH) + EX xvst $xr24, \base, (24 * LASX_REG_WIDTH) + EX xvst $xr25, \base, (25 * LASX_REG_WIDTH) + EX xvst $xr26, \base, (26 * LASX_REG_WIDTH) + EX xvst $xr27, \base, (27 * LASX_REG_WIDTH) + EX xvst $xr28, \base, (28 * LASX_REG_WIDTH) + EX xvst $xr29, \base, (29 * LASX_REG_WIDTH) + EX xvst $xr30, \base, (30 * LASX_REG_WIDTH) + EX xvst $xr31, \base, (31 * LASX_REG_WIDTH) +#endif + .endm + + .macro sc_restore_lasx base +#ifdef CONFIG_CPU_HAS_LASX + EX xvld $xr0, \base, (0 * LASX_REG_WIDTH) + EX xvld $xr1, \base, (1 * LASX_REG_WIDTH) + EX xvld $xr2, \base, (2 * LASX_REG_WIDTH) + EX xvld $xr3, \base, (3 * LASX_REG_WIDTH) + EX xvld $xr4, \base, (4 * LASX_REG_WIDTH) + EX xvld $xr5, \base, (5 * LASX_REG_WIDTH) + EX xvld $xr6, \base, (6 * LASX_REG_WIDTH) + EX xvld $xr7, \base, (7 * LASX_REG_WIDTH) + EX xvld $xr8, \base, (8 * LASX_REG_WIDTH) + EX xvld $xr9, \base, (9 * LASX_REG_WIDTH) + EX xvld $xr10, \base, (10 * LASX_REG_WIDTH) + EX xvld $xr11, \base, (11 * LASX_REG_WIDTH) + EX xvld $xr12, \base, (12 * LASX_REG_WIDTH) + EX xvld $xr13, \base, (13 * LASX_REG_WIDTH) + EX xvld $xr14, \base, (14 * LASX_REG_WIDTH) + EX xvld $xr15, \base, (15 * LASX_REG_WIDTH) + EX xvld $xr16, \base, (16 * LASX_REG_WIDTH) + EX xvld $xr17, \base, (17 * LASX_REG_WIDTH) + EX xvld $xr18, \base, (18 * LASX_REG_WIDTH) + EX xvld $xr19, \base, (19 * LASX_REG_WIDTH) + EX xvld $xr20, \base, (20 * LASX_REG_WIDTH) + EX xvld $xr21, \base, (21 * LASX_REG_WIDTH) + EX xvld $xr22, \base, (22 * LASX_REG_WIDTH) + EX xvld $xr23, \base, (23 * LASX_REG_WIDTH) + EX xvld $xr24, \base, (24 * LASX_REG_WIDTH) + EX xvld $xr25, \base, (25 * LASX_REG_WIDTH) + EX xvld $xr26, \base, (26 * LASX_REG_WIDTH) + EX xvld $xr27, \base, (27 * LASX_REG_WIDTH) + EX xvld $xr28, \base, (28 * LASX_REG_WIDTH) + EX xvld $xr29, \base, (29 * LASX_REG_WIDTH) + EX xvld $xr30, \base, (30 * LASX_REG_WIDTH) + EX xvld $xr31, \base, (31 * LASX_REG_WIDTH) +#endif + .endm + /* * Save a thread's fp context. */ @@ -166,6 +314,76 @@ SYM_FUNC_START(_restore_fp) jr ra SYM_FUNC_END(_restore_fp) +#ifdef CONFIG_CPU_HAS_LSX + +/* + * Save a thread's LSX vector context. + */ +SYM_FUNC_START(_save_lsx) + lsx_save_all a0 t1 t2 + jr ra +SYM_FUNC_END(_save_lsx) +EXPORT_SYMBOL(_save_lsx) + +/* + * Restore a thread's LSX vector context. + */ +SYM_FUNC_START(_restore_lsx) + lsx_restore_all a0 t1 t2 + jr ra +SYM_FUNC_END(_restore_lsx) + +SYM_FUNC_START(_save_lsx_upper) + lsx_save_all_upper a0 t0 t1 + jr ra +SYM_FUNC_END(_save_lsx_upper) + +SYM_FUNC_START(_restore_lsx_upper) + lsx_restore_all_upper a0 t0 t1 + jr ra +SYM_FUNC_END(_restore_lsx_upper) + +SYM_FUNC_START(_init_lsx_upper) + lsx_init_all_upper t1 + jr ra +SYM_FUNC_END(_init_lsx_upper) +#endif + +#ifdef CONFIG_CPU_HAS_LASX + +/* + * Save a thread's LASX vector context. + */ +SYM_FUNC_START(_save_lasx) + lasx_save_all a0 t1 t2 + jr ra +SYM_FUNC_END(_save_lasx) +EXPORT_SYMBOL(_save_lasx) + +/* + * Restore a thread's LASX vector context. + */ +SYM_FUNC_START(_restore_lasx) + lasx_restore_all a0 t1 t2 + jr ra +SYM_FUNC_END(_restore_lasx) + +SYM_FUNC_START(_save_lasx_upper) + lasx_save_all_upper a0 t0 t1 + jr ra +SYM_FUNC_END(_save_lasx_upper) + +SYM_FUNC_START(_restore_lasx_upper) + lasx_restore_all_upper a0 t0 t1 + jr ra +SYM_FUNC_END(_restore_lasx_upper) + +SYM_FUNC_START(_init_lasx_upper) + lasx_init_all_upper t1 + jr ra +SYM_FUNC_END(_init_lasx_upper) +#endif + /* * Load the FPU with signalling NANS. This bit pattern we're using has * the property that no matter whether considered as single or as double @@ -244,6 +462,58 @@ SYM_FUNC_START(_restore_fp_context) jr ra SYM_FUNC_END(_restore_fp_context) +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_save_lsx_context) + sc_save_fcc a1, t0, t1 + sc_save_fcsr a2, t0 + sc_save_lsx a0 + li.w a0, 0 # success + jr ra +SYM_FUNC_END(_save_lsx_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_restore_lsx_context) + sc_restore_lsx a0 + sc_restore_fcc a1, t1, t2 + sc_restore_fcsr a2, t1 + li.w a0, 0 # success + jr ra +SYM_FUNC_END(_restore_lsx_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_save_lasx_context) + sc_save_fcc a1, t0, t1 + sc_save_fcsr a2, t0 + sc_save_lasx a0 + li.w a0, 0 # success + jr ra +SYM_FUNC_END(_save_lasx_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_restore_lasx_context) + sc_restore_lasx a0 + sc_restore_fcc a1, t1, t2 + sc_restore_fcsr a2, t1 + li.w a0, 0 # success + jr ra +SYM_FUNC_END(_restore_lasx_context) + SYM_FUNC_START(fault) li.w a0, -EFAULT # failure jr ra diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 9535a0662480..2e04eb07abb6 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -117,8 +117,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) */ preempt_disable(); - if (is_fpu_owner()) - save_fp(current); + if (is_fpu_owner()) { + if (is_lasx_enabled()) + save_lasx(current); + else if (is_lsx_enabled()) + save_lsx(current); + else + save_fp(current); + } preempt_enable(); diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c index 5fcffb452367..a0767c3a0f0a 100644 --- a/arch/loongarch/kernel/ptrace.c +++ b/arch/loongarch/kernel/ptrace.c @@ -250,6 +250,90 @@ static int cfg_set(struct task_struct *target, return 0; } +#ifdef CONFIG_CPU_HAS_LSX + +static void copy_pad_fprs(struct task_struct *target, + const struct user_regset *regset, + struct membuf *to, unsigned int live_sz) +{ + int i, j; + unsigned long long fill = ~0ull; + unsigned int cp_sz, pad_sz; + + cp_sz = min(regset->size, live_sz); + pad_sz = regset->size - cp_sz; + WARN_ON(pad_sz % sizeof(fill)); + + for (i = 0; i < NUM_FPU_REGS; i++) { + membuf_write(to, &target->thread.fpu.fpr[i], cp_sz); + for (j = 0; j < (pad_sz / sizeof(fill)); j++) { + membuf_store(to, fill); + } + } +} + +static int simd_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + const unsigned int wr_size = NUM_FPU_REGS * regset->size; + + if (!tsk_used_math(target)) { + /* The task hasn't used FP or LSX, fill with 0xff */ + copy_pad_fprs(target, regset, &to, 0); + } else if (!test_tsk_thread_flag(target, TIF_LSX_CTX_LIVE)) { + /* Copy scalar FP context, fill the rest with 0xff */ + copy_pad_fprs(target, regset, &to, 8); +#ifdef CONFIG_CPU_HAS_LASX + } else if (!test_tsk_thread_flag(target, TIF_LASX_CTX_LIVE)) { + /* Copy LSX 128 Bit context, fill the rest with 0xff */ + copy_pad_fprs(target, regset, &to, 16); +#endif + } else if (sizeof(target->thread.fpu.fpr[0]) == regset->size) { + /* Trivially copy the vector registers */ + membuf_write(&to, &target->thread.fpu.fpr, wr_size); + } else { + /* Copy as much context as possible, fill the rest with 0xff */ + copy_pad_fprs(target, regset, &to, sizeof(target->thread.fpu.fpr[0])); + } + + return 0; +} + +static int simd_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + const unsigned int wr_size = NUM_FPU_REGS * regset->size; + unsigned int cp_sz; + int i, err, start; + + init_fp_ctx(target); + + if (sizeof(target->thread.fpu.fpr[0]) == regset->size) { + /* Trivially copy the vector registers */ + err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.fpr, + 0, wr_size); + } else { + /* Copy as much context as possible */ + cp_sz = min_t(unsigned int, regset->size, + sizeof(target->thread.fpu.fpr[0])); + + i = start = err = 0; + for (; i < NUM_FPU_REGS; i++, start += regset->size) { + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.fpr[i], + start, start + cp_sz); + } + } + + return err; +} + +#endif /* CONFIG_CPU_HAS_LSX */ + #ifdef CONFIG_HAVE_HW_BREAKPOINT /* @@ -708,6 +792,12 @@ enum loongarch_regset { REGSET_GPR, REGSET_FPR, REGSET_CPUCFG, +#ifdef CONFIG_CPU_HAS_LSX + REGSET_LSX, +#endif +#ifdef CONFIG_CPU_HAS_LASX + REGSET_LASX, +#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT REGSET_HW_BREAK, REGSET_HW_WATCH, @@ -739,6 +829,26 @@ static const struct user_regset loongarch64_regsets[] = { .regset_get = cfg_get, .set = cfg_set, }, +#ifdef CONFIG_CPU_HAS_LSX + [REGSET_LSX] = { + .core_note_type = NT_LOONGARCH_LSX, + .n = NUM_FPU_REGS, + .size = 16, + .align = 16, + .regset_get = simd_get, + .set = simd_set, + }, +#endif +#ifdef CONFIG_CPU_HAS_LASX + [REGSET_LASX] = { + .core_note_type = NT_LOONGARCH_LASX, + .n = NUM_FPU_REGS, + .size = 32, + .align = 32, + .regset_get = simd_get, + .set = simd_set, + }, +#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT [REGSET_HW_BREAK] = { .core_note_type = NT_LOONGARCH_HW_BREAK, diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index 8f5b7986374b..ceb899366c0a 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -50,6 +50,14 @@ extern asmlinkage int _save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); extern asmlinkage int _restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); +extern asmlinkage int +_save_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +extern asmlinkage int +_restore_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +extern asmlinkage int +_save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +extern asmlinkage int +_restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); struct rt_sigframe { struct siginfo rs_info; @@ -65,6 +73,8 @@ struct extctx_layout { unsigned long size; unsigned int flags; struct _ctx_layout fpu; + struct _ctx_layout lsx; + struct _ctx_layout lasx; struct _ctx_layout end; }; @@ -115,6 +125,96 @@ static int copy_fpu_from_sigcontext(struct fpu_context __user *ctx) return err; } +static int copy_lsx_to_sigcontext(struct lsx_context __user *ctx) +{ + int i; + int err = 0; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + for (i = 0; i < NUM_FPU_REGS; i++) { + err |= __put_user(get_fpr64(¤t->thread.fpu.fpr[i], 0), + ®s[2*i]); + err |= __put_user(get_fpr64(¤t->thread.fpu.fpr[i], 1), + ®s[2*i+1]); + } + err |= __put_user(current->thread.fpu.fcc, fcc); + err |= __put_user(current->thread.fpu.fcsr, fcsr); + + return err; +} + +static int copy_lsx_from_sigcontext(struct lsx_context __user *ctx) +{ + int i; + int err = 0; + u64 fpr_val; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + for (i = 0; i < NUM_FPU_REGS; i++) { + err |= __get_user(fpr_val, ®s[2*i]); + set_fpr64(¤t->thread.fpu.fpr[i], 0, fpr_val); + err |= __get_user(fpr_val, ®s[2*i+1]); + set_fpr64(¤t->thread.fpu.fpr[i], 1, fpr_val); + } + err |= __get_user(current->thread.fpu.fcc, fcc); + err |= __get_user(current->thread.fpu.fcsr, fcsr); + + return err; +} + +static int copy_lasx_to_sigcontext(struct lasx_context __user *ctx) +{ + int i; + int err = 0; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + for (i = 0; i < NUM_FPU_REGS; i++) { + err |= __put_user(get_fpr64(¤t->thread.fpu.fpr[i], 0), + ®s[4*i]); + err |= __put_user(get_fpr64(¤t->thread.fpu.fpr[i], 1), + ®s[4*i+1]); + err |= __put_user(get_fpr64(¤t->thread.fpu.fpr[i], 2), + ®s[4*i+2]); + err |= __put_user(get_fpr64(¤t->thread.fpu.fpr[i], 3), + ®s[4*i+3]); + } + err |= __put_user(current->thread.fpu.fcc, fcc); + err |= __put_user(current->thread.fpu.fcsr, fcsr); + + return err; +} + +static int copy_lasx_from_sigcontext(struct lasx_context __user *ctx) +{ + int i; + int err = 0; + u64 fpr_val; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + for (i = 0; i < NUM_FPU_REGS; i++) { + err |= __get_user(fpr_val, ®s[4*i]); + set_fpr64(¤t->thread.fpu.fpr[i], 0, fpr_val); + err |= __get_user(fpr_val, ®s[4*i+1]); + set_fpr64(¤t->thread.fpu.fpr[i], 1, fpr_val); + err |= __get_user(fpr_val, ®s[4*i+2]); + set_fpr64(¤t->thread.fpu.fpr[i], 2, fpr_val); + err |= __get_user(fpr_val, ®s[4*i+3]); + set_fpr64(¤t->thread.fpu.fpr[i], 3, fpr_val); + } + err |= __get_user(current->thread.fpu.fcc, fcc); + err |= __get_user(current->thread.fpu.fcsr, fcsr); + + return err; +} + /* * Wrappers for the assembly _{save,restore}_fp_context functions. */ @@ -136,6 +236,42 @@ static int restore_hw_fpu_context(struct fpu_context __user *ctx) return _restore_fp_context(regs, fcc, fcsr); } +static int save_hw_lsx_context(struct lsx_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + return _save_lsx_context(regs, fcc, fcsr); +} + +static int restore_hw_lsx_context(struct lsx_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + return _restore_lsx_context(regs, fcc, fcsr); +} + +static int save_hw_lasx_context(struct lasx_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + return _save_lasx_context(regs, fcc, fcsr); +} + +static int restore_hw_lasx_context(struct lasx_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + return _restore_lasx_context(regs, fcc, fcsr); +} + static int fcsr_pending(unsigned int __user *fcsr) { int err, sig = 0; @@ -227,6 +363,162 @@ static int protected_restore_fpu_context(struct extctx_layout *extctx) return err ?: sig; } +static int protected_save_lsx_context(struct extctx_layout *extctx) +{ + int err = 0; + struct sctx_info __user *info = extctx->lsx.addr; + struct lsx_context __user *lsx_ctx = (struct lsx_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lsx_ctx->regs; + uint64_t __user *fcc = &lsx_ctx->fcc; + uint32_t __user *fcsr = &lsx_ctx->fcsr; + + while (1) { + lock_fpu_owner(); + if (is_lsx_enabled()) + err = save_hw_lsx_context(lsx_ctx); + else { + if (is_fpu_owner()) + save_fp(current); + err = copy_lsx_to_sigcontext(lsx_ctx); + } + unlock_fpu_owner(); + + err |= __put_user(LSX_CTX_MAGIC, &info->magic); + err |= __put_user(extctx->lsx.size, &info->size); + + if (likely(!err)) + break; + /* Touch the LSX context and try again */ + err = __put_user(0, ®s[0]) | + __put_user(0, ®s[32*2-1]) | + __put_user(0, fcc) | + __put_user(0, fcsr); + if (err) + return err; /* really bad sigcontext */ + } + + return err; +} + +static int protected_restore_lsx_context(struct extctx_layout *extctx) +{ + int err = 0, sig = 0, tmp __maybe_unused; + struct sctx_info __user *info = extctx->lsx.addr; + struct lsx_context __user *lsx_ctx = (struct lsx_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lsx_ctx->regs; + uint64_t __user *fcc = &lsx_ctx->fcc; + uint32_t __user *fcsr = &lsx_ctx->fcsr; + + err = sig = fcsr_pending(fcsr); + if (err < 0) + return err; + + while (1) { + lock_fpu_owner(); + if (is_lsx_enabled()) + err = restore_hw_lsx_context(lsx_ctx); + else { + err = copy_lsx_from_sigcontext(lsx_ctx); + if (is_fpu_owner()) + restore_fp(current); + } + unlock_fpu_owner(); + + if (likely(!err)) + break; + /* Touch the LSX context and try again */ + err = __get_user(tmp, ®s[0]) | + __get_user(tmp, ®s[32*2-1]) | + __get_user(tmp, fcc) | + __get_user(tmp, fcsr); + if (err) + break; /* really bad sigcontext */ + } + + return err ?: sig; +} + +static int protected_save_lasx_context(struct extctx_layout *extctx) +{ + int err = 0; + struct sctx_info __user *info = extctx->lasx.addr; + struct lasx_context __user *lasx_ctx = + (struct lasx_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lasx_ctx->regs; + uint64_t __user *fcc = &lasx_ctx->fcc; + uint32_t __user *fcsr = &lasx_ctx->fcsr; + + while (1) { + lock_fpu_owner(); + if (is_lasx_enabled()) + err = save_hw_lasx_context(lasx_ctx); + else { + if (is_lsx_enabled()) + save_lsx(current); + else if (is_fpu_owner()) + save_fp(current); + err = copy_lasx_to_sigcontext(lasx_ctx); + } + unlock_fpu_owner(); + + err |= __put_user(LASX_CTX_MAGIC, &info->magic); + err |= __put_user(extctx->lasx.size, &info->size); + + if (likely(!err)) + break; + /* Touch the LASX context and try again */ + err = __put_user(0, ®s[0]) | + __put_user(0, ®s[32*4-1]) | + __put_user(0, fcc) | + __put_user(0, fcsr); + if (err) + return err; /* really bad sigcontext */ + } + + return err; +} + +static int protected_restore_lasx_context(struct extctx_layout *extctx) +{ + int err = 0, sig = 0, tmp __maybe_unused; + struct sctx_info __user *info = extctx->lasx.addr; + struct lasx_context __user *lasx_ctx = + (struct lasx_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lasx_ctx->regs; + uint64_t __user *fcc = &lasx_ctx->fcc; + uint32_t __user *fcsr = &lasx_ctx->fcsr; + + err = sig = fcsr_pending(fcsr); + if (err < 0) + return err; + + while (1) { + lock_fpu_owner(); + if (is_lasx_enabled()) + err = restore_hw_lasx_context(lasx_ctx); + else { + err = copy_lasx_from_sigcontext(lasx_ctx); + if (is_lsx_enabled()) + restore_lsx(current); + else if (is_fpu_owner()) + restore_fp(current); + } + unlock_fpu_owner(); + + if (likely(!err)) + break; + /* Touch the LASX context and try again */ + err = __get_user(tmp, ®s[0]) | + __get_user(tmp, ®s[32*4-1]) | + __get_user(tmp, fcc) | + __get_user(tmp, fcsr); + if (err) + break; /* really bad sigcontext */ + } + + return err ?: sig; +} + static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, struct extctx_layout *extctx) { @@ -240,7 +532,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, for (i = 1; i < 32; i++) err |= __put_user(regs->regs[i], &sc->sc_regs[i]); - if (extctx->fpu.addr) + if (extctx->lasx.addr) + err |= protected_save_lasx_context(extctx); + else if (extctx->lsx.addr) + err |= protected_save_lsx_context(extctx); + else if (extctx->fpu.addr) err |= protected_save_fpu_context(extctx); /* Set the "end" magic */ @@ -274,6 +570,20 @@ static int parse_extcontext(struct sigcontext __user *sc, struct extctx_layout * extctx->fpu.addr = info; break; + case LSX_CTX_MAGIC: + if (size < (sizeof(struct sctx_info) + + sizeof(struct lsx_context))) + goto invalid; + extctx->lsx.addr = info; + break; + + case LASX_CTX_MAGIC: + if (size < (sizeof(struct sctx_info) + + sizeof(struct lasx_context))) + goto invalid; + extctx->lasx.addr = info; + break; + default: goto invalid; } @@ -319,7 +629,11 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc for (i = 1; i < 32; i++) err |= __get_user(regs->regs[i], &sc->sc_regs[i]); - if (extctx.fpu.addr) + if (extctx.lasx.addr) + err |= protected_restore_lasx_context(&extctx); + else if (extctx.lsx.addr) + err |= protected_restore_lsx_context(&extctx); + else if (extctx.fpu.addr) err |= protected_restore_fpu_context(&extctx); bad: @@ -375,7 +689,13 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon extctx->size += extctx->end.size; if (extctx->flags & SC_USED_FP) { - if (cpu_has_fpu) + if (cpu_has_lasx && thread_lasx_context_live()) + new_sp = extframe_alloc(extctx, &extctx->lasx, + sizeof(struct lasx_context), LASX_CTX_ALIGN, new_sp); + else if (cpu_has_lsx && thread_lsx_context_live()) + new_sp = extframe_alloc(extctx, &extctx->lsx, + sizeof(struct lsx_context), LSX_CTX_ALIGN, new_sp); + else if (cpu_has_fpu) new_sp = extframe_alloc(extctx, &extctx->fpu, sizeof(struct fpu_context), FPU_CTX_ALIGN, new_sp); } diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index e73d9bbe1658..e56df45f7202 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -852,12 +852,67 @@ static void init_restore_fp(void) BUG_ON(!is_fp_enabled()); } +static void init_restore_lsx(void) +{ + enable_lsx(); + + if (!thread_lsx_context_live()) { + /* First time LSX context user */ + init_restore_fp(); + init_lsx_upper(); + set_thread_flag(TIF_LSX_CTX_LIVE); + } else { + if (!is_simd_owner()) { + if (is_fpu_owner()) { + restore_lsx_upper(current); + } else { + __own_fpu(); + restore_lsx(current); + } + } + } + + set_thread_flag(TIF_USEDSIMD); + + BUG_ON(!is_fp_enabled()); + BUG_ON(!is_lsx_enabled()); +} + +static void init_restore_lasx(void) +{ + enable_lasx(); + + if (!thread_lasx_context_live()) { + /* First time LASX context user */ + init_restore_lsx(); + init_lasx_upper(); + set_thread_flag(TIF_LASX_CTX_LIVE); + } else { + if (is_fpu_owner() || is_simd_owner()) { + init_restore_lsx(); + restore_lasx_upper(current); + } else { + __own_fpu(); + enable_lsx(); + restore_lasx(current); + } + } + + set_thread_flag(TIF_USEDSIMD); + + BUG_ON(!is_fp_enabled()); + BUG_ON(!is_lsx_enabled()); + BUG_ON(!is_lasx_enabled()); +} + asmlinkage void noinstr do_fpu(struct pt_regs *regs) { irqentry_state_t state = irqentry_enter(regs); local_irq_enable(); die_if_kernel("do_fpu invoked from kernel context!", regs); + BUG_ON(is_lsx_enabled()); + BUG_ON(is_lasx_enabled()); preempt_disable(); init_restore_fp(); @@ -872,9 +927,20 @@ asmlinkage void noinstr do_lsx(struct pt_regs *regs) irqentry_state_t state = irqentry_enter(regs); local_irq_enable(); - force_sig(SIGILL); - local_irq_disable(); + if (!cpu_has_lsx) { + force_sig(SIGILL); + goto out; + } + + die_if_kernel("do_lsx invoked from kernel context!", regs); + BUG_ON(is_lasx_enabled()); + preempt_disable(); + init_restore_lsx(); + preempt_enable(); + +out: + local_irq_disable(); irqentry_exit(regs, state); } @@ -883,9 +949,19 @@ asmlinkage void noinstr do_lasx(struct pt_regs *regs) irqentry_state_t state = irqentry_enter(regs); local_irq_enable(); - force_sig(SIGILL); - local_irq_disable(); + if (!cpu_has_lasx) { + force_sig(SIGILL); + goto out; + } + + die_if_kernel("do_lasx invoked from kernel context!", regs); + preempt_disable(); + init_restore_lasx(); + preempt_enable(); + +out: + local_irq_disable(); irqentry_exit(regs, state); } |