author    Catalin Marinas <catalin.marinas@arm.com>  2023-02-01 17:55:29 +0000
committer Catalin Marinas <catalin.marinas@arm.com>  2023-02-01 17:55:29 +0000
commit    ea776e4932302b965a2800e7905d9fa48c0d9e85 (patch)
tree      f483f2e00022301f9936be05f8e4f97480faf4f5 /arch
parent    8ced928019353eaecbffee566d7ed6a9a9e60e78 (diff)
parent    b2ab432bcf65e6fa3ec3fef6dd08796404b009d0 (diff)
Merge branches 'for-next/tpidr2' and 'for-next/sme2' into for-next/signal
Patches on this branch depend on the branches merged above.
Diffstat (limited to 'arch')
-rw-r--r--  arch/arm64/include/asm/cpufeature.h       |   6
-rw-r--r--  arch/arm64/include/asm/esr.h              |   1
-rw-r--r--  arch/arm64/include/asm/fpsimd.h           |  30
-rw-r--r--  arch/arm64/include/asm/fpsimdmacros.h     |  22
-rw-r--r--  arch/arm64/include/asm/hwcap.h            |   6
-rw-r--r--  arch/arm64/include/asm/processor.h        |   2
-rw-r--r--  arch/arm64/include/uapi/asm/hwcap.h       |   6
-rw-r--r--  arch/arm64/include/uapi/asm/sigcontext.h  |  19
-rw-r--r--  arch/arm64/kernel/cpufeature.c            |  28
-rw-r--r--  arch/arm64/kernel/cpuinfo.c               |   6
-rw-r--r--  arch/arm64/kernel/entry-fpsimd.S          |  30
-rw-r--r--  arch/arm64/kernel/fpsimd.c                |  47
-rw-r--r--  arch/arm64/kernel/hyp-stub.S              |   6
-rw-r--r--  arch/arm64/kernel/idreg-override.c        |   1
-rw-r--r--  arch/arm64/kernel/process.c               |  21
-rw-r--r--  arch/arm64/kernel/ptrace.c                |  60
-rw-r--r--  arch/arm64/kernel/signal.c                | 113
-rw-r--r--  arch/arm64/kvm/fpsimd.c                   |   2
-rw-r--r--  arch/arm64/tools/cpucaps                  |   1
-rw-r--r--  arch/arm64/tools/sysreg                   |  27
20 files changed, 376 insertions, 58 deletions
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 03d1c9d7af82..fc2c739f48f1 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -769,6 +769,12 @@ static __always_inline bool system_supports_sme(void)
cpus_have_const_cap(ARM64_SME);
}
+static __always_inline bool system_supports_sme2(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_SME) &&
+ cpus_have_const_cap(ARM64_SME2);
+}
+
static __always_inline bool system_supports_fa64(void)
{
return IS_ENABLED(CONFIG_ARM64_SME) &&
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 15b34fbfca66..5f3271e9d5df 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -341,6 +341,7 @@
#define ESR_ELx_SME_ISS_ILL 1
#define ESR_ELx_SME_ISS_SM_DISABLED 2
#define ESR_ELx_SME_ISS_ZA_DISABLED 3
+#define ESR_ELx_SME_ISS_ZT_DISABLED 4
#ifndef __ASSEMBLY__
#include <asm/types.h>
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index e6fa1e2982c8..67f2fb781f59 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -61,7 +61,7 @@ extern void fpsimd_kvm_prepare(void);
struct cpu_fp_state {
struct user_fpsimd_state *st;
void *sve_state;
- void *za_state;
+ void *sme_state;
u64 *svcr;
unsigned int sve_vl;
unsigned int sme_vl;
@@ -105,6 +105,13 @@ static inline void *sve_pffr(struct thread_struct *thread)
return (char *)thread->sve_state + sve_ffr_offset(vl);
}
+static inline void *thread_zt_state(struct thread_struct *thread)
+{
+ /* The ZT register state is stored immediately after the ZA state */
+ unsigned int sme_vq = sve_vq_from_vl(thread_get_sme_vl(thread));
+ return thread->sme_state + ZA_SIG_REGS_SIZE(sme_vq);
+}
+
extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
extern void sve_load_state(void const *state, u32 const *pfpsr,
int restore_ffr);
@@ -112,12 +119,13 @@ extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
extern unsigned int sve_get_vl(void);
extern void sve_set_vq(unsigned long vq_minus_1);
extern void sme_set_vq(unsigned long vq_minus_1);
-extern void za_save_state(void *state);
-extern void za_load_state(void const *state);
+extern void sme_save_state(void *state, int zt);
+extern void sme_load_state(void const *state, int zt);
struct arm64_cpu_capabilities;
extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
+extern void sme2_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern u64 read_zcr_features(void);
@@ -355,14 +363,20 @@ extern int sme_get_current_vl(void);
/*
* Return how many bytes of memory are required to store the full SME
- * specific state (currently just ZA) for task, given task's currently
- * configured vector length.
+ * specific state for task, given task's currently configured vector
+ * length.
*/
-static inline size_t za_state_size(struct task_struct const *task)
+static inline size_t sme_state_size(struct task_struct const *task)
{
unsigned int vl = task_get_sme_vl(task);
+ size_t size;
+
+ size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+
+ if (system_supports_sme2())
+ size += ZT_SIG_REG_SIZE;
- return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+ return size;
}
#else
@@ -382,7 +396,7 @@ static inline int sme_max_virtualisable_vl(void) { return 0; }
static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
static inline int sme_get_current_vl(void) { return -EINVAL; }
-static inline size_t za_state_size(struct task_struct const *task)
+static inline size_t sme_state_size(struct task_struct const *task)
{
return 0;
}
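
For orientation, the per-task buffer renamed here holds the ZA array ((SVL/8) x (SVL/8) bytes) first, with the 64-byte ZT0 image following at the offset returned by thread_zt_state(). A standalone sketch of that arithmetic (not part of the patch; the macro names are local copies of the sigcontext definitions):

    #include <stddef.h>
    #include <stdio.h>

    #define VQ_BYTES      16                          /* __SVE_VQ_BYTES */
    #define ZA_BYTES(vq)  (((vq) * VQ_BYTES) * ((vq) * VQ_BYTES))
    #define ZT0_BYTES     64                          /* ZT_SIG_REG_BYTES */

    int main(void)
    {
            /* ZT0 sits immediately after ZA, as in thread_zt_state() above */
            for (unsigned int vq = 1; vq <= 16; vq *= 2)
                    printf("SVL %4u bytes: ZA %6zu bytes, ZT0 at offset %6zu\n",
                           vq * VQ_BYTES, (size_t)ZA_BYTES(vq), (size_t)ZA_BYTES(vq));
            return 0;
    }
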
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index 5e0910cf4832..cd03819a3b68 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -221,6 +221,28 @@
.endm
/*
+ * LDR (ZT0)
+ *
+ * LDR ZT0, nx
+ */
+.macro _ldr_zt nx
+ _check_general_reg \nx
+ .inst 0xe11f8000 \
+ | (\nx << 5)
+.endm
+
+/*
+ * STR (ZT0)
+ *
+ * STR ZT0, nx
+ */
+.macro _str_zt nx
+ _check_general_reg \nx
+ .inst 0xe13f8000 \
+ | (\nx << 5)
+.endm
+
+/*
* Zero the entire ZA array
* ZERO ZA
*/
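
The two macros above hand-assemble the SME2 LDR/STR (ZT0) instructions so that older assemblers can still build the kernel; the base address register index lands in bits [9:5] of the encoding. A hypothetical C helper expressing the same encodings:

    #include <stdint.h>

    /* Build the LDR ZT0, [Xn] / STR ZT0, [Xn] encodings used by _ldr_zt/_str_zt.
     * xn must be a general purpose register number (0-30). */
    static inline uint32_t ldr_zt0(unsigned int xn) { return 0xe11f8000u | (xn << 5); }
    static inline uint32_t str_zt0(unsigned int xn) { return 0xe13f8000u | (xn << 5); }
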
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 06dd12c514e6..475c803ecf42 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -123,6 +123,12 @@
#define KERNEL_HWCAP_CSSC __khwcap2_feature(CSSC)
#define KERNEL_HWCAP_RPRFM __khwcap2_feature(RPRFM)
#define KERNEL_HWCAP_SVE2P1 __khwcap2_feature(SVE2P1)
+#define KERNEL_HWCAP_SME2 __khwcap2_feature(SME2)
+#define KERNEL_HWCAP_SME2P1 __khwcap2_feature(SME2P1)
+#define KERNEL_HWCAP_SME_I16I32 __khwcap2_feature(SME_I16I32)
+#define KERNEL_HWCAP_SME_BI32I32 __khwcap2_feature(SME_BI32I32)
+#define KERNEL_HWCAP_SME_B16B16 __khwcap2_feature(SME_B16B16)
+#define KERNEL_HWCAP_SME_F16F16 __khwcap2_feature(SME_F16F16)
/*
* This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index d51b32a69309..3918f2a67970 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -161,7 +161,7 @@ struct thread_struct {
enum fp_type fp_type; /* registers FPSIMD or SVE? */
unsigned int fpsimd_cpu;
void *sve_state; /* SVE registers, if any */
- void *za_state; /* ZA register, if any */
+ void *sme_state; /* ZA and ZT state, if any */
unsigned int vl[ARM64_VEC_MAX]; /* vector length */
unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */
unsigned long fault_address; /* fault info */
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index b713d30544f1..69a4fb749c65 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -96,5 +96,11 @@
#define HWCAP2_CSSC (1UL << 34)
#define HWCAP2_RPRFM (1UL << 35)
#define HWCAP2_SVE2P1 (1UL << 36)
+#define HWCAP2_SME2 (1UL << 37)
+#define HWCAP2_SME2P1 (1UL << 38)
+#define HWCAP2_SME_I16I32 (1UL << 39)
+#define HWCAP2_SME_BI32I32 (1UL << 40)
+#define HWCAP2_SME_B16B16 (1UL << 41)
+#define HWCAP2_SME_F16F16 (1UL << 42)
#endif /* _UAPI__ASM_HWCAP_H */
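
Userspace discovers the new features the usual way, via AT_HWCAP2. A minimal detection sketch (not part of the patch; the fallback defines mirror the values added above so it builds against older headers):

    #include <stdio.h>
    #include <sys/auxv.h>

    #ifndef HWCAP2_SME2
    #define HWCAP2_SME2   (1UL << 37)
    #define HWCAP2_SME2P1 (1UL << 38)
    #endif

    int main(void)
    {
            unsigned long hwcap2 = getauxval(AT_HWCAP2);

            printf("SME2:   %s\n", (hwcap2 & HWCAP2_SME2) ? "yes" : "no");
            printf("SME2p1: %s\n", (hwcap2 & HWCAP2_SME2P1) ? "yes" : "no");
            return 0;
    }
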
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index 4b4398d7fb2d..656a10ea6c67 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -160,6 +160,14 @@ struct za_context {
__u16 __reserved[3];
};
+#define ZT_MAGIC 0x5a544e01
+
+struct zt_context {
+ struct _aarch64_ctx head;
+ __u16 nregs;
+ __u16 __reserved[3];
+};
+
#endif /* !__ASSEMBLY__ */
#include <asm/sve_context.h>
@@ -312,4 +320,15 @@ struct za_context {
#define ZA_SIG_CONTEXT_SIZE(vq) \
(ZA_SIG_REGS_OFFSET + ZA_SIG_REGS_SIZE(vq))
+#define ZT_SIG_REG_SIZE 512
+
+#define ZT_SIG_REG_BYTES (ZT_SIG_REG_SIZE / 8)
+
+#define ZT_SIG_REGS_OFFSET sizeof(struct zt_context)
+
+#define ZT_SIG_REGS_SIZE(n) (ZT_SIG_REG_BYTES * n)
+
+#define ZT_SIG_CONTEXT_SIZE(n) \
+ (sizeof(struct zt_context) + ZT_SIG_REGS_SIZE(n))
+
#endif /* _UAPI__ASM_SIGCONTEXT_H */
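
So a zt_context record carrying the single architected ZT0 register is 16 bytes of header plus 64 bytes of payload. A quick compile-time check of that arithmetic (a sketch using local copies of the structures defined above):

    #include <stdint.h>

    struct aarch64_ctx { uint32_t magic; uint32_t size; };   /* struct _aarch64_ctx */
    struct zt_ctx { struct aarch64_ctx head; uint16_t nregs; uint16_t rsvd[3]; };

    #define ZT_REG_BYTES    64                               /* 512 bits */
    #define ZT_CTX_SIZE(n)  (sizeof(struct zt_ctx) + ZT_REG_BYTES * (n))

    _Static_assert(sizeof(struct zt_ctx) == 16, "record header is 16 bytes");
    _Static_assert(ZT_CTX_SIZE(1) == 80, "one-register ZT record is 80 bytes");
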
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index a77315b338e6..5bd959bd9a1f 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -283,16 +283,26 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I32_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16B16_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, 0),
ARM64_FTR_END,
};
@@ -2649,6 +2659,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.cpu_enable = fa64_kernel_enable,
},
+ {
+ .desc = "SME2",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_SME2,
+ .sys_reg = SYS_ID_AA64PFR1_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64PFR1_EL1_SME_SHIFT,
+ .field_width = ID_AA64PFR1_EL1_SME_WIDTH,
+ .min_field_value = ID_AA64PFR1_EL1_SME_SME2,
+ .matches = has_cpuid_feature,
+ .cpu_enable = sme2_kernel_enable,
+ },
#endif /* CONFIG_ARM64_SME */
{
.desc = "WFx with timeout",
@@ -2827,11 +2849,17 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
#ifdef CONFIG_ARM64_SME
HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_SME_IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_FA64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_SMEver_SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_SMEver_SME2, CAP_HWCAP, KERNEL_HWCAP_SME2),
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F64F64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_B16B16_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_B16B16_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F16F16_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16),
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I8I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_B16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_BI32I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32),
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F32F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
#endif /* CONFIG_ARM64_SME */
{},
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 379695262b77..85e54417d141 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -119,6 +119,12 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_CSSC] = "cssc",
[KERNEL_HWCAP_RPRFM] = "rprfm",
[KERNEL_HWCAP_SVE2P1] = "sve2p1",
+ [KERNEL_HWCAP_SME2] = "sme2",
+ [KERNEL_HWCAP_SME2P1] = "sme2p1",
+ [KERNEL_HWCAP_SME_I16I32] = "smei16i32",
+ [KERNEL_HWCAP_SME_BI32I32] = "smebi32i32",
+ [KERNEL_HWCAP_SME_B16B16] = "smeb16b16",
+ [KERNEL_HWCAP_SME_F16F16] = "smef16f16",
};
#ifdef CONFIG_COMPAT
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 229436f33df5..6325db1a2179 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -100,25 +100,35 @@ SYM_FUNC_START(sme_set_vq)
SYM_FUNC_END(sme_set_vq)
/*
- * Save the SME state
+ * Save the ZA and ZT state
*
* x0 - pointer to buffer for state
+ * x1 - number of ZT registers to save
*/
-SYM_FUNC_START(za_save_state)
- _sme_rdsvl 1, 1 // x1 = VL/8
- sme_save_za 0, x1, 12
+SYM_FUNC_START(sme_save_state)
+ _sme_rdsvl 2, 1 // x2 = VL/8
+ sme_save_za 0, x2, 12 // Leaves x0 pointing to the end of ZA
+
+ cbz x1, 1f
+ _str_zt 0
+1:
ret
-SYM_FUNC_END(za_save_state)
+SYM_FUNC_END(sme_save_state)
/*
- * Load the SME state
+ * Load the ZA and ZT state
*
* x0 - pointer to buffer for state
+ * x1 - number of ZT registers to load
*/
-SYM_FUNC_START(za_load_state)
- _sme_rdsvl 1, 1 // x1 = VL/8
- sme_load_za 0, x1, 12
+SYM_FUNC_START(sme_load_state)
+ _sme_rdsvl 2, 1 // x2 = VL/8
+ sme_load_za 0, x2, 12 // Leaves x0 pointing to the end of ZA
+
+ cbz x1, 1f
+ _ldr_zt 0
+1:
ret
-SYM_FUNC_END(za_load_state)
+SYM_FUNC_END(sme_load_state)
#endif /* CONFIG_ARM64_SME */
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index dcc81e7200d4..cec8b43e7888 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -299,7 +299,7 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
/*
* TIF_SME controls whether a task can use SME without trapping while
* in userspace, when TIF_SME is set then we must have storage
- * alocated in sve_state and za_state to store the contents of both ZA
+ * allocated in sve_state and sme_state to store the contents of both ZA
* and the SVE registers for both streaming and non-streaming modes.
*
* If both SVCR.ZA and SVCR.SM are disabled then at any point we
@@ -429,7 +429,8 @@ static void task_fpsimd_load(void)
write_sysreg_s(current->thread.svcr, SYS_SVCR);
if (thread_za_enabled(&current->thread))
- za_load_state(current->thread.za_state);
+ sme_load_state(current->thread.sme_state,
+ system_supports_sme2());
if (thread_sm_enabled(&current->thread))
restore_ffr = system_supports_fa64();
@@ -490,7 +491,8 @@ static void fpsimd_save(void)
*svcr = read_sysreg_s(SYS_SVCR);
if (*svcr & SVCR_ZA_MASK)
- za_save_state(last->za_state);
+ sme_save_state(last->sme_state,
+ system_supports_sme2());
/* If we are in streaming mode override regular SVE. */
if (*svcr & SVCR_SM_MASK) {
@@ -1257,30 +1259,30 @@ void fpsimd_release_task(struct task_struct *dead_task)
#ifdef CONFIG_ARM64_SME
/*
- * Ensure that task->thread.za_state is allocated and sufficiently large.
+ * Ensure that task->thread.sme_state is allocated and sufficiently large.
*
* This function should be used only in preparation for replacing
- * task->thread.za_state with new data. The memory is always zeroed
+ * task->thread.sme_state with new data. The memory is always zeroed
* here to prevent stale data from showing through: this is done in
* the interest of testability and predictability, the architecture
* guarantees that when ZA is enabled it will be zeroed.
*/
void sme_alloc(struct task_struct *task)
{
- if (task->thread.za_state) {
- memset(task->thread.za_state, 0, za_state_size(task));
+ if (task->thread.sme_state) {
+ memset(task->thread.sme_state, 0, sme_state_size(task));
return;
}
/* This could potentially be up to 64K. */
- task->thread.za_state =
- kzalloc(za_state_size(task), GFP_KERNEL);
+ task->thread.sme_state =
+ kzalloc(sme_state_size(task), GFP_KERNEL);
}
static void sme_free(struct task_struct *task)
{
- kfree(task->thread.za_state);
- task->thread.za_state = NULL;
+ kfree(task->thread.sme_state);
+ task->thread.sme_state = NULL;
}
void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
@@ -1302,6 +1304,17 @@ void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
* This must be called after sme_kernel_enable(), we rely on the
* feature table being sorted to ensure this.
*/
+void sme2_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+{
+ /* Allow use of ZT0 */
+ write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK,
+ SYS_SMCR_EL1);
+}
+
+/*
+ * This must be called after sme_kernel_enable(), we rely on the
+ * feature table being sorted to ensure this.
+ */
void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
{
/* Allow use of FA64 */
@@ -1488,7 +1501,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
sve_alloc(current, false);
sme_alloc(current);
- if (!current->thread.sve_state || !current->thread.za_state) {
+ if (!current->thread.sve_state || !current->thread.sme_state) {
force_sig(SIGKILL);
return;
}
@@ -1609,7 +1622,7 @@ static void fpsimd_flush_thread_vl(enum vec_type type)
void fpsimd_flush_thread(void)
{
void *sve_state = NULL;
- void *za_state = NULL;
+ void *sme_state = NULL;
if (!system_supports_fpsimd())
return;
@@ -1634,8 +1647,8 @@ void fpsimd_flush_thread(void)
clear_thread_flag(TIF_SME);
/* Defer kfree() while in atomic context */
- za_state = current->thread.za_state;
- current->thread.za_state = NULL;
+ sme_state = current->thread.sme_state;
+ current->thread.sme_state = NULL;
fpsimd_flush_thread_vl(ARM64_VEC_SME);
current->thread.svcr = 0;
@@ -1645,7 +1658,7 @@ void fpsimd_flush_thread(void)
put_cpu_fpsimd_context();
kfree(sve_state);
- kfree(za_state);
+ kfree(sme_state);
}
/*
@@ -1711,7 +1724,7 @@ static void fpsimd_bind_task_to_cpu(void)
WARN_ON(!system_supports_fpsimd());
last->st = &current->thread.uw.fpsimd_state;
last->sve_state = current->thread.sve_state;
- last->za_state = current->thread.za_state;
+ last->sme_state = current->thread.sme_state;
last->sve_vl = task_get_sve_vl(current);
last->sme_vl = task_get_sme_vl(current);
last->svcr = &current->thread.svcr;
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 2ee18c860f2a..d31d1acb170d 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -132,6 +132,12 @@ SYM_CODE_START_LOCAL(__finalise_el2)
orr x0, x0, SMCR_ELx_FA64_MASK
.Lskip_sme_fa64:
+ // ZT0 available?
+ __check_override id_aa64smfr0 ID_AA64SMFR0_EL1_SMEver_SHIFT 4 .Linit_sme_zt0 .Lskip_sme_zt0
+.Linit_sme_zt0:
+ orr x0, x0, SMCR_ELx_EZT0_MASK
+.Lskip_sme_zt0:
+
orr x0, x0, #SMCR_ELx_LEN_MASK // Enable full SME vector
msr_s SYS_SMCR_EL2, x0 // length for EL1.
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 95133765ed29..d833d78a7f31 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -131,6 +131,7 @@ static const struct ftr_set_desc smfr0 __initconst = {
.name = "id_aa64smfr0",
.override = &id_aa64smfr0_override,
.fields = {
+ FIELD("smever", ID_AA64SMFR0_EL1_SMEver_SHIFT, NULL),
/* FA64 is a one bit field... :-/ */
{ "fa64", ID_AA64SMFR0_EL1_FA64_SHIFT, 1, },
{}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 269ac1c25ae2..71d59b5abede 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -307,27 +307,28 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
/*
* In the unlikely event that we create a new thread with ZA
- * enabled we should retain the ZA state so duplicate it here.
- * This may be shortly freed if we exec() or if CLONE_SETTLS
- * but it's simpler to do it here. To avoid confusing the rest
- * of the code ensure that we have a sve_state allocated
- * whenever za_state is allocated.
+ * enabled we should retain the ZA and ZT state so duplicate
+ * it here. This may be shortly freed if we exec() or if
+ * CLONE_SETTLS but it's simpler to do it here. To avoid
+ * confusing the rest of the code ensure that we have a
+ * sve_state allocated whenever sme_state is allocated.
*/
if (thread_za_enabled(&src->thread)) {
dst->thread.sve_state = kzalloc(sve_state_size(src),
GFP_KERNEL);
if (!dst->thread.sve_state)
return -ENOMEM;
- dst->thread.za_state = kmemdup(src->thread.za_state,
- za_state_size(src),
- GFP_KERNEL);
- if (!dst->thread.za_state) {
+
+ dst->thread.sme_state = kmemdup(src->thread.sme_state,
+ sme_state_size(src),
+ GFP_KERNEL);
+ if (!dst->thread.sme_state) {
kfree(dst->thread.sve_state);
dst->thread.sve_state = NULL;
return -ENOMEM;
}
} else {
- dst->thread.za_state = NULL;
+ dst->thread.sme_state = NULL;
clear_tsk_thread_flag(dst, TIF_SME);
}
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 2686ab157601..38be7ca202af 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1045,7 +1045,7 @@ static int za_get(struct task_struct *target,
if (thread_za_enabled(&target->thread)) {
start = end;
end = ZA_PT_SIZE(vq);
- membuf_write(&to, target->thread.za_state, end - start);
+ membuf_write(&to, target->thread.sme_state, end - start);
}
/* Zero any trailing padding */
@@ -1099,7 +1099,7 @@ static int za_set(struct task_struct *target,
/* Allocate/reinit ZA storage */
sme_alloc(target);
- if (!target->thread.za_state) {
+ if (!target->thread.sme_state) {
ret = -ENOMEM;
goto out;
}
@@ -1124,7 +1124,7 @@ static int za_set(struct task_struct *target,
start = ZA_PT_ZA_OFFSET;
end = ZA_PT_SIZE(vq);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- target->thread.za_state,
+ target->thread.sme_state,
start, end);
if (ret)
goto out;
@@ -1138,6 +1138,51 @@ out:
return ret;
}
+static int zt_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!system_supports_sme2())
+ return -EINVAL;
+
+ /*
+ * If PSTATE.ZA is not set then ZT will be zeroed when it is
+ * enabled so report the current register value as zero.
+ */
+ if (thread_za_enabled(&target->thread))
+ membuf_write(&to, thread_zt_state(&target->thread),
+ ZT_SIG_REG_BYTES);
+ else
+ membuf_zero(&to, ZT_SIG_REG_BYTES);
+
+ return 0;
+}
+
+static int zt_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ if (!system_supports_sme2())
+ return -EINVAL;
+
+ if (!thread_za_enabled(&target->thread)) {
+ sme_alloc(target);
+ if (!target->thread.sme_state)
+ return -ENOMEM;
+ }
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ thread_zt_state(&target->thread),
+ 0, ZT_SIG_REG_BYTES);
+ if (ret == 0)
+ target->thread.svcr |= SVCR_ZA_MASK;
+
+ return ret;
+}
+
#endif /* CONFIG_ARM64_SME */
#ifdef CONFIG_ARM64_PTR_AUTH
@@ -1360,6 +1405,7 @@ enum aarch64_regset {
#ifdef CONFIG_ARM64_SVE
REGSET_SSVE,
REGSET_ZA,
+ REGSET_ZT,
#endif
#ifdef CONFIG_ARM64_PTR_AUTH
REGSET_PAC_MASK,
@@ -1467,6 +1513,14 @@ static const struct user_regset aarch64_regsets[] = {
.regset_get = za_get,
.set = za_set,
},
+ [REGSET_ZT] = { /* SME ZT */
+ .core_note_type = NT_ARM_ZT,
+ .n = 1,
+ .size = ZT_SIG_REG_BYTES,
+ .align = sizeof(u64),
+ .regset_get = zt_get,
+ .set = zt_set,
+ },
#endif
#ifdef CONFIG_ARM64_PTR_AUTH
[REGSET_PAC_MASK] = {
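
From a debugger's point of view the new regset is read like ZA or SSVE. A userspace sketch (not part of this patch) that fetches ZT0 from a stopped tracee; NT_ARM_ZT comes from the matching uapi elf.h change, which is outside this 'arch' diffstat, so the fallback value here is an assumption:

    #include <stdint.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/uio.h>
    #include <linux/elf.h>

    #ifndef NT_ARM_ZT
    #define NT_ARM_ZT 0x40d    /* assumed value; see include/uapi/linux/elf.h */
    #endif

    /* Returns 0 on success; fails with EINVAL when the tracee's kernel or CPU
     * has no SME2, matching zt_get()/zt_set() above. */
    static int read_zt0(pid_t pid, uint8_t zt0[64])
    {
            struct iovec iov = { .iov_base = zt0, .iov_len = 64 };

            return ptrace(PTRACE_GETREGSET, pid,
                          (void *)(uintptr_t)NT_ARM_ZT, &iov) ? -1 : 0;
    }
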
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 5fe45c7c5e4f..ed692284f199 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -58,6 +58,7 @@ struct rt_sigframe_user_layout {
unsigned long sve_offset;
unsigned long tpidr2_offset;
unsigned long za_offset;
+ unsigned long zt_offset;
unsigned long extra_offset;
unsigned long end_offset;
};
@@ -223,6 +224,7 @@ struct user_ctxs {
struct sve_context __user *sve;
struct tpidr2_context __user *tpidr2;
struct za_context __user *za;
+ struct zt_context __user *zt;
};
#ifdef CONFIG_ARM64_SVE
@@ -417,7 +419,7 @@ static int preserve_za_context(struct za_context __user *ctx)
* fpsimd_signal_preserve_current_state().
*/
err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET,
- current->thread.za_state,
+ current->thread.sme_state,
ZA_SIG_REGS_SIZE(vq));
}
@@ -448,7 +450,7 @@ static int restore_za_context(struct user_ctxs *user)
/*
* Careful: we are about __copy_from_user() directly into
- * thread.za_state with preemption enabled, so protection is
+ * thread.sme_state with preemption enabled, so protection is
* needed to prevent a racing context switch from writing stale
* registers back over the new data.
*/
@@ -457,13 +459,13 @@ static int restore_za_context(struct user_ctxs *user)
/* From now, fpsimd_thread_switch() won't touch thread.sve_state */
sme_alloc(current);
- if (!current->thread.za_state) {
+ if (!current->thread.sme_state) {
current->thread.svcr &= ~SVCR_ZA_MASK;
clear_thread_flag(TIF_SME);
return -ENOMEM;
}
- err = __copy_from_user(current->thread.za_state,
+ err = __copy_from_user(current->thread.sme_state,
(char __user const *)user->za +
ZA_SIG_REGS_OFFSET,
ZA_SIG_REGS_SIZE(vq));
@@ -475,6 +477,74 @@ static int restore_za_context(struct user_ctxs *user)
return 0;
}
+
+static int preserve_zt_context(struct zt_context __user *ctx)
+{
+ int err = 0;
+ u16 reserved[ARRAY_SIZE(ctx->__reserved)];
+
+ if (WARN_ON(!thread_za_enabled(&current->thread)))
+ return -EINVAL;
+
+ memset(reserved, 0, sizeof(reserved));
+
+ __put_user_error(ZT_MAGIC, &ctx->head.magic, err);
+ __put_user_error(round_up(ZT_SIG_CONTEXT_SIZE(1), 16),
+ &ctx->head.size, err);
+ __put_user_error(1, &ctx->nregs, err);
+ BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
+ err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
+
+ /*
+ * This assumes that the ZT state has already been saved to
+ * the task struct by calling the function
+ * fpsimd_signal_preserve_current_state().
+ */
+ err |= __copy_to_user((char __user *)ctx + ZT_SIG_REGS_OFFSET,
+ thread_zt_state(&current->thread),
+ ZT_SIG_REGS_SIZE(1));
+
+ return err ? -EFAULT : 0;
+}
+
+static int restore_zt_context(struct user_ctxs *user)
+{
+ int err;
+ struct zt_context zt;
+
+ /* ZA must be restored first for this check to be valid */
+ if (!thread_za_enabled(&current->thread))
+ return -EINVAL;
+
+ if (__copy_from_user(&zt, user->zt, sizeof(zt)))
+ return -EFAULT;
+
+ if (zt.nregs != 1)
+ return -EINVAL;
+
+ if (zt.head.size != ZT_SIG_CONTEXT_SIZE(zt.nregs))
+ return -EINVAL;
+
+ /*
+ * Careful: we are about __copy_from_user() directly into
+ * thread.zt_state with preemption enabled, so protection is
+ * needed to prevent a racing context switch from writing stale
+ * registers back over the new data.
+ */
+
+ fpsimd_flush_task_state(current);
+ /* From now, fpsimd_thread_switch() won't touch ZT in thread state */
+
+ err = __copy_from_user(thread_zt_state(&current->thread),
+ (char __user const *)user->zt +
+ ZT_SIG_REGS_OFFSET,
+ ZT_SIG_REGS_SIZE(1));
+ if (err)
+ return -EFAULT;
+
+ return 0;
+}
+
#else /* ! CONFIG_ARM64_SME */
/* Turn any non-optimised out attempts to use these into a link error: */
@@ -482,6 +552,8 @@ extern int preserve_tpidr2_context(void __user *ctx);
extern int restore_tpidr2_context(struct user_ctxs *user);
extern int preserve_za_context(void __user *ctx);
extern int restore_za_context(struct user_ctxs *user);
+extern int preserve_zt_context(void __user *ctx);
+extern int restore_zt_context(struct user_ctxs *user);
#endif /* ! CONFIG_ARM64_SME */
@@ -500,6 +572,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->sve = NULL;
user->tpidr2 = NULL;
user->za = NULL;
+ user->zt = NULL;
if (!IS_ALIGNED((unsigned long)base, 16))
goto invalid;
@@ -591,6 +664,19 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->za = (struct za_context __user *)head;
break;
+ case ZT_MAGIC:
+ if (!system_supports_sme2())
+ goto invalid;
+
+ if (user->zt)
+ goto invalid;
+
+ if (size < sizeof(*user->zt))
+ goto invalid;
+
+ user->zt = (struct zt_context __user *)head;
+ break;
+
case EXTRA_MAGIC:
if (have_extra_context)
goto invalid;
@@ -716,6 +802,9 @@ static int restore_sigframe(struct pt_regs *regs,
if (err == 0 && system_supports_sme() && user.za)
err = restore_za_context(&user);
+ if (err == 0 && system_supports_sme2() && user.zt)
+ err = restore_zt_context(&user);
+
return err;
}
@@ -821,6 +910,15 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
return err;
}
+ if (system_supports_sme2()) {
+ if (add_all || thread_za_enabled(&current->thread)) {
+ err = sigframe_alloc(user, &user->zt_offset,
+ ZT_SIG_CONTEXT_SIZE(1));
+ if (err)
+ return err;
+ }
+ }
+
return sigframe_alloc_end(user);
}
@@ -883,6 +981,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
err |= preserve_za_context(za_ctx);
}
+ /* ZT state if present */
+ if (system_supports_sme2() && err == 0 && user->zt_offset) {
+ struct zt_context __user *zt_ctx =
+ apply_user_offset(user, user->zt_offset);
+ err |= preserve_zt_context(zt_ctx);
+ }
+
if (err == 0 && user->extra_offset) {
char __user *sfp = (char __user *)user->sigframe;
char __user *userp =
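
On the receiving side, a handler can locate the new record by walking the _aarch64_ctx headers in uc_mcontext.__reserved, just as for the existing SVE/ZA records. A handler sketch (not part of the patch; it ignores extra_context continuation records for brevity):

    #include <signal.h>
    #include <stdint.h>
    #include <ucontext.h>
    #include <asm/sigcontext.h>    /* struct _aarch64_ctx, ZT_MAGIC (new) */

    #ifndef ZT_MAGIC
    #define ZT_MAGIC 0x5a544e01
    #endif

    static void handler(int sig, siginfo_t *info, void *ucv)
    {
            ucontext_t *uc = ucv;
            uint8_t *p = (uint8_t *)uc->uc_mcontext.__reserved;

            for (;;) {
                    struct _aarch64_ctx *head = (struct _aarch64_ctx *)p;

                    if (!head->magic)              /* terminator record */
                            break;
                    if (head->magic == ZT_MAGIC) {
                            /* 64 bytes of ZT0 start ZT_SIG_REGS_OFFSET
                             * into this record */
                    }
                    p += head->size;
            }
    }
    /* Install with sigaction() and SA_SIGINFO. */
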
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 02dd7e9ebd39..235775d0c825 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -143,7 +143,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
fp_state.st = &vcpu->arch.ctxt.fp_regs;
fp_state.sve_state = vcpu->arch.sve_state;
fp_state.sve_vl = vcpu->arch.sve_max_vl;
- fp_state.za_state = NULL;
+ fp_state.sme_state = NULL;
fp_state.svcr = &vcpu->arch.svcr;
fp_state.fp_type = &vcpu->arch.fp_type;
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index a86ee376920a..1222b0ed63a2 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -50,6 +50,7 @@ MTE
MTE_ASYMM
SME
SME_FA64
+SME2
SPECTRE_V2
SPECTRE_V3A
SPECTRE_V4
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 184e58fd5631..13c87d8a42f1 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -894,6 +894,7 @@ EndEnum
Enum 27:24 SME
0b0000 NI
0b0001 IMP
+ 0b0010 SME2
EndEnum
Res0 23:20
Enum 19:16 MPAM_frac
@@ -975,7 +976,9 @@ Enum 63 FA64
EndEnum
Res0 62:60
Enum 59:56 SMEver
- 0b0000 IMP
+ 0b0000 SME
+ 0b0001 SME2
+ 0b0010 SME2p1
EndEnum
Enum 55:52 I16I64
0b0000 NI
@@ -986,7 +989,19 @@ Enum 48 F64F64
0b0 NI
0b1 IMP
EndEnum
-Res0 47:40
+Enum 47:44 I16I32
+ 0b0000 NI
+ 0b0101 IMP
+EndEnum
+Enum 43 B16B16
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+Enum 42 F16F16
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+Res0 41:40
Enum 39:36 I8I32
0b0000 NI
0b1111 IMP
@@ -999,7 +1014,10 @@ Enum 34 B16F32
0b0 NI
0b1 IMP
EndEnum
-Res0 33
+Enum 33 BI32I32
+ 0b0 NI
+ 0b1 IMP
+EndEnum
Enum 32 F32F32
0b0 NI
0b1 IMP
@@ -1599,7 +1617,8 @@ EndSysreg
SysregFields SMCR_ELx
Res0 63:32
Field 31 FA64
-Res0 30:9
+Field 30 EZT0
+Res0 29:9
Raz 8:4
Field 3:0 LEN
EndSysregFields
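
The ID_AA64SMFR0_EL1 fields described above are also visible to userspace through the kernel's MRS emulation of the ID registers. A sketch decoding the SMEver field (arm64-only, and it assumes the running kernel exposes this register to EL0):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t smfr0;

            /* ID_AA64SMFR0_EL1 is sys_reg(3, 0, 0, 4, 5); the read from EL0
             * traps and is emulated by the kernel. */
            __asm__("mrs %0, S3_0_C0_C4_5" : "=r"(smfr0));

            printf("SMEver = %u (0: SME, 1: SME2, 2: SME2p1)\n",
                   (unsigned int)((smfr0 >> 56) & 0xf));
            return 0;
    }
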