From ce514000da4f4b5f850f3339f805471e5c5c1caf Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 16 Jan 2023 16:04:36 +0000 Subject: arm64/sme: Rename za_state to sme_state In preparation for adding support for storage for ZT0 to the thread_struct rename za_state to sme_state. Since ZT0 is accessible when PSTATE.ZA is set just like ZA itself we will extend the allocation done for ZA to cover it, avoiding the need to further expand task_struct for non-SME tasks. No functional changes. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20221208-arm64-sme2-v4-1-f2fa0aef982f@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 34 +++++++++++++++++----------------- arch/arm64/kernel/process.c | 13 +++++++------ arch/arm64/kernel/ptrace.c | 6 +++--- arch/arm64/kernel/signal.c | 8 ++++---- 4 files changed, 31 insertions(+), 30 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index dcc81e7200d4..9e168a9eb615 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -299,7 +299,7 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type, /* * TIF_SME controls whether a task can use SME without trapping while * in userspace, when TIF_SME is set then we must have storage - * alocated in sve_state and za_state to store the contents of both ZA + * alocated in sve_state and sme_state to store the contents of both ZA * and the SVE registers for both streaming and non-streaming modes. * * If both SVCR.ZA and SVCR.SM are disabled then at any point we @@ -429,7 +429,7 @@ static void task_fpsimd_load(void) write_sysreg_s(current->thread.svcr, SYS_SVCR); if (thread_za_enabled(¤t->thread)) - za_load_state(current->thread.za_state); + za_load_state(current->thread.sme_state); if (thread_sm_enabled(¤t->thread)) restore_ffr = system_supports_fa64(); @@ -490,7 +490,7 @@ static void fpsimd_save(void) *svcr = read_sysreg_s(SYS_SVCR); if (*svcr & SVCR_ZA_MASK) - za_save_state(last->za_state); + za_save_state(last->sme_state); /* If we are in streaming mode override regular SVE. */ if (*svcr & SVCR_SM_MASK) { @@ -1257,30 +1257,30 @@ void fpsimd_release_task(struct task_struct *dead_task) #ifdef CONFIG_ARM64_SME /* - * Ensure that task->thread.za_state is allocated and sufficiently large. + * Ensure that task->thread.sme_state is allocated and sufficiently large. * * This function should be used only in preparation for replacing - * task->thread.za_state with new data. The memory is always zeroed + * task->thread.sme_state with new data. The memory is always zeroed * here to prevent stale data from showing through: this is done in * the interest of testability and predictability, the architecture * guarantees that when ZA is enabled it will be zeroed. */ void sme_alloc(struct task_struct *task) { - if (task->thread.za_state) { - memset(task->thread.za_state, 0, za_state_size(task)); + if (task->thread.sme_state) { + memset(task->thread.sme_state, 0, sme_state_size(task)); return; } /* This could potentially be up to 64K. */ - task->thread.za_state = - kzalloc(za_state_size(task), GFP_KERNEL); + task->thread.sme_state = + kzalloc(sme_state_size(task), GFP_KERNEL); } static void sme_free(struct task_struct *task) { - kfree(task->thread.za_state); - task->thread.za_state = NULL; + kfree(task->thread.sme_state); + task->thread.sme_state = NULL; } void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) @@ -1488,7 +1488,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) sve_alloc(current, false); sme_alloc(current); - if (!current->thread.sve_state || !current->thread.za_state) { + if (!current->thread.sve_state || !current->thread.sme_state) { force_sig(SIGKILL); return; } @@ -1609,7 +1609,7 @@ static void fpsimd_flush_thread_vl(enum vec_type type) void fpsimd_flush_thread(void) { void *sve_state = NULL; - void *za_state = NULL; + void *sme_state = NULL; if (!system_supports_fpsimd()) return; @@ -1634,8 +1634,8 @@ void fpsimd_flush_thread(void) clear_thread_flag(TIF_SME); /* Defer kfree() while in atomic context */ - za_state = current->thread.za_state; - current->thread.za_state = NULL; + sme_state = current->thread.sme_state; + current->thread.sme_state = NULL; fpsimd_flush_thread_vl(ARM64_VEC_SME); current->thread.svcr = 0; @@ -1645,7 +1645,7 @@ void fpsimd_flush_thread(void) put_cpu_fpsimd_context(); kfree(sve_state); - kfree(za_state); + kfree(sme_state); } /* @@ -1711,7 +1711,7 @@ static void fpsimd_bind_task_to_cpu(void) WARN_ON(!system_supports_fpsimd()); last->st = ¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; - last->za_state = current->thread.za_state; + last->sme_state = current->thread.sme_state; last->sve_vl = task_get_sve_vl(current); last->sme_vl = task_get_sme_vl(current); last->svcr = ¤t->thread.svcr; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 269ac1c25ae2..4ce0c4313ec6 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -311,23 +311,24 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) * This may be shortly freed if we exec() or if CLONE_SETTLS * but it's simpler to do it here. To avoid confusing the rest * of the code ensure that we have a sve_state allocated - * whenever za_state is allocated. + * whenever sme_state is allocated. */ if (thread_za_enabled(&src->thread)) { dst->thread.sve_state = kzalloc(sve_state_size(src), GFP_KERNEL); if (!dst->thread.sve_state) return -ENOMEM; - dst->thread.za_state = kmemdup(src->thread.za_state, - za_state_size(src), - GFP_KERNEL); - if (!dst->thread.za_state) { + + dst->thread.sme_state = kmemdup(src->thread.sme_state, + sme_state_size(src), + GFP_KERNEL); + if (!dst->thread.sme_state) { kfree(dst->thread.sve_state); dst->thread.sve_state = NULL; return -ENOMEM; } } else { - dst->thread.za_state = NULL; + dst->thread.sme_state = NULL; clear_tsk_thread_flag(dst, TIF_SME); } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 2686ab157601..67256e7772e4 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1045,7 +1045,7 @@ static int za_get(struct task_struct *target, if (thread_za_enabled(&target->thread)) { start = end; end = ZA_PT_SIZE(vq); - membuf_write(&to, target->thread.za_state, end - start); + membuf_write(&to, target->thread.sme_state, end - start); } /* Zero any trailing padding */ @@ -1099,7 +1099,7 @@ static int za_set(struct task_struct *target, /* Allocate/reinit ZA storage */ sme_alloc(target); - if (!target->thread.za_state) { + if (!target->thread.sme_state) { ret = -ENOMEM; goto out; } @@ -1124,7 +1124,7 @@ static int za_set(struct task_struct *target, start = ZA_PT_ZA_OFFSET; end = ZA_PT_SIZE(vq); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - target->thread.za_state, + target->thread.sme_state, start, end); if (ret) goto out; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index e0d09bf5b01b..27768809dd3e 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -389,7 +389,7 @@ static int preserve_za_context(struct za_context __user *ctx) * fpsimd_signal_preserve_current_state(). */ err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET, - current->thread.za_state, + current->thread.sme_state, ZA_SIG_REGS_SIZE(vq)); } @@ -420,7 +420,7 @@ static int restore_za_context(struct user_ctxs *user) /* * Careful: we are about __copy_from_user() directly into - * thread.za_state with preemption enabled, so protection is + * thread.sme_state with preemption enabled, so protection is * needed to prevent a racing context switch from writing stale * registers back over the new data. */ @@ -429,13 +429,13 @@ static int restore_za_context(struct user_ctxs *user) /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ sme_alloc(current); - if (!current->thread.za_state) { + if (!current->thread.sme_state) { current->thread.svcr &= ~SVCR_ZA_MASK; clear_thread_flag(TIF_SME); return -ENOMEM; } - err = __copy_from_user(current->thread.za_state, + err = __copy_from_user(current->thread.sme_state, (char __user const *)user->za + ZA_SIG_REGS_OFFSET, ZA_SIG_REGS_SIZE(vq)); -- cgit v1.2.3 From f122576f35336820259a79847e408b9f807eba15 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 16 Jan 2023 16:04:42 +0000 Subject: arm64/sme: Enable host kernel to access ZT0 The new register ZT0 introduced by SME2 comes with a new trap, disable it for the host kernel so that we can implement support for it. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20221208-arm64-sme2-v4-7-f2fa0aef982f@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/hyp-stub.S | 6 ++++++ arch/arm64/kernel/idreg-override.c | 1 + 2 files changed, 7 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 2ee18c860f2a..d31d1acb170d 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -132,6 +132,12 @@ SYM_CODE_START_LOCAL(__finalise_el2) orr x0, x0, SMCR_ELx_FA64_MASK .Lskip_sme_fa64: + // ZT0 available? + __check_override id_aa64smfr0 ID_AA64SMFR0_EL1_SMEver_SHIFT 4 .Linit_sme_zt0 .Lskip_sme_zt0 +.Linit_sme_zt0: + orr x0, x0, SMCR_ELx_EZT0_MASK +.Lskip_sme_zt0: + orr x0, x0, #SMCR_ELx_LEN_MASK // Enable full SME vector msr_s SYS_SMCR_EL2, x0 // length for EL1. diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 95133765ed29..d833d78a7f31 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -131,6 +131,7 @@ static const struct ftr_set_desc smfr0 __initconst = { .name = "id_aa64smfr0", .override = &id_aa64smfr0_override, .fields = { + FIELD("smever", ID_AA64SMFR0_EL1_SMEver_SHIFT, NULL), /* FA64 is a one bit field... :-/ */ { "fa64", ID_AA64SMFR0_EL1_FA64_SHIFT, 1, }, {} -- cgit v1.2.3 From d4913eee152d3ffaf9a1e70073bc15e773625685 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 16 Jan 2023 16:04:43 +0000 Subject: arm64/sme: Add basic enumeration for SME2 Add basic feature detection for SME2, detecting that the feature is present and disabling traps for ZT0. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20221208-arm64-sme2-v4-8-f2fa0aef982f@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 14 ++++++++++++++ arch/arm64/kernel/fpsimd.c | 11 +++++++++++ 2 files changed, 25 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a77315b338e6..fd90905bc2e6 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -282,6 +282,8 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), @@ -2649,6 +2651,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .cpu_enable = fa64_kernel_enable, }, + { + .desc = "SME2", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_SME2, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR1_EL1_SME_SHIFT, + .field_width = ID_AA64PFR1_EL1_SME_WIDTH, + .min_field_value = ID_AA64PFR1_EL1_SME_SME2, + .matches = has_cpuid_feature, + .cpu_enable = sme2_kernel_enable, + }, #endif /* CONFIG_ARM64_SME */ { .desc = "WFx with timeout", diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 9e168a9eb615..717ae4aaa021 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1298,6 +1298,17 @@ void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) isb(); } +/* + * This must be called after sme_kernel_enable(), we rely on the + * feature table being sorted to ensure this. + */ +void sme2_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) +{ + /* Allow use of ZT0 */ + write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK, + SYS_SMCR_EL1); +} + /* * This must be called after sme_kernel_enable(), we rely on the * feature table being sorted to ensure this. -- cgit v1.2.3 From d6138b4adc70729404efeb0133bc9f6e9ad78d03 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 16 Jan 2023 16:04:44 +0000 Subject: arm64/sme: Provide storage for ZT0 When the system supports SME2 there is an additional register ZT0 which we must store when the task is using SME. Since ZT0 is accessible only when PSTATE.ZA is set just like ZA we allocate storage for it along with ZA, increasing the allocation size for the memory region where we store ZA and storing the data for ZT after that for ZA. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20221208-arm64-sme2-v4-9-f2fa0aef982f@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/process.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 4ce0c4313ec6..71d59b5abede 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -307,11 +307,11 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) /* * In the unlikely event that we create a new thread with ZA - * enabled we should retain the ZA state so duplicate it here. - * This may be shortly freed if we exec() or if CLONE_SETTLS - * but it's simpler to do it here. To avoid confusing the rest - * of the code ensure that we have a sve_state allocated - * whenever sme_state is allocated. + * enabled we should retain the ZA and ZT state so duplicate + * it here. This may be shortly freed if we exec() or if + * CLONE_SETTLS but it's simpler to do it here. To avoid + * confusing the rest of the code ensure that we have a + * sve_state allocated whenever sme_state is allocated. */ if (thread_za_enabled(&src->thread)) { dst->thread.sve_state = kzalloc(sve_state_size(src), -- cgit v1.2.3 From 95fcec713259d440626526ed96ff5d3bac6179ea Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 16 Jan 2023 16:04:45 +0000 Subject: arm64/sme: Implement context switching for ZT0 When the system supports SME2 the ZT0 register must be context switched as part of the floating point state. This register is stored immediately after ZA in memory and is only accessible when PSTATE.ZA is set so we handle it in the same functions we use to save and restore ZA. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20221208-arm64-sme2-v4-10-f2fa0aef982f@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry-fpsimd.S | 30 ++++++++++++++++++++---------- arch/arm64/kernel/fpsimd.c | 6 ++++-- 2 files changed, 24 insertions(+), 12 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 229436f33df5..6325db1a2179 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -100,25 +100,35 @@ SYM_FUNC_START(sme_set_vq) SYM_FUNC_END(sme_set_vq) /* - * Save the SME state + * Save the ZA and ZT state * * x0 - pointer to buffer for state + * x1 - number of ZT registers to save */ -SYM_FUNC_START(za_save_state) - _sme_rdsvl 1, 1 // x1 = VL/8 - sme_save_za 0, x1, 12 +SYM_FUNC_START(sme_save_state) + _sme_rdsvl 2, 1 // x2 = VL/8 + sme_save_za 0, x2, 12 // Leaves x0 pointing to the end of ZA + + cbz x1, 1f + _str_zt 0 +1: ret -SYM_FUNC_END(za_save_state) +SYM_FUNC_END(sme_save_state) /* - * Load the SME state + * Load the ZA and ZT state * * x0 - pointer to buffer for state + * x1 - number of ZT registers to save */ -SYM_FUNC_START(za_load_state) - _sme_rdsvl 1, 1 // x1 = VL/8 - sme_load_za 0, x1, 12 +SYM_FUNC_START(sme_load_state) + _sme_rdsvl 2, 1 // x2 = VL/8 + sme_load_za 0, x2, 12 // Leaves x0 pointing to the end of ZA + + cbz x1, 1f + _ldr_zt 0 +1: ret -SYM_FUNC_END(za_load_state) +SYM_FUNC_END(sme_load_state) #endif /* CONFIG_ARM64_SME */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 717ae4aaa021..cec8b43e7888 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -429,7 +429,8 @@ static void task_fpsimd_load(void) write_sysreg_s(current->thread.svcr, SYS_SVCR); if (thread_za_enabled(¤t->thread)) - za_load_state(current->thread.sme_state); + sme_load_state(current->thread.sme_state, + system_supports_sme2()); if (thread_sm_enabled(¤t->thread)) restore_ffr = system_supports_fa64(); @@ -490,7 +491,8 @@ static void fpsimd_save(void) *svcr = read_sysreg_s(SYS_SVCR); if (*svcr & SVCR_ZA_MASK) - za_save_state(last->sme_state); + sme_save_state(last->sme_state, + system_supports_sme2()); /* If we are in streaming mode override regular SVE. */ if (*svcr & SVCR_SM_MASK) { -- cgit v1.2.3 From ee072cf708048c0d718a88159ae7985dd96d316f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 16 Jan 2023 16:04:46 +0000 Subject: arm64/sme: Implement signal handling for ZT Add a new signal context type for ZT which is present in the signal frame when ZA is enabled and ZT is supported by the system. In order to account for the possible addition of further ZT registers in the future we make the number of registers variable in the ABI, though currently the only possible number is 1. We could just use a bare list head for the context since the number of registers can be inferred from the size of the context but for usability and future extensibility we define a header with the number of registers and some reserved fields in it. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20221208-arm64-sme2-v4-11-f2fa0aef982f@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/signal.c | 105 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 27768809dd3e..1c5e557a3617 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -57,6 +57,7 @@ struct rt_sigframe_user_layout { unsigned long esr_offset; unsigned long sve_offset; unsigned long za_offset; + unsigned long zt_offset; unsigned long extra_offset; unsigned long end_offset; }; @@ -221,6 +222,7 @@ struct user_ctxs { struct fpsimd_context __user *fpsimd; struct sve_context __user *sve; struct za_context __user *za; + struct zt_context __user *zt; }; #ifdef CONFIG_ARM64_SVE @@ -447,11 +449,81 @@ static int restore_za_context(struct user_ctxs *user) return 0; } + +static int preserve_zt_context(struct zt_context __user *ctx) +{ + int err = 0; + u16 reserved[ARRAY_SIZE(ctx->__reserved)]; + + if (WARN_ON(!thread_za_enabled(¤t->thread))) + return -EINVAL; + + memset(reserved, 0, sizeof(reserved)); + + __put_user_error(ZT_MAGIC, &ctx->head.magic, err); + __put_user_error(round_up(ZT_SIG_CONTEXT_SIZE(1), 16), + &ctx->head.size, err); + __put_user_error(1, &ctx->nregs, err); + BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved)); + err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved)); + + /* + * This assumes that the ZT state has already been saved to + * the task struct by calling the function + * fpsimd_signal_preserve_current_state(). + */ + err |= __copy_to_user((char __user *)ctx + ZT_SIG_REGS_OFFSET, + thread_zt_state(¤t->thread), + ZT_SIG_REGS_SIZE(1)); + + return err ? -EFAULT : 0; +} + +static int restore_zt_context(struct user_ctxs *user) +{ + int err; + struct zt_context zt; + + /* ZA must be restored first for this check to be valid */ + if (!thread_za_enabled(¤t->thread)) + return -EINVAL; + + if (__copy_from_user(&zt, user->zt, sizeof(zt))) + return -EFAULT; + + if (zt.nregs != 1) + return -EINVAL; + + if (zt.head.size != ZT_SIG_CONTEXT_SIZE(zt.nregs)) + return -EINVAL; + + /* + * Careful: we are about __copy_from_user() directly into + * thread.zt_state with preemption enabled, so protection is + * needed to prevent a racing context switch from writing stale + * registers back over the new data. + */ + + fpsimd_flush_task_state(current); + /* From now, fpsimd_thread_switch() won't touch ZT in thread state */ + + err = __copy_from_user(thread_zt_state(¤t->thread), + (char __user const *)user->zt + + ZT_SIG_REGS_OFFSET, + ZT_SIG_REGS_SIZE(1)); + if (err) + return -EFAULT; + + return 0; +} + #else /* ! CONFIG_ARM64_SME */ /* Turn any non-optimised out attempts to use these into a link error: */ extern int preserve_za_context(void __user *ctx); extern int restore_za_context(struct user_ctxs *user); +extern int preserve_zt_context(void __user *ctx); +extern int restore_zt_context(struct user_ctxs *user); #endif /* ! CONFIG_ARM64_SME */ @@ -469,6 +541,7 @@ static int parse_user_sigframe(struct user_ctxs *user, user->fpsimd = NULL; user->sve = NULL; user->za = NULL; + user->zt = NULL; if (!IS_ALIGNED((unsigned long)base, 16)) goto invalid; @@ -547,6 +620,19 @@ static int parse_user_sigframe(struct user_ctxs *user, user->za = (struct za_context __user *)head; break; + case ZT_MAGIC: + if (!system_supports_sme2()) + goto invalid; + + if (user->zt) + goto invalid; + + if (size < sizeof(*user->zt)) + goto invalid; + + user->zt = (struct zt_context __user *)head; + break; + case EXTRA_MAGIC: if (have_extra_context) goto invalid; @@ -669,6 +755,9 @@ static int restore_sigframe(struct pt_regs *regs, if (err == 0 && system_supports_sme() && user.za) err = restore_za_context(&user); + if (err == 0 && system_supports_sme2() && user.zt) + err = restore_zt_context(&user); + return err; } @@ -769,6 +858,15 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, return err; } + if (system_supports_sme2()) { + if (add_all || thread_za_enabled(¤t->thread)) { + err = sigframe_alloc(user, &user->zt_offset, + ZT_SIG_CONTEXT_SIZE(1)); + if (err) + return err; + } + } + return sigframe_alloc_end(user); } @@ -824,6 +922,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, err |= preserve_za_context(za_ctx); } + /* ZT state if present */ + if (system_supports_sme2() && err == 0 && user->zt_offset) { + struct zt_context __user *zt_ctx = + apply_user_offset(user, user->zt_offset); + err |= preserve_zt_context(zt_ctx); + } + if (err == 0 && user->extra_offset) { char __user *sfp = (char __user *)user->sigframe; char __user *userp = -- cgit v1.2.3 From f90b529bcbe57c66da2b6a0d3c7c81e8567af63d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 16 Jan 2023 16:04:47 +0000 Subject: arm64/sme: Implement ZT0 ptrace support Implement support for a new note type NT_ARM64_ZT providing access to ZT0 when implemented. Since ZT0 is a register with constant size this is much simpler than for other SME state. As ZT0 is only accessible when PSTATE.ZA is set writes to ZT0 cause PSTATE.ZA to be set, the main alternative would be to return -EBUSY in this case but this seemed more constructive. Practical users are also going to be working with ZA anyway and have some understanding of the state. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20221208-arm64-sme2-v4-12-f2fa0aef982f@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/ptrace.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 67256e7772e4..38be7ca202af 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1138,6 +1138,51 @@ out: return ret; } +static int zt_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + if (!system_supports_sme2()) + return -EINVAL; + + /* + * If PSTATE.ZA is not set then ZT will be zeroed when it is + * enabled so report the current register value as zero. + */ + if (thread_za_enabled(&target->thread)) + membuf_write(&to, thread_zt_state(&target->thread), + ZT_SIG_REG_BYTES); + else + membuf_zero(&to, ZT_SIG_REG_BYTES); + + return 0; +} + +static int zt_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + if (!system_supports_sme2()) + return -EINVAL; + + if (!thread_za_enabled(&target->thread)) { + sme_alloc(target); + if (!target->thread.sme_state) + return -ENOMEM; + } + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + thread_zt_state(&target->thread), + 0, ZT_SIG_REG_BYTES); + if (ret == 0) + target->thread.svcr |= SVCR_ZA_MASK; + + return ret; +} + #endif /* CONFIG_ARM64_SME */ #ifdef CONFIG_ARM64_PTR_AUTH @@ -1360,6 +1405,7 @@ enum aarch64_regset { #ifdef CONFIG_ARM64_SVE REGSET_SSVE, REGSET_ZA, + REGSET_ZT, #endif #ifdef CONFIG_ARM64_PTR_AUTH REGSET_PAC_MASK, @@ -1467,6 +1513,14 @@ static const struct user_regset aarch64_regsets[] = { .regset_get = za_get, .set = za_set, }, + [REGSET_ZT] = { /* SME ZT */ + .core_note_type = NT_ARM_ZT, + .n = 1, + .size = ZT_SIG_REG_BYTES, + .align = sizeof(u64), + .regset_get = zt_get, + .set = zt_set, + }, #endif #ifdef CONFIG_ARM64_PTR_AUTH [REGSET_PAC_MASK] = { -- cgit v1.2.3 From 7d5d8601e4577c918fdb5fa922c634a73557ac0f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 16 Jan 2023 16:04:48 +0000 Subject: arm64/sme: Add hwcaps for SME 2 and 2.1 features In order to allow userspace to discover the presence of the new SME features add hwcaps for them. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20221208-arm64-sme2-v4-13-f2fa0aef982f@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 14 ++++++++++++++ arch/arm64/kernel/cpuinfo.c | 6 ++++++ 2 files changed, 20 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index fd90905bc2e6..5bd959bd9a1f 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -288,12 +288,20 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16B16_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, 0), ARM64_FTR_END, @@ -2841,11 +2849,17 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #ifdef CONFIG_ARM64_SME HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_SME_IMP, CAP_HWCAP, KERNEL_HWCAP_SME), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_FA64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_SMEver_SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_SMEver_SME2, CAP_HWCAP, KERNEL_HWCAP_SME2), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F64F64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_B16B16_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_B16B16_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F16F16_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I8I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_B16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_BI32I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F32F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), #endif /* CONFIG_ARM64_SME */ {}, diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 379695262b77..85e54417d141 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -119,6 +119,12 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_CSSC] = "cssc", [KERNEL_HWCAP_RPRFM] = "rprfm", [KERNEL_HWCAP_SVE2P1] = "sve2p1", + [KERNEL_HWCAP_SME2] = "sme2", + [KERNEL_HWCAP_SME2P1] = "sme2p1", + [KERNEL_HWCAP_SME_I16I32] = "smei16i32", + [KERNEL_HWCAP_SME_BI32I32] = "smebi32i32", + [KERNEL_HWCAP_SME_B16B16] = "smeb16b16", + [KERNEL_HWCAP_SME_F16F16] = "smef16f16", }; #ifdef CONFIG_COMPAT -- cgit v1.2.3