/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd.
 * Copyright (C) 2024 Google LLC
 *
 * Author: Ard Biesheuvel
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch	armv8-a+crypto

	.macro	load_round_keys, rk, nr, tmp
	sub	w\tmp, \nr, #10
	add	\tmp, \rk, w\tmp, sxtw #4
	ld1	{v10.4s-v13.4s}, [\rk]
	ld1	{v14.4s-v17.4s}, [\tmp], #64
	ld1	{v18.4s-v21.4s}, [\tmp], #64
	ld1	{v3.4s-v5.4s}, [\tmp]
	.endm

	.macro	dround, va, vb, vk
	aese	\va\().16b, \vk\().16b
	aesmc	\va\().16b, \va\().16b
	aese	\vb\().16b, \vk\().16b
	aesmc	\vb\().16b, \vb\().16b
	.endm

	.macro	aes_encrypt, va, vb, nr
	tbz	\nr, #2, .L\@
	dround	\va, \vb, v10
	dround	\va, \vb, v11
	tbz	\nr, #1, .L\@
	dround	\va, \vb, v12
	dround	\va, \vb, v13
.L\@:	.irp	v, v14, v15, v16, v17, v18, v19, v20, v21, v3
	dround	\va, \vb, \v
	.endr
	aese	\va\().16b, v4.16b
	aese	\vb\().16b, v4.16b
	.endm

	.macro	aes_ccm_do_crypt,enc
	load_round_keys	x3, w4, x10

	ld1	{v0.16b}, [x5]			/* load mac */
	cbz	x2, ce_aes_ccm_final
	ldr	x8, [x6, #8]			/* load lower ctr */
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	ins	v1.d[1], x9			/* no carry in lower ctr */

	aes_encrypt	v0, v1, w4

	subs	w2, w2, #16
	bmi	ce_aes_ccm_crypt_tail
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v6.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v6.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v6.16b}, [x0], #16		/* write output block */
	bne	0b

CPU_LE(	rev	x8, x8			)
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
	cbnz	x7, ce_aes_ccm_final
	st1	{v0.16b}, [x5]			/* store mac */
	ret
	.endm

SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */

	add	x1, x1, w2, sxtw		/* rewind the input pointer (w2 < 0) */
	add	x0, x0, w2, sxtw		/* rewind the output pointer */

	adr_l	x8, .Lpermute			/* load permute vectors */
	add	x9, x8, w2, sxtw
	sub	x8, x8, w2, sxtw
	ld1	{v7.16b-v8.16b}, [x9]
	ld1	{v9.16b}, [x8]

	ld1	{v2.16b}, [x1]			/* load a full block of input */
	tbl	v1.16b, {v1.16b}, v7.16b	/* move keystream to end of register */
	eor	v7.16b, v2.16b, v1.16b		/* encrypt partial input block */
	bif	v2.16b, v7.16b, v22.16b		/* select plaintext */
	tbx	v7.16b, {v6.16b}, v8.16b	/* insert output from previous iteration */
	tbl	v2.16b, {v2.16b}, v9.16b	/* copy plaintext to start of v2 */
	eor	v0.16b, v0.16b, v2.16b		/* fold plaintext into mac */

	st1	{v7.16b}, [x0]			/* store output block */
	cbz	x7, 0f

SYM_INNER_LABEL(ce_aes_ccm_final, SYM_L_LOCAL)
	ld1	{v1.16b}, [x7]			/* load 1st ctriv */

	aes_encrypt	v0, v1, w4

	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
0:	st1	{v0.16b}, [x5]			/* store result */
	ret
SYM_FUNC_END(ce_aes_ccm_crypt_tail)

	/*
	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
	 *			   u8 const rk[], u32 rounds, u8 mac[],
	 *			   u8 ctr[], u8 const final_iv[]);
	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
	 *			   u8 const rk[], u32 rounds, u8 mac[],
	 *			   u8 ctr[], u8 const final_iv[]);
	 */
SYM_FUNC_START(ce_aes_ccm_encrypt)
	movi	v22.16b, #255
	aes_ccm_do_crypt	1
SYM_FUNC_END(ce_aes_ccm_encrypt)

SYM_FUNC_START(ce_aes_ccm_decrypt)
	movi	v22.16b, #0
	aes_ccm_do_crypt	0
SYM_FUNC_END(ce_aes_ccm_decrypt)

	.section ".rodata", "a"
	.align	6
	.fill	15, 1, 0xff
.Lpermute:
	.byte	0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
	.byte	0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
	.fill	15, 1, 0xff
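
/*
 * Illustrative usage sketch (not part of the original file): a rough idea of
 * how the two exported routines above might be driven from C. It assumes the
 * caller has already expanded the AES key schedule into rk[] and prepared the
 * CCM MAC and counter blocks, which is normally done by the accompanying C
 * glue code; all buffer names and sizes below are hypothetical.
 *
 *	u8 out[64], in[64];		// plaintext/ciphertext, cbytes long
 *	u8 mac[16], ctr[16], final_iv[16];
 *	u32 rounds = 10;		// 10, 12 or 14 for AES-128/192/256
 *	u8 rk[(10 + 1) * 16];		// expanded key schedule, rounds + 1 round keys
 *
 *	// Encrypts the payload and folds it into the running MAC; mac[] is
 *	// updated in place. A non-NULL final_iv additionally encrypts the MAC
 *	// with the counter block loaded from final_iv.
 *	ce_aes_ccm_encrypt(out, in, sizeof(in), rk, rounds, mac, ctr, final_iv);
 */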