/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Jerry Shih
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128 && VLEN < 2048
// - RISC-V Vector AES block cipher extension ('Zvkned')
// - RISC-V Vector Bit-manipulation extension ('Zvbb')
// - RISC-V Vector GCM/GMAC extension ('Zvkg')

#include <linux/linkage.h>

.text
.option arch, +zvkned, +zvbb, +zvkg

#include "aes-macros.S"

#define KEYP		a0
#define INP		a1
#define OUTP		a2
#define LEN		a3
#define TWEAKP		a4

#define LEN32		a5
#define TAIL_LEN	a6
#define VL		a7
#define VLMAX		t4

// v1-v15 contain the AES round keys, but they are used for temporaries before
// the AES round keys have been loaded.
#define TWEAKS		v16	// LMUL=4 (most of the time)
#define TWEAKS_BREV	v20	// LMUL=4 (most of the time)
#define MULTS_BREV	v24	// LMUL=4 (most of the time)
#define TMP0		v28
#define TMP1		v29
#define TMP2		v30
#define TMP3		v31

// xts_init initializes the following values:
//
//	TWEAKS: N 128-bit tweaks T*(x^i) for i in 0..(N - 1)
//	TWEAKS_BREV: same as TWEAKS, but bit-reversed
//	MULTS_BREV: N 128-bit values x^N, bit-reversed.  Only if N > 1.
//
// N is the maximum number of blocks that will be processed per loop iteration,
// computed using vsetvli.
//
// The field convention used by XTS is the same as that of GHASH, but with the
// bits reversed within each byte.  The zvkg extension provides the vgmul
// instruction, which does multiplication in this field.  Therefore, for tweak
// computation we use vgmul to do multiplications in parallel, instead of
// serially multiplying by x using shifting+xoring.  Note that for this to
// work, the inputs and outputs to vgmul must be bit-reversed (we do it with
// vbrev8).
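// For reference only (nothing here is assembled): a minimal C sketch of the
// serial multiply-by-x tweak update that the parallel vgmul approach replaces.
// It follows the usual XTS convention, where a carry out of the last byte is
// folded back into byte 0 as 0x87; the function name is purely illustrative.
//
//	static void xts_mult_x(unsigned char t[16])
//	{
//		unsigned char carry = 0;
//		int i;
//
//		for (i = 0; i < 16; i++) {
//			unsigned char next_carry = t[i] >> 7;
//
//			t[i] = (t[i] << 1) | carry;
//			carry = next_carry;
//		}
//		if (carry)
//			t[0] ^= 0x87;
//	}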
.macro	xts_init

	// Load the first tweak T.
	vsetivli	zero, 4, e32, m1, ta, ma
	vle32.v		TWEAKS, (TWEAKP)

	// If there's only one block (or no blocks at all), then skip the tweak
	// sequence computation because (at most) T itself is needed.
	li		t0, 16
	ble		LEN, t0, .Linit_single_block\@

	// Save a copy of T bit-reversed in v12.
	vbrev8.v	v12, TWEAKS

	//
	// Generate x^i for i in 0..(N - 1), i.e. 128-bit values 1 << i assuming
	// that N <= 128.  Though, this code actually requires N < 64 (or
	// equivalently VLEN < 2048) due to the use of 64-bit intermediate
	// values here and in the x^N computation later.
	//
	vsetvli		VL, LEN32, e32, m4, ta, ma
	srli		t0, VL, 2	// t0 = N (num blocks)
	// Generate two sequences, each with N 32-bit values:
	// v0=[1, 1, 1, ...] and v1=[0, 1, 2, ...].
	vsetvli		zero, t0, e32, m1, ta, ma
	vmv.v.i		v0, 1
	vid.v		v1
	// Use vzext to zero-extend the sequences to 64 bits.  Reinterpret them
	// as two sequences, each with 2*N 32-bit values:
	// v2=[1, 0, 1, 0, 1, 0, ...] and v4=[0, 0, 1, 0, 2, 0, ...].
	vsetvli		zero, t0, e64, m2, ta, ma
	vzext.vf2	v2, v0
	vzext.vf2	v4, v1
	slli		t1, t0, 1	// t1 = 2*N
	vsetvli		zero, t1, e32, m2, ta, ma
	// Use vwsll to compute [1<<0, 0<<0, 1<<1, 0<<0, 1<<2, 0<<0, ...],
	// widening to 64 bits per element.  When reinterpreted as N 128-bit
	// values, this is the needed sequence of 128-bit values 1 << i (x^i).
	vwsll.vv	v8, v2, v4

	// Copy the bit-reversed T to all N elements of TWEAKS_BREV, then
	// multiply by x^i.  This gives the sequence T*(x^i), bit-reversed.
	vsetvli		zero, LEN32, e32, m4, ta, ma
	vmv.v.i		TWEAKS_BREV, 0
	vaesz.vs	TWEAKS_BREV, v12
	vbrev8.v	v8, v8
	vgmul.vv	TWEAKS_BREV, v8

	// Save a copy of the sequence T*(x^i) with the bit reversal undone.
	vbrev8.v	TWEAKS, TWEAKS_BREV

	// Generate N copies of x^N, i.e. 128-bit values 1 << N, bit-reversed.
	li		t1, 1
	sll		t1, t1, t0	// t1 = 1 << N
	vsetivli	zero, 2, e64, m1, ta, ma
	vmv.v.i		v0, 0
	vsetivli	zero, 1, e64, m1, tu, ma
	vmv.v.x		v0, t1
	vbrev8.v	v0, v0
	vsetvli		zero, LEN32, e32, m4, ta, ma
	vmv.v.i		MULTS_BREV, 0
	vaesz.vs	MULTS_BREV, v0

	j		.Linit_done\@

.Linit_single_block\@:
	vbrev8.v	TWEAKS_BREV, TWEAKS
.Linit_done\@:
.endm

// Set the first 128 bits of MULTS_BREV to 0x40, i.e. 'x' bit-reversed.  This is
// the multiplier required to advance the tweak by one.
.macro	load_x
	li		t0, 0x40
	vsetivli	zero, 4, e32, m1, ta, ma
	vmv.v.i		MULTS_BREV, 0
	vsetivli	zero, 1, e8, m1, tu, ma
	vmv.v.x		MULTS_BREV, t0
.endm
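// Illustration only (this comment contributes nothing to the generated code):
// how the tweak schedule advances in the main loop below, assuming VLEN=128,
// i.e. N=4 blocks per iteration.
//
//	xts_init:	TWEAKS = [T, T*x, T*x^2, T*x^3]
//			MULTS_BREV = four copies of x^4, bit-reversed
//	iteration 1:	process blocks with T*x^0 .. T*x^3, then vgmul by x^4
//	iteration 2:	process blocks with T*x^4 .. T*x^7, then vgmul by x^4
//	...
//
// Only the final iteration is allowed to process fewer than N blocks, which is
// why the loop keeps vl at VLMAX while at least VLMAX elements remain.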
.macro	__aes_xts_crypt	enc, keylen
	// With 16 < len <= 31, there's no main loop, just ciphertext stealing.
	beqz		LEN32, .Lcts_without_main_loop\@

	vsetvli		VLMAX, zero, e32, m4, ta, ma
1:
	vsetvli		VL, LEN32, e32, m4, ta, ma
2:
	// Encrypt or decrypt VL/4 blocks.
	vle32.v		TMP0, (INP)
	vxor.vv		TMP0, TMP0, TWEAKS
	aes_crypt	TMP0, \enc, \keylen
	vxor.vv		TMP0, TMP0, TWEAKS
	vse32.v		TMP0, (OUTP)

	// Update the pointers and the remaining length.
	slli		t0, VL, 2
	add		INP, INP, t0
	add		OUTP, OUTP, t0
	sub		LEN32, LEN32, VL

	// Check whether more blocks remain.
	beqz		LEN32, .Lmain_loop_done\@

	// Compute the next sequence of tweaks by multiplying the previous
	// sequence by x^N.  Store the result in both bit-reversed order and
	// regular order (i.e. with the bit reversal undone).
	vgmul.vv	TWEAKS_BREV, MULTS_BREV
	vbrev8.v	TWEAKS, TWEAKS_BREV

	// Since we compute the tweak multipliers x^N in advance, we require
	// that each iteration process the same length, except possibly the
	// last.  This conflicts slightly with the behavior allowed by the
	// RISC-V Vector Extension, where CPUs can select a lower length for
	// both of the last two iterations.  E.g., vl might take the sequence
	// of values [16, 16, 16, 12, 12], whereas we need [16, 16, 16, 16, 8]
	// so that we can use x^4 again instead of computing x^3.  Therefore,
	// we explicitly keep the vl at VLMAX if there is at least VLMAX
	// remaining.
	bge		LEN32, VLMAX, 2b
	j		1b

.Lmain_loop_done\@:
	load_x

	// Compute the next tweak.
	addi		t0, VL, -4
	vsetivli	zero, 4, e32, m4, ta, ma
	vslidedown.vx	TWEAKS_BREV, TWEAKS_BREV, t0	// Extract last tweak
	vsetivli	zero, 4, e32, m1, ta, ma
	vgmul.vv	TWEAKS_BREV, MULTS_BREV		// Advance to next tweak

	bnez		TAIL_LEN, .Lcts\@

	// Update *TWEAKP to contain the next tweak.
	vbrev8.v	TWEAKS, TWEAKS_BREV
	vse32.v		TWEAKS, (TWEAKP)
	ret

.Lcts_without_main_loop\@:
	load_x
.Lcts\@:
	// TWEAKS_BREV now contains the next tweak.  Compute the one after that.
	vsetivli	zero, 4, e32, m1, ta, ma
	vmv.v.v		TMP0, TWEAKS_BREV
	vgmul.vv	TMP0, MULTS_BREV
	// Undo the bit reversal of the next two tweaks and store them in TMP1
	// and TMP2, such that TMP1 is the first needed and TMP2 the second.
.if \enc
	vbrev8.v	TMP1, TWEAKS_BREV
	vbrev8.v	TMP2, TMP0
.else
	vbrev8.v	TMP1, TMP0
	vbrev8.v	TMP2, TWEAKS_BREV
.endif

	// Encrypt/decrypt the last full block.
	vle32.v		TMP0, (INP)
	vxor.vv		TMP0, TMP0, TMP1
	aes_crypt	TMP0, \enc, \keylen
	vxor.vv		TMP0, TMP0, TMP1

	// Swap the first TAIL_LEN bytes of the above result with the tail.
	// Note that to support in-place encryption/decryption, the load from
	// the input tail must happen before the store to the output tail.
	addi		t0, INP, 16
	addi		t1, OUTP, 16
	vmv.v.v		TMP3, TMP0
	vsetvli		zero, TAIL_LEN, e8, m1, tu, ma
	vle8.v		TMP0, (t0)
	vse8.v		TMP3, (t1)

	// Encrypt/decrypt again and store the last full block.
	vsetivli	zero, 4, e32, m1, ta, ma
	vxor.vv		TMP0, TMP0, TMP2
	aes_crypt	TMP0, \enc, \keylen
	vxor.vv		TMP0, TMP0, TMP2
	vse32.v		TMP0, (OUTP)
	ret
.endm

.macro	aes_xts_crypt	enc
	// Check whether the length is a multiple of the AES block size.
	andi		TAIL_LEN, LEN, 15
	beqz		TAIL_LEN, 1f

	// The length isn't a multiple of the AES block size, so ciphertext
	// stealing will be required.  Ciphertext stealing involves special
	// handling of the partial block and the last full block, so subtract
	// the length of both from the length to be processed in the main loop.
	sub		LEN, LEN, TAIL_LEN
	addi		LEN, LEN, -16
1:
	srli		LEN32, LEN, 2
	// LEN and LEN32 now contain the total length of the blocks that will
	// be processed in the main loop, in bytes and 32-bit words
	// respectively.

	xts_init
	aes_begin	KEYP, 128f, 192f
	__aes_xts_crypt	\enc, 256
128:
	__aes_xts_crypt	\enc, 128
192:
	__aes_xts_crypt	\enc, 192
.endm

// void aes_xts_encrypt_zvkned_zvbb_zvkg(const struct crypto_aes_ctx *key,
//					 const u8 *in, u8 *out, size_t len,
//					 u8 tweak[16]);
//
// |key| is the data key.  |tweak| contains the next tweak; the encryption of
// the original IV with the tweak key was already done.  This function supports
// incremental computation, but |len| must always be >= 16 (AES_BLOCK_SIZE),
// and |len| must be a multiple of 16 except on the last call.  If |len| is a
// multiple of 16, then this function updates |tweak| to contain the next tweak.
SYM_FUNC_START(aes_xts_encrypt_zvkned_zvbb_zvkg)
	aes_xts_crypt	1
SYM_FUNC_END(aes_xts_encrypt_zvkned_zvbb_zvkg)

// Same prototype and calling convention as the encryption function.
SYM_FUNC_START(aes_xts_decrypt_zvkned_zvbb_zvkg)
	aes_xts_crypt	0
SYM_FUNC_END(aes_xts_decrypt_zvkned_zvbb_zvkg)
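// Illustrative C-side call pattern (a sketch only; everything except the two
// function names above is an assumption about the caller).  The caller first
// encrypts the original IV with the tweak key to produce |tweak|, then may
// split the message across calls as long as every call handles at least 16
// bytes and every call except the last handles a multiple of 16 bytes:
//
//	u8 tweak[16];	/* = AES-encrypt(tweak_key, IV), computed elsewhere */
//
//	aes_xts_encrypt_zvkned_zvbb_zvkg(key, src, dst, 4096, tweak);
//	aes_xts_encrypt_zvkned_zvbb_zvkg(key, src + 4096, dst + 4096, 23, tweak);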