diff options
152 files changed, 13777 insertions, 5297 deletions
diff --git a/Documentation/ABI/testing/sysfs-driver-uacce b/Documentation/ABI/testing/sysfs-driver-uacce new file mode 100644 index 000000000000..08f2591138af --- /dev/null +++ b/Documentation/ABI/testing/sysfs-driver-uacce @@ -0,0 +1,39 @@ +What: /sys/class/uacce/<dev_name>/api +Date: Feb 2020 +KernelVersion: 5.7 +Contact: linux-accelerators@lists.ozlabs.org +Description: Api of the device + Can be any string and up to userspace to parse. + Application use the api to match the correct driver + +What: /sys/class/uacce/<dev_name>/flags +Date: Feb 2020 +KernelVersion: 5.7 +Contact: linux-accelerators@lists.ozlabs.org +Description: Attributes of the device, see UACCE_DEV_xxx flag defined in uacce.h + +What: /sys/class/uacce/<dev_name>/available_instances +Date: Feb 2020 +KernelVersion: 5.7 +Contact: linux-accelerators@lists.ozlabs.org +Description: Available instances left of the device + Return -ENODEV if uacce_ops get_available_instances is not provided + +What: /sys/class/uacce/<dev_name>/algorithms +Date: Feb 2020 +KernelVersion: 5.7 +Contact: linux-accelerators@lists.ozlabs.org +Description: Algorithms supported by this accelerator, separated by new line. + Can be any string and up to userspace to parse. 
+ +What: /sys/class/uacce/<dev_name>/region_mmio_size +Date: Feb 2020 +KernelVersion: 5.7 +Contact: linux-accelerators@lists.ozlabs.org +Description: Size (bytes) of mmio region queue file + +What: /sys/class/uacce/<dev_name>/region_dus_size +Date: Feb 2020 +KernelVersion: 5.7 +Contact: linux-accelerators@lists.ozlabs.org +Description: Size (bytes) of dus region queue file diff --git a/Documentation/devicetree/bindings/crypto/xlnx,zynqmp-aes.yaml b/Documentation/devicetree/bindings/crypto/xlnx,zynqmp-aes.yaml new file mode 100644 index 000000000000..55dd6e3d270d --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/xlnx,zynqmp-aes.yaml @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/crypto/xlnx,zynqmp-aes.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Xilinx ZynqMP AES-GCM Hardware Accelerator Device Tree Bindings + +maintainers: + - Kalyani Akula <kalyani.akula@xilinx.com> + - Michal Simek <michal.simek@xilinx.com> + +description: | + The ZynqMP AES-GCM hardened cryptographic accelerator is used to + encrypt or decrypt the data with provided key and initialization vector. + +properties: + compatible: + const: xlnx,zynqmp-aes + +required: + - compatible + +additionalProperties: false + +examples: + - | + firmware { + zynqmp_firmware: zynqmp-firmware { + compatible = "xlnx,zynqmp-firmware"; + method = "smc"; + xlnx_aes: zynqmp-aes { + compatible = "xlnx,zynqmp-aes"; + }; + }; + }; +... diff --git a/Documentation/misc-devices/uacce.rst b/Documentation/misc-devices/uacce.rst new file mode 100644 index 000000000000..1db412e9b1a3 --- /dev/null +++ b/Documentation/misc-devices/uacce.rst @@ -0,0 +1,176 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Introduction of Uacce +--------------------- + +Uacce (Unified/User-space-access-intended Accelerator Framework) targets to +provide Shared Virtual Addressing (SVA) between accelerators and processes. 
+So the accelerator can access any data structure of the main cpu. +This differs from the data sharing between a cpu and an io device, which share +only data content rather than addresses. +Because of the unified address, the hardware and the user space of a process can +share the same virtual address in the communication. +Uacce takes the hardware accelerator as a heterogeneous processor, while the +IOMMU shares the same CPU page tables and as a result the same translation +from va to pa. + +:: + + __________________________ __________________________ + | | | | + | User application (CPU) | | Hardware Accelerator | + |__________________________| |__________________________| + + | | + | va | va + V V + __________ __________ + | | | | + | MMU | | IOMMU | + |__________| |__________| + | | + | | + V pa V pa + _______________________________________ + | | + | Memory | + |_______________________________________| + + + +Architecture +------------ + +Uacce is the kernel module, taking charge of iommu and address sharing. +The user drivers and libraries are called WarpDrive. + +The uacce device, built around the IOMMU SVA API, can access multiple +address spaces, including the one without PASID. + +A virtual concept, the queue, is used for the communication. It provides a +FIFO-like interface, and it maintains a unified address space between the +application and all involved hardware. 
+ +:: + + ___________________ ________________ + | | user API | | + | WarpDrive library | ------------> | user driver | + |___________________| |________________| + | | + | | + | queue fd | + | | + | | + v | + ___________________ _________ | + | | | | | mmap memory + | Other framework | | uacce | | r/w interface + | crypto/nic/others | |_________| | + |___________________| | + | | | + | register | register | + | | | + | | | + | _________________ __________ | + | | | | | | + ------------- | Device Driver | | IOMMU | | + |_________________| |__________| | + | | + | V + | ___________________ + | | | + -------------------------- | Device(Hardware) | + |___________________| + + +How does it work +---------------- + +Uacce uses mmap and IOMMU to play the trick. + +Uacce creates a chrdev for every device registered to it. A new queue is +created when a user application opens the chrdev. The file descriptor is used +as the user handle of the queue. +The accelerator device presents itself as a Uacce object, which is exported as +a chrdev to the user space. The user application communicates with the +hardware by ioctl (as the control path) or shared memory (as the data path). + +The control path to the hardware is via file operations, while the data path is +via the mmap space of the queue fd. + +The queue file address space: + +:: + + /** + * enum uacce_qfrt: qfrt type + * @UACCE_QFRT_MMIO: device mmio region + * @UACCE_QFRT_DUS: device user share region + */ + enum uacce_qfrt { + UACCE_QFRT_MMIO = 0, + UACCE_QFRT_DUS = 1, + }; + +All regions are optional and differ from device type to type. +Each region can be mmapped only once, otherwise -EEXIST is returned. + +The device mmio region is mapped to the hardware mmio space. It is generally +used for doorbells or other notifications to the hardware. It is not fast enough +to serve as a data channel. + +The device user share region is used for sharing data buffers between the user process +and the device. 
+ + +The Uacce register API +---------------------- + +The register API is defined in uacce.h. + +:: + + struct uacce_interface { + char name[UACCE_MAX_NAME_SIZE]; + unsigned int flags; + const struct uacce_ops *ops; + }; + +According to the IOMMU capability, uacce_interface flags can be: + +:: + + /** + * UACCE Device flags: + * UACCE_DEV_SVA: Shared Virtual Addresses + * Support PASID + * Support device page faults (PCI PRI or SMMU Stall) + */ + #define UACCE_DEV_SVA BIT(0) + + struct uacce_device *uacce_alloc(struct device *parent, + struct uacce_interface *interface); + int uacce_register(struct uacce_device *uacce); + void uacce_remove(struct uacce_device *uacce); + +uacce_alloc results can be: + +a. If the uacce module is not compiled, ERR_PTR(-ENODEV) + +b. Success with the desired flags + +c. Success with the negotiated flags, for example + + uacce_interface.flags = UACCE_DEV_SVA but uacce->flags = ~UACCE_DEV_SVA + + So the user driver needs to check the return value as well as the negotiated uacce->flags. + + +The user driver +--------------- + +The queue file mmap space will need a user driver to wrap the communication +protocol. Uacce provides some attributes in sysfs for the user driver to +match the right accelerator accordingly. +More details are in Documentation/ABI/testing/sysfs-driver-uacce. 
diff --git a/MAINTAINERS b/MAINTAINERS index 195052943574..b76e11384c3b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4577,7 +4577,9 @@ S: Supported F: drivers/scsi/cxgbi/cxgb3i CXGB4 CRYPTO DRIVER (chcr) -M: Atul Gupta <atul.gupta@chelsio.com> +M: Ayush Sawal <ayush.sawal@chelsio.com> +M: Vinay Kumar Yadav <vinay.yadav@chelsio.com> +M: Rohit Maheshwari <rohitm@chelsio.com> L: linux-crypto@vger.kernel.org W: http://www.chelsio.com S: Supported @@ -10066,6 +10068,7 @@ F: Documentation/devicetree/bindings/phy/phy-mvebu-utmi.txt MARVELL CRYPTO DRIVER M: Boris Brezillon <bbrezillon@kernel.org> M: Arnaud Ebalard <arno@natisbad.org> +M: Srujana Challa <schalla@marvell.com> F: drivers/crypto/marvell/ S: Maintained L: linux-crypto@vger.kernel.org @@ -17139,6 +17142,18 @@ W: http://linuxtv.org S: Maintained F: drivers/media/pci/tw686x/ +UACCE ACCELERATOR FRAMEWORK +M: Zhangfei Gao <zhangfei.gao@linaro.org> +M: Zhou Wang <wangzhou1@hisilicon.com> +L: linux-accelerators@lists.ozlabs.org +L: linux-kernel@vger.kernel.org +S: Maintained +F: Documentation/ABI/testing/sysfs-driver-uacce +F: Documentation/misc-devices/uacce.rst +F: drivers/misc/uacce/ +F: include/linux/uacce.h +F: include/uapi/misc/uacce/ + UBI FILE SYSTEM (UBIFS) M: Richard Weinberger <richard@nod.at> L: linux-mtd@lists.infradead.org diff --git a/arch/arm/crypto/.gitignore b/arch/arm/crypto/.gitignore index 31e1f538df7d..a3c7ad52a469 100644 --- a/arch/arm/crypto/.gitignore +++ b/arch/arm/crypto/.gitignore @@ -1,3 +1,4 @@ aesbs-core.S sha256-core.S sha512-core.S +poly1305-core.S diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index e85839a8aaeb..e6fd32919c81 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -138,6 +138,7 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key, kernel_neon_begin(); aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); kernel_neon_end(); + memzero_explicit(&rk, sizeof(rk)); return 
crypto_cipher_setkey(ctx->enc_tfm, in_key, key_len); } diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S index 534c9647726d..9f51e3fa4526 100644 --- a/arch/arm/crypto/ghash-ce-core.S +++ b/arch/arm/crypto/ghash-ce-core.S @@ -8,6 +8,9 @@ #include <linux/linkage.h> #include <asm/assembler.h> + .arch armv8-a + .fpu crypto-neon-fp-armv8 + SHASH .req q0 T1 .req q1 XL .req q2 @@ -88,8 +91,6 @@ T3_H .req d17 .text - .arch armv8-a - .fpu crypto-neon-fp-armv8 .macro __pmull_p64, rd, rn, rm, b1, b2, b3, b4 vmull.p64 \rd, \rn, \rm diff --git a/arch/arm64/crypto/.gitignore b/arch/arm64/crypto/.gitignore index 879df8781ed5..e403b1343328 100644 --- a/arch/arm64/crypto/.gitignore +++ b/arch/arm64/crypto/.gitignore @@ -1,2 +1,3 @@ sha256-core.S sha512-core.S +poly1305-core.S diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index e3e27349a9fe..fb507d569922 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -151,6 +151,7 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key, kernel_neon_begin(); aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); kernel_neon_end(); + memzero_explicit(&rk, sizeof(rk)); return 0; } diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index 63c875d3314b..565ef604ca04 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -91,12 +91,32 @@ static int sha1_ce_final(struct shash_desc *desc, u8 *out) return sha1_base_finish(desc, out); } +static int sha1_ce_export(struct shash_desc *desc, void *out) +{ + struct sha1_ce_state *sctx = shash_desc_ctx(desc); + + memcpy(out, &sctx->sst, sizeof(struct sha1_state)); + return 0; +} + +static int sha1_ce_import(struct shash_desc *desc, const void *in) +{ + struct sha1_ce_state *sctx = shash_desc_ctx(desc); + + memcpy(&sctx->sst, in, sizeof(struct sha1_state)); + sctx->finalize = 0; + return 0; +} + static struct 
shash_alg alg = { .init = sha1_base_init, .update = sha1_ce_update, .final = sha1_ce_final, .finup = sha1_ce_finup, + .import = sha1_ce_import, + .export = sha1_ce_export, .descsize = sizeof(struct sha1_ce_state), + .statesize = sizeof(struct sha1_state), .digestsize = SHA1_DIGEST_SIZE, .base = { .cra_name = "sha1", diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index a8e67bafba3d..9450d19b9e6e 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -109,12 +109,32 @@ static int sha256_ce_final(struct shash_desc *desc, u8 *out) return sha256_base_finish(desc, out); } +static int sha256_ce_export(struct shash_desc *desc, void *out) +{ + struct sha256_ce_state *sctx = shash_desc_ctx(desc); + + memcpy(out, &sctx->sst, sizeof(struct sha256_state)); + return 0; +} + +static int sha256_ce_import(struct shash_desc *desc, const void *in) +{ + struct sha256_ce_state *sctx = shash_desc_ctx(desc); + + memcpy(&sctx->sst, in, sizeof(struct sha256_state)); + sctx->finalize = 0; + return 0; +} + static struct shash_alg algs[] = { { .init = sha224_base_init, .update = sha256_ce_update, .final = sha256_ce_final, .finup = sha256_ce_finup, + .export = sha256_ce_export, + .import = sha256_ce_import, .descsize = sizeof(struct sha256_ce_state), + .statesize = sizeof(struct sha256_state), .digestsize = SHA224_DIGEST_SIZE, .base = { .cra_name = "sha224", @@ -128,7 +148,10 @@ static struct shash_alg algs[] = { { .update = sha256_ce_update, .final = sha256_ce_final, .finup = sha256_ce_finup, + .export = sha256_ce_export, + .import = sha256_ce_import, .descsize = sizeof(struct sha256_ce_state), + .statesize = sizeof(struct sha256_state), .digestsize = SHA256_DIGEST_SIZE, .base = { .cra_name = "sha256", diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c index eec7d2d24239..8a17621f7d3a 100644 --- a/arch/x86/crypto/curve25519-x86_64.c +++ b/arch/x86/crypto/curve25519-x86_64.c @@ -1,8 +1,7 @@ 
-// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// SPDX-License-Identifier: GPL-2.0 OR MIT /* - * Copyright (c) 2017 Armando Faz <armfazh@ic.unicamp.br>. All Rights Reserved. - * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. - * Copyright (C) 2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved. + * Copyright (C) 2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation */ #include <crypto/curve25519.h> @@ -16,2337 +15,1378 @@ #include <asm/cpufeature.h> #include <asm/processor.h> -static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_adx); - -enum { NUM_WORDS_ELTFP25519 = 4 }; -typedef __aligned(32) u64 eltfp25519_1w[NUM_WORDS_ELTFP25519]; -typedef __aligned(32) u64 eltfp25519_1w_buffer[2 * NUM_WORDS_ELTFP25519]; - -#define mul_eltfp25519_1w_adx(c, a, b) do { \ - mul_256x256_integer_adx(m.buffer, a, b); \ - red_eltfp25519_1w_adx(c, m.buffer); \ -} while (0) - -#define mul_eltfp25519_1w_bmi2(c, a, b) do { \ - mul_256x256_integer_bmi2(m.buffer, a, b); \ - red_eltfp25519_1w_bmi2(c, m.buffer); \ -} while (0) - -#define sqr_eltfp25519_1w_adx(a) do { \ - sqr_256x256_integer_adx(m.buffer, a); \ - red_eltfp25519_1w_adx(a, m.buffer); \ -} while (0) - -#define sqr_eltfp25519_1w_bmi2(a) do { \ - sqr_256x256_integer_bmi2(m.buffer, a); \ - red_eltfp25519_1w_bmi2(a, m.buffer); \ -} while (0) - -#define mul_eltfp25519_2w_adx(c, a, b) do { \ - mul2_256x256_integer_adx(m.buffer, a, b); \ - red_eltfp25519_2w_adx(c, m.buffer); \ -} while (0) - -#define mul_eltfp25519_2w_bmi2(c, a, b) do { \ - mul2_256x256_integer_bmi2(m.buffer, a, b); \ - red_eltfp25519_2w_bmi2(c, m.buffer); \ -} while (0) - -#define sqr_eltfp25519_2w_adx(a) do { \ - sqr2_256x256_integer_adx(m.buffer, a); \ - red_eltfp25519_2w_adx(a, m.buffer); \ -} while (0) - -#define sqr_eltfp25519_2w_bmi2(a) do { \ - 
sqr2_256x256_integer_bmi2(m.buffer, a); \ - red_eltfp25519_2w_bmi2(a, m.buffer); \ -} while (0) - -#define sqrn_eltfp25519_1w_adx(a, times) do { \ - int ____counter = (times); \ - while (____counter-- > 0) \ - sqr_eltfp25519_1w_adx(a); \ -} while (0) - -#define sqrn_eltfp25519_1w_bmi2(a, times) do { \ - int ____counter = (times); \ - while (____counter-- > 0) \ - sqr_eltfp25519_1w_bmi2(a); \ -} while (0) - -#define copy_eltfp25519_1w(C, A) do { \ - (C)[0] = (A)[0]; \ - (C)[1] = (A)[1]; \ - (C)[2] = (A)[2]; \ - (C)[3] = (A)[3]; \ -} while (0) - -#define setzero_eltfp25519_1w(C) do { \ - (C)[0] = 0; \ - (C)[1] = 0; \ - (C)[2] = 0; \ - (C)[3] = 0; \ -} while (0) - -__aligned(32) static const u64 table_ladder_8k[252 * NUM_WORDS_ELTFP25519] = { - /* 1 */ 0xfffffffffffffff3UL, 0xffffffffffffffffUL, - 0xffffffffffffffffUL, 0x5fffffffffffffffUL, - /* 2 */ 0x6b8220f416aafe96UL, 0x82ebeb2b4f566a34UL, - 0xd5a9a5b075a5950fUL, 0x5142b2cf4b2488f4UL, - /* 3 */ 0x6aaebc750069680cUL, 0x89cf7820a0f99c41UL, - 0x2a58d9183b56d0f4UL, 0x4b5aca80e36011a4UL, - /* 4 */ 0x329132348c29745dUL, 0xf4a2e616e1642fd7UL, - 0x1e45bb03ff67bc34UL, 0x306912d0f42a9b4aUL, - /* 5 */ 0xff886507e6af7154UL, 0x04f50e13dfeec82fUL, - 0xaa512fe82abab5ceUL, 0x174e251a68d5f222UL, - /* 6 */ 0xcf96700d82028898UL, 0x1743e3370a2c02c5UL, - 0x379eec98b4e86eaaUL, 0x0c59888a51e0482eUL, - /* 7 */ 0xfbcbf1d699b5d189UL, 0xacaef0d58e9fdc84UL, - 0xc1c20d06231f7614UL, 0x2938218da274f972UL, - /* 8 */ 0xf6af49beff1d7f18UL, 0xcc541c22387ac9c2UL, - 0x96fcc9ef4015c56bUL, 0x69c1627c690913a9UL, - /* 9 */ 0x7a86fd2f4733db0eUL, 0xfdb8c4f29e087de9UL, - 0x095e4b1a8ea2a229UL, 0x1ad7a7c829b37a79UL, - /* 10 */ 0x342d89cad17ea0c0UL, 0x67bedda6cced2051UL, - 0x19ca31bf2bb42f74UL, 0x3df7b4c84980acbbUL, - /* 11 */ 0xa8c6444dc80ad883UL, 0xb91e440366e3ab85UL, - 0xc215cda00164f6d8UL, 0x3d867c6ef247e668UL, - /* 12 */ 0xc7dd582bcc3e658cUL, 0xfd2c4748ee0e5528UL, - 0xa0fd9b95cc9f4f71UL, 0x7529d871b0675ddfUL, - /* 13 */ 0xb8f568b42d3cbd78UL, 
0x1233011b91f3da82UL, - 0x2dce6ccd4a7c3b62UL, 0x75e7fc8e9e498603UL, - /* 14 */ 0x2f4f13f1fcd0b6ecUL, 0xf1a8ca1f29ff7a45UL, - 0xc249c1a72981e29bUL, 0x6ebe0dbb8c83b56aUL, - /* 15 */ 0x7114fa8d170bb222UL, 0x65a2dcd5bf93935fUL, - 0xbdc41f68b59c979aUL, 0x2f0eef79a2ce9289UL, - /* 16 */ 0x42ecbf0c083c37ceUL, 0x2930bc09ec496322UL, - 0xf294b0c19cfeac0dUL, 0x3780aa4bedfabb80UL, - /* 17 */ 0x56c17d3e7cead929UL, 0xe7cb4beb2e5722c5UL, - 0x0ce931732dbfe15aUL, 0x41b883c7621052f8UL, - /* 18 */ 0xdbf75ca0c3d25350UL, 0x2936be086eb1e351UL, - 0xc936e03cb4a9b212UL, 0x1d45bf82322225aaUL, - /* 19 */ 0xe81ab1036a024cc5UL, 0xe212201c304c9a72UL, - 0xc5d73fba6832b1fcUL, 0x20ffdb5a4d839581UL, - /* 20 */ 0xa283d367be5d0fadUL, 0x6c2b25ca8b164475UL, - 0x9d4935467caaf22eUL, 0x5166408eee85ff49UL, - /* 21 */ 0x3c67baa2fab4e361UL, 0xb3e433c67ef35cefUL, - 0x5259729241159b1cUL, 0x6a621892d5b0ab33UL, - /* 22 */ 0x20b74a387555cdcbUL, 0x532aa10e1208923fUL, - 0xeaa17b7762281dd1UL, 0x61ab3443f05c44bfUL, - /* 23 */ 0x257a6c422324def8UL, 0x131c6c1017e3cf7fUL, - 0x23758739f630a257UL, 0x295a407a01a78580UL, - /* 24 */ 0xf8c443246d5da8d9UL, 0x19d775450c52fa5dUL, - 0x2afcfc92731bf83dUL, 0x7d10c8e81b2b4700UL, - /* 25 */ 0xc8e0271f70baa20bUL, 0x993748867ca63957UL, - 0x5412efb3cb7ed4bbUL, 0x3196d36173e62975UL, - /* 26 */ 0xde5bcad141c7dffcUL, 0x47cc8cd2b395c848UL, - 0xa34cd942e11af3cbUL, 0x0256dbf2d04ecec2UL, - /* 27 */ 0x875ab7e94b0e667fUL, 0xcad4dd83c0850d10UL, - 0x47f12e8f4e72c79fUL, 0x5f1a87bb8c85b19bUL, - /* 28 */ 0x7ae9d0b6437f51b8UL, 0x12c7ce5518879065UL, - 0x2ade09fe5cf77aeeUL, 0x23a05a2f7d2c5627UL, - /* 29 */ 0x5908e128f17c169aUL, 0xf77498dd8ad0852dUL, - 0x74b4c4ceab102f64UL, 0x183abadd10139845UL, - /* 30 */ 0xb165ba8daa92aaacUL, 0xd5c5ef9599386705UL, - 0xbe2f8f0cf8fc40d1UL, 0x2701e635ee204514UL, - /* 31 */ 0x629fa80020156514UL, 0xf223868764a8c1ceUL, - 0x5b894fff0b3f060eUL, 0x60d9944cf708a3faUL, - /* 32 */ 0xaeea001a1c7a201fUL, 0xebf16a633ee2ce63UL, - 0x6f7709594c7a07e1UL, 0x79b958150d0208cbUL, - /* 33 */ 
0x24b55e5301d410e7UL, 0xe3a34edff3fdc84dUL, - 0xd88768e4904032d8UL, 0x131384427b3aaeecUL, - /* 34 */ 0x8405e51286234f14UL, 0x14dc4739adb4c529UL, - 0xb8a2b5b250634ffdUL, 0x2fe2a94ad8a7ff93UL, - /* 35 */ 0xec5c57efe843faddUL, 0x2843ce40f0bb9918UL, - 0xa4b561d6cf3d6305UL, 0x743629bde8fb777eUL, - /* 36 */ 0x343edd46bbaf738fUL, 0xed981828b101a651UL, - 0xa401760b882c797aUL, 0x1fc223e28dc88730UL, - /* 37 */ 0x48604e91fc0fba0eUL, 0xb637f78f052c6fa4UL, - 0x91ccac3d09e9239cUL, 0x23f7eed4437a687cUL, - /* 38 */ 0x5173b1118d9bd800UL, 0x29d641b63189d4a7UL, - 0xfdbf177988bbc586UL, 0x2959894fcad81df5UL, - /* 39 */ 0xaebc8ef3b4bbc899UL, 0x4148995ab26992b9UL, - 0x24e20b0134f92cfbUL, 0x40d158894a05dee8UL, - /* 40 */ 0x46b00b1185af76f6UL, 0x26bac77873187a79UL, - 0x3dc0bf95ab8fff5fUL, 0x2a608bd8945524d7UL, - /* 41 */ 0x26449588bd446302UL, 0x7c4bc21c0388439cUL, - 0x8e98a4f383bd11b2UL, 0x26218d7bc9d876b9UL, - /* 42 */ 0xe3081542997c178aUL, 0x3c2d29a86fb6606fUL, - 0x5c217736fa279374UL, 0x7dde05734afeb1faUL, - /* 43 */ 0x3bf10e3906d42babUL, 0xe4f7803e1980649cUL, - 0xe6053bf89595bf7aUL, 0x394faf38da245530UL, - /* 44 */ 0x7a8efb58896928f4UL, 0xfbc778e9cc6a113cUL, - 0x72670ce330af596fUL, 0x48f222a81d3d6cf7UL, - /* 45 */ 0xf01fce410d72caa7UL, 0x5a20ecc7213b5595UL, - 0x7bc21165c1fa1483UL, 0x07f89ae31da8a741UL, - /* 46 */ 0x05d2c2b4c6830ff9UL, 0xd43e330fc6316293UL, - 0xa5a5590a96d3a904UL, 0x705edb91a65333b6UL, - /* 47 */ 0x048ee15e0bb9a5f7UL, 0x3240cfca9e0aaf5dUL, - 0x8f4b71ceedc4a40bUL, 0x621c0da3de544a6dUL, - /* 48 */ 0x92872836a08c4091UL, 0xce8375b010c91445UL, - 0x8a72eb524f276394UL, 0x2667fcfa7ec83635UL, - /* 49 */ 0x7f4c173345e8752aUL, 0x061b47feee7079a5UL, - 0x25dd9afa9f86ff34UL, 0x3780cef5425dc89cUL, - /* 50 */ 0x1a46035a513bb4e9UL, 0x3e1ef379ac575adaUL, - 0xc78c5f1c5fa24b50UL, 0x321a967634fd9f22UL, - /* 51 */ 0x946707b8826e27faUL, 0x3dca84d64c506fd0UL, - 0xc189218075e91436UL, 0x6d9284169b3b8484UL, - /* 52 */ 0x3a67e840383f2ddfUL, 0x33eec9a30c4f9b75UL, - 0x3ec7c86fa783ef47UL, 
0x26ec449fbac9fbc4UL, - /* 53 */ 0x5c0f38cba09b9e7dUL, 0x81168cc762a3478cUL, - 0x3e23b0d306fc121cUL, 0x5a238aa0a5efdcddUL, - /* 54 */ 0x1ba26121c4ea43ffUL, 0x36f8c77f7c8832b5UL, - 0x88fbea0b0adcf99aUL, 0x5ca9938ec25bebf9UL, - /* 55 */ 0xd5436a5e51fccda0UL, 0x1dbc4797c2cd893bUL, - 0x19346a65d3224a08UL, 0x0f5034e49b9af466UL, - /* 56 */ 0xf23c3967a1e0b96eUL, 0xe58b08fa867a4d88UL, - 0xfb2fabc6a7341679UL, 0x2a75381eb6026946UL, - /* 57 */ 0xc80a3be4c19420acUL, 0x66b1f6c681f2b6dcUL, - 0x7cf7036761e93388UL, 0x25abbbd8a660a4c4UL, - /* 58 */ 0x91ea12ba14fd5198UL, 0x684950fc4a3cffa9UL, - 0xf826842130f5ad28UL, 0x3ea988f75301a441UL, - /* 59 */ 0xc978109a695f8c6fUL, 0x1746eb4a0530c3f3UL, - 0x444d6d77b4459995UL, 0x75952b8c054e5cc7UL, - /* 60 */ 0xa3703f7915f4d6aaUL, 0x66c346202f2647d8UL, - 0xd01469df811d644bUL, 0x77fea47d81a5d71fUL, - /* 61 */ 0xc5e9529ef57ca381UL, 0x6eeeb4b9ce2f881aUL, - 0xb6e91a28e8009bd6UL, 0x4b80be3e9afc3fecUL, - /* 62 */ 0x7e3773c526aed2c5UL, 0x1b4afcb453c9a49dUL, - 0xa920bdd7baffb24dUL, 0x7c54699f122d400eUL, - /* 63 */ 0xef46c8e14fa94bc8UL, 0xe0b074ce2952ed5eUL, - 0xbea450e1dbd885d5UL, 0x61b68649320f712cUL, - /* 64 */ 0x8a485f7309ccbdd1UL, 0xbd06320d7d4d1a2dUL, - 0x25232973322dbef4UL, 0x445dc4758c17f770UL, - /* 65 */ 0xdb0434177cc8933cUL, 0xed6fe82175ea059fUL, - 0x1efebefdc053db34UL, 0x4adbe867c65daf99UL, - /* 66 */ 0x3acd71a2a90609dfUL, 0xe5e991856dd04050UL, - 0x1ec69b688157c23cUL, 0x697427f6885cfe4dUL, - /* 67 */ 0xd7be7b9b65e1a851UL, 0xa03d28d522c536ddUL, - 0x28399d658fd2b645UL, 0x49e5b7e17c2641e1UL, - /* 68 */ 0x6f8c3a98700457a4UL, 0x5078f0a25ebb6778UL, - 0xd13c3ccbc382960fUL, 0x2e003258a7df84b1UL, - /* 69 */ 0x8ad1f39be6296a1cUL, 0xc1eeaa652a5fbfb2UL, - 0x33ee0673fd26f3cbUL, 0x59256173a69d2cccUL, - /* 70 */ 0x41ea07aa4e18fc41UL, 0xd9fc19527c87a51eUL, - 0xbdaacb805831ca6fUL, 0x445b652dc916694fUL, - /* 71 */ 0xce92a3a7f2172315UL, 0x1edc282de11b9964UL, - 0xa1823aafe04c314aUL, 0x790a2d94437cf586UL, - /* 72 */ 0x71c447fb93f6e009UL, 0x8922a56722845276UL, - 
0xbf70903b204f5169UL, 0x2f7a89891ba319feUL, - /* 73 */ 0x02a08eb577e2140cUL, 0xed9a4ed4427bdcf4UL, - 0x5253ec44e4323cd1UL, 0x3e88363c14e9355bUL, - /* 74 */ 0xaa66c14277110b8cUL, 0x1ae0391610a23390UL, - 0x2030bd12c93fc2a2UL, 0x3ee141579555c7abUL, - /* 75 */ 0x9214de3a6d6e7d41UL, 0x3ccdd88607f17efeUL, - 0x674f1288f8e11217UL, 0x5682250f329f93d0UL, - /* 76 */ 0x6cf00b136d2e396eUL, 0x6e4cf86f1014debfUL, - 0x5930b1b5bfcc4e83UL, 0x047069b48aba16b6UL, - /* 77 */ 0x0d4ce4ab69b20793UL, 0xb24db91a97d0fb9eUL, - 0xcdfa50f54e00d01dUL, 0x221b1085368bddb5UL, - /* 78 */ 0xe7e59468b1e3d8d2UL, 0x53c56563bd122f93UL, - 0xeee8a903e0663f09UL, 0x61efa662cbbe3d42UL, - /* 79 */ 0x2cf8ddddde6eab2aUL, 0x9bf80ad51435f231UL, - 0x5deadacec9f04973UL, 0x29275b5d41d29b27UL, - /* 80 */ 0xcfde0f0895ebf14fUL, 0xb9aab96b054905a7UL, - 0xcae80dd9a1c420fdUL, 0x0a63bf2f1673bbc7UL, - /* 81 */ 0x092f6e11958fbc8cUL, 0x672a81e804822fadUL, - 0xcac8351560d52517UL, 0x6f3f7722c8f192f8UL, - /* 82 */ 0xf8ba90ccc2e894b7UL, 0x2c7557a438ff9f0dUL, - 0x894d1d855ae52359UL, 0x68e122157b743d69UL, - /* 83 */ 0xd87e5570cfb919f3UL, 0x3f2cdecd95798db9UL, - 0x2121154710c0a2ceUL, 0x3c66a115246dc5b2UL, - /* 84 */ 0xcbedc562294ecb72UL, 0xba7143c36a280b16UL, - 0x9610c2efd4078b67UL, 0x6144735d946a4b1eUL, - /* 85 */ 0x536f111ed75b3350UL, 0x0211db8c2041d81bUL, - 0xf93cb1000e10413cUL, 0x149dfd3c039e8876UL, - /* 86 */ 0xd479dde46b63155bUL, 0xb66e15e93c837976UL, - 0xdafde43b1f13e038UL, 0x5fafda1a2e4b0b35UL, - /* 87 */ 0x3600bbdf17197581UL, 0x3972050bbe3cd2c2UL, - 0x5938906dbdd5be86UL, 0x34fce5e43f9b860fUL, - /* 88 */ 0x75a8a4cd42d14d02UL, 0x828dabc53441df65UL, - 0x33dcabedd2e131d3UL, 0x3ebad76fb814d25fUL, - /* 89 */ 0xd4906f566f70e10fUL, 0x5d12f7aa51690f5aUL, - 0x45adb16e76cefcf2UL, 0x01f768aead232999UL, - /* 90 */ 0x2b6cc77b6248febdUL, 0x3cd30628ec3aaffdUL, - 0xce1c0b80d4ef486aUL, 0x4c3bff2ea6f66c23UL, - /* 91 */ 0x3f2ec4094aeaeb5fUL, 0x61b19b286e372ca7UL, - 0x5eefa966de2a701dUL, 0x23b20565de55e3efUL, - /* 92 */ 0xe301ca5279d58557UL, 
0x07b2d4ce27c2874fUL, - 0xa532cd8a9dcf1d67UL, 0x2a52fee23f2bff56UL, - /* 93 */ 0x8624efb37cd8663dUL, 0xbbc7ac20ffbd7594UL, - 0x57b85e9c82d37445UL, 0x7b3052cb86a6ec66UL, - /* 94 */ 0x3482f0ad2525e91eUL, 0x2cb68043d28edca0UL, - 0xaf4f6d052e1b003aUL, 0x185f8c2529781b0aUL, - /* 95 */ 0xaa41de5bd80ce0d6UL, 0x9407b2416853e9d6UL, - 0x563ec36e357f4c3aUL, 0x4cc4b8dd0e297bceUL, - /* 96 */ 0xa2fc1a52ffb8730eUL, 0x1811f16e67058e37UL, - 0x10f9a366cddf4ee1UL, 0x72f4a0c4a0b9f099UL, - /* 97 */ 0x8c16c06f663f4ea7UL, 0x693b3af74e970fbaUL, - 0x2102e7f1d69ec345UL, 0x0ba53cbc968a8089UL, - /* 98 */ 0xca3d9dc7fea15537UL, 0x4c6824bb51536493UL, - 0xb9886314844006b1UL, 0x40d2a72ab454cc60UL, - /* 99 */ 0x5936a1b712570975UL, 0x91b9d648debda657UL, - 0x3344094bb64330eaUL, 0x006ba10d12ee51d0UL, - /* 100 */ 0x19228468f5de5d58UL, 0x0eb12f4c38cc05b0UL, - 0xa1039f9dd5601990UL, 0x4502d4ce4fff0e0bUL, - /* 101 */ 0xeb2054106837c189UL, 0xd0f6544c6dd3b93cUL, - 0x40727064c416d74fUL, 0x6e15c6114b502ef0UL, - /* 102 */ 0x4df2a398cfb1a76bUL, 0x11256c7419f2f6b1UL, - 0x4a497962066e6043UL, 0x705b3aab41355b44UL, - /* 103 */ 0x365ef536d797b1d8UL, 0x00076bd622ddf0dbUL, - 0x3bbf33b0e0575a88UL, 0x3777aa05c8e4ca4dUL, - /* 104 */ 0x392745c85578db5fUL, 0x6fda4149dbae5ae2UL, - 0xb1f0b00b8adc9867UL, 0x09963437d36f1da3UL, - /* 105 */ 0x7e824e90a5dc3853UL, 0xccb5f6641f135cbdUL, - 0x6736d86c87ce8fccUL, 0x625f3ce26604249fUL, - /* 106 */ 0xaf8ac8059502f63fUL, 0x0c05e70a2e351469UL, - 0x35292e9c764b6305UL, 0x1a394360c7e23ac3UL, - /* 107 */ 0xd5c6d53251183264UL, 0x62065abd43c2b74fUL, - 0xb5fbf5d03b973f9bUL, 0x13a3da3661206e5eUL, - /* 108 */ 0xc6bd5837725d94e5UL, 0x18e30912205016c5UL, - 0x2088ce1570033c68UL, 0x7fba1f495c837987UL, - /* 109 */ 0x5a8c7423f2f9079dUL, 0x1735157b34023fc5UL, - 0xe4f9b49ad2fab351UL, 0x6691ff72c878e33cUL, - /* 110 */ 0x122c2adedc5eff3eUL, 0xf8dd4bf1d8956cf4UL, - 0xeb86205d9e9e5bdaUL, 0x049b92b9d975c743UL, - /* 111 */ 0xa5379730b0f6c05aUL, 0x72a0ffacc6f3a553UL, - 0xb0032c34b20dcd6dUL, 0x470e9dbc88d5164aUL, 
- /* 112 */ 0xb19cf10ca237c047UL, 0xb65466711f6c81a2UL, - 0xb3321bd16dd80b43UL, 0x48c14f600c5fbe8eUL, - /* 113 */ 0x66451c264aa6c803UL, 0xb66e3904a4fa7da6UL, - 0xd45f19b0b3128395UL, 0x31602627c3c9bc10UL, - /* 114 */ 0x3120dc4832e4e10dUL, 0xeb20c46756c717f7UL, - 0x00f52e3f67280294UL, 0x566d4fc14730c509UL, - /* 115 */ 0x7e3a5d40fd837206UL, 0xc1e926dc7159547aUL, - 0x216730fba68d6095UL, 0x22e8c3843f69cea7UL, - /* 116 */ 0x33d074e8930e4b2bUL, 0xb6e4350e84d15816UL, - 0x5534c26ad6ba2365UL, 0x7773c12f89f1f3f3UL, - /* 117 */ 0x8cba404da57962aaUL, 0x5b9897a81999ce56UL, - 0x508e862f121692fcUL, 0x3a81907fa093c291UL, - /* 118 */ 0x0dded0ff4725a510UL, 0x10d8cc10673fc503UL, - 0x5b9d151c9f1f4e89UL, 0x32a5c1d5cb09a44cUL, - /* 119 */ 0x1e0aa442b90541fbUL, 0x5f85eb7cc1b485dbUL, - 0xbee595ce8a9df2e5UL, 0x25e496c722422236UL, - /* 120 */ 0x5edf3c46cd0fe5b9UL, 0x34e75a7ed2a43388UL, - 0xe488de11d761e352UL, 0x0e878a01a085545cUL, - /* 121 */ 0xba493c77e021bb04UL, 0x2b4d1843c7df899aUL, - 0x9ea37a487ae80d67UL, 0x67a9958011e41794UL, - /* 122 */ 0x4b58051a6697b065UL, 0x47e33f7d8d6ba6d4UL, - 0xbb4da8d483ca46c1UL, 0x68becaa181c2db0dUL, - /* 123 */ 0x8d8980e90b989aa5UL, 0xf95eb14a2c93c99bUL, - 0x51c6c7c4796e73a2UL, 0x6e228363b5efb569UL, - /* 124 */ 0xc6bbc0b02dd624c8UL, 0x777eb47dec8170eeUL, - 0x3cde15a004cfafa9UL, 0x1dc6bc087160bf9bUL, - /* 125 */ 0x2e07e043eec34002UL, 0x18e9fc677a68dc7fUL, - 0xd8da03188bd15b9aUL, 0x48fbc3bb00568253UL, - /* 126 */ 0x57547d4cfb654ce1UL, 0xd3565b82a058e2adUL, - 0xf63eaf0bbf154478UL, 0x47531ef114dfbb18UL, - /* 127 */ 0xe1ec630a4278c587UL, 0x5507d546ca8e83f3UL, - 0x85e135c63adc0c2bUL, 0x0aa7efa85682844eUL, - /* 128 */ 0x72691ba8b3e1f615UL, 0x32b4e9701fbe3ffaUL, - 0x97b6d92e39bb7868UL, 0x2cfe53dea02e39e8UL, - /* 129 */ 0x687392cd85cd52b0UL, 0x27ff66c910e29831UL, - 0x97134556a9832d06UL, 0x269bb0360a84f8a0UL, - /* 130 */ 0x706e55457643f85cUL, 0x3734a48c9b597d1bUL, - 0x7aee91e8c6efa472UL, 0x5cd6abc198a9d9e0UL, - /* 131 */ 0x0e04de06cb3ce41aUL, 0xd8c6eb893402e138UL, - 
0x904659bb686e3772UL, 0x7215c371746ba8c8UL, - /* 132 */ 0xfd12a97eeae4a2d9UL, 0x9514b7516394f2c5UL, - 0x266fd5809208f294UL, 0x5c847085619a26b9UL, - /* 133 */ 0x52985410fed694eaUL, 0x3c905b934a2ed254UL, - 0x10bb47692d3be467UL, 0x063b3d2d69e5e9e1UL, - /* 134 */ 0x472726eedda57debUL, 0xefb6c4ae10f41891UL, - 0x2b1641917b307614UL, 0x117c554fc4f45b7cUL, - /* 135 */ 0xc07cf3118f9d8812UL, 0x01dbd82050017939UL, - 0xd7e803f4171b2827UL, 0x1015e87487d225eaUL, - /* 136 */ 0xc58de3fed23acc4dUL, 0x50db91c294a7be2dUL, - 0x0b94d43d1c9cf457UL, 0x6b1640fa6e37524aUL, - /* 137 */ 0x692f346c5fda0d09UL, 0x200b1c59fa4d3151UL, - 0xb8c46f760777a296UL, 0x4b38395f3ffdfbcfUL, - /* 138 */ 0x18d25e00be54d671UL, 0x60d50582bec8aba6UL, - 0x87ad8f263b78b982UL, 0x50fdf64e9cda0432UL, - /* 139 */ 0x90f567aac578dcf0UL, 0xef1e9b0ef2a3133bUL, - 0x0eebba9242d9de71UL, 0x15473c9bf03101c7UL, - /* 140 */ 0x7c77e8ae56b78095UL, 0xb678e7666e6f078eUL, - 0x2da0b9615348ba1fUL, 0x7cf931c1ff733f0bUL, - /* 141 */ 0x26b357f50a0a366cUL, 0xe9708cf42b87d732UL, - 0xc13aeea5f91cb2c0UL, 0x35d90c991143bb4cUL, - /* 142 */ 0x47c1c404a9a0d9dcUL, 0x659e58451972d251UL, - 0x3875a8c473b38c31UL, 0x1fbd9ed379561f24UL, - /* 143 */ 0x11fabc6fd41ec28dUL, 0x7ef8dfe3cd2a2dcaUL, - 0x72e73b5d8c404595UL, 0x6135fa4954b72f27UL, - /* 144 */ 0xccfc32a2de24b69cUL, 0x3f55698c1f095d88UL, - 0xbe3350ed5ac3f929UL, 0x5e9bf806ca477eebUL, - /* 145 */ 0xe9ce8fb63c309f68UL, 0x5376f63565e1f9f4UL, - 0xd1afcfb35a6393f1UL, 0x6632a1ede5623506UL, - /* 146 */ 0x0b7d6c390c2ded4cUL, 0x56cb3281df04cb1fUL, - 0x66305a1249ecc3c7UL, 0x5d588b60a38ca72aUL, - /* 147 */ 0xa6ecbf78e8e5f42dUL, 0x86eeb44b3c8a3eecUL, - 0xec219c48fbd21604UL, 0x1aaf1af517c36731UL, - /* 148 */ 0xc306a2836769bde7UL, 0x208280622b1e2adbUL, - 0x8027f51ffbff94a6UL, 0x76cfa1ce1124f26bUL, - /* 149 */ 0x18eb00562422abb6UL, 0xf377c4d58f8c29c3UL, - 0x4dbbc207f531561aUL, 0x0253b7f082128a27UL, - /* 150 */ 0x3d1f091cb62c17e0UL, 0x4860e1abd64628a9UL, - 0x52d17436309d4253UL, 0x356f97e13efae576UL, - /* 151 */ 
0xd351e11aa150535bUL, 0x3e6b45bb1dd878ccUL, - 0x0c776128bed92c98UL, 0x1d34ae93032885b8UL, - /* 152 */ 0x4ba0488ca85ba4c3UL, 0x985348c33c9ce6ceUL, - 0x66124c6f97bda770UL, 0x0f81a0290654124aUL, - /* 153 */ 0x9ed09ca6569b86fdUL, 0x811009fd18af9a2dUL, - 0xff08d03f93d8c20aUL, 0x52a148199faef26bUL, - /* 154 */ 0x3e03f9dc2d8d1b73UL, 0x4205801873961a70UL, - 0xc0d987f041a35970UL, 0x07aa1f15a1c0d549UL, - /* 155 */ 0xdfd46ce08cd27224UL, 0x6d0a024f934e4239UL, - 0x808a7a6399897b59UL, 0x0a4556e9e13d95a2UL, - /* 156 */ 0xd21a991fe9c13045UL, 0x9b0e8548fe7751b8UL, - 0x5da643cb4bf30035UL, 0x77db28d63940f721UL, - /* 157 */ 0xfc5eeb614adc9011UL, 0x5229419ae8c411ebUL, - 0x9ec3e7787d1dcf74UL, 0x340d053e216e4cb5UL, - /* 158 */ 0xcac7af39b48df2b4UL, 0xc0faec2871a10a94UL, - 0x140a69245ca575edUL, 0x0cf1c37134273a4cUL, - /* 159 */ 0xc8ee306ac224b8a5UL, 0x57eaee7ccb4930b0UL, - 0xa1e806bdaacbe74fUL, 0x7d9a62742eeb657dUL, - /* 160 */ 0x9eb6b6ef546c4830UL, 0x885cca1fddb36e2eUL, - 0xe6b9f383ef0d7105UL, 0x58654fef9d2e0412UL, - /* 161 */ 0xa905c4ffbe0e8e26UL, 0x942de5df9b31816eUL, - 0x497d723f802e88e1UL, 0x30684dea602f408dUL, - /* 162 */ 0x21e5a278a3e6cb34UL, 0xaefb6e6f5b151dc4UL, - 0xb30b8e049d77ca15UL, 0x28c3c9cf53b98981UL, - /* 163 */ 0x287fb721556cdd2aUL, 0x0d317ca897022274UL, - 0x7468c7423a543258UL, 0x4a7f11464eb5642fUL, - /* 164 */ 0xa237a4774d193aa6UL, 0xd865986ea92129a1UL, - 0x24c515ecf87c1a88UL, 0x604003575f39f5ebUL, - /* 165 */ 0x47b9f189570a9b27UL, 0x2b98cede465e4b78UL, - 0x026df551dbb85c20UL, 0x74fcd91047e21901UL, - /* 166 */ 0x13e2a90a23c1bfa3UL, 0x0cb0074e478519f6UL, - 0x5ff1cbbe3af6cf44UL, 0x67fe5438be812dbeUL, - /* 167 */ 0xd13cf64fa40f05b0UL, 0x054dfb2f32283787UL, - 0x4173915b7f0d2aeaUL, 0x482f144f1f610d4eUL, - /* 168 */ 0xf6210201b47f8234UL, 0x5d0ae1929e70b990UL, - 0xdcd7f455b049567cUL, 0x7e93d0f1f0916f01UL, - /* 169 */ 0xdd79cbf18a7db4faUL, 0xbe8391bf6f74c62fUL, - 0x027145d14b8291bdUL, 0x585a73ea2cbf1705UL, - /* 170 */ 0x485ca03e928a0db2UL, 0x10fc01a5742857e7UL, - 
0x2f482edbd6d551a7UL, 0x0f0433b5048fdb8aUL, - /* 171 */ 0x60da2e8dd7dc6247UL, 0x88b4c9d38cd4819aUL, - 0x13033ac001f66697UL, 0x273b24fe3b367d75UL, - /* 172 */ 0xc6e8f66a31b3b9d4UL, 0x281514a494df49d5UL, - 0xd1726fdfc8b23da7UL, 0x4b3ae7d103dee548UL, - /* 173 */ 0xc6256e19ce4b9d7eUL, 0xff5c5cf186e3c61cUL, - 0xacc63ca34b8ec145UL, 0x74621888fee66574UL, - /* 174 */ 0x956f409645290a1eUL, 0xef0bf8e3263a962eUL, - 0xed6a50eb5ec2647bUL, 0x0694283a9dca7502UL, - /* 175 */ 0x769b963643a2dcd1UL, 0x42b7c8ea09fc5353UL, - 0x4f002aee13397eabUL, 0x63005e2c19b7d63aUL, - /* 176 */ 0xca6736da63023beaUL, 0x966c7f6db12a99b7UL, - 0xace09390c537c5e1UL, 0x0b696063a1aa89eeUL, - /* 177 */ 0xebb03e97288c56e5UL, 0x432a9f9f938c8be8UL, - 0xa6a5a93d5b717f71UL, 0x1a5fb4c3e18f9d97UL, - /* 178 */ 0x1c94e7ad1c60cdceUL, 0xee202a43fc02c4a0UL, - 0x8dafe4d867c46a20UL, 0x0a10263c8ac27b58UL, - /* 179 */ 0xd0dea9dfe4432a4aUL, 0x856af87bbe9277c5UL, - 0xce8472acc212c71aUL, 0x6f151b6d9bbb1e91UL, - /* 180 */ 0x26776c527ceed56aUL, 0x7d211cb7fbf8faecUL, - 0x37ae66a6fd4609ccUL, 0x1f81b702d2770c42UL, - /* 181 */ 0x2fb0b057eac58392UL, 0xe1dd89fe29744e9dUL, - 0xc964f8eb17beb4f8UL, 0x29571073c9a2d41eUL, - /* 182 */ 0xa948a18981c0e254UL, 0x2df6369b65b22830UL, - 0xa33eb2d75fcfd3c6UL, 0x078cd6ec4199a01fUL, - /* 183 */ 0x4a584a41ad900d2fUL, 0x32142b78e2c74c52UL, - 0x68c4e8338431c978UL, 0x7f69ea9008689fc2UL, - /* 184 */ 0x52f2c81e46a38265UL, 0xfd78072d04a832fdUL, - 0x8cd7d5fa25359e94UL, 0x4de71b7454cc29d2UL, - /* 185 */ 0x42eb60ad1eda6ac9UL, 0x0aad37dfdbc09c3aUL, - 0x81004b71e33cc191UL, 0x44e6be345122803cUL, - /* 186 */ 0x03fe8388ba1920dbUL, 0xf5d57c32150db008UL, - 0x49c8c4281af60c29UL, 0x21edb518de701aeeUL, - /* 187 */ 0x7fb63e418f06dc99UL, 0xa4460d99c166d7b8UL, - 0x24dd5248ce520a83UL, 0x5ec3ad712b928358UL, - /* 188 */ 0x15022a5fbd17930fUL, 0xa4f64a77d82570e3UL, - 0x12bc8d6915783712UL, 0x498194c0fc620abbUL, - /* 189 */ 0x38a2d9d255686c82UL, 0x785c6bd9193e21f0UL, - 0xe4d5c81ab24a5484UL, 0x56307860b2e20989UL, - /* 190 */ 
0x429d55f78b4d74c4UL, 0x22f1834643350131UL, - 0x1e60c24598c71fffUL, 0x59f2f014979983efUL, - /* 191 */ 0x46a47d56eb494a44UL, 0x3e22a854d636a18eUL, - 0xb346e15274491c3bUL, 0x2ceafd4e5390cde7UL, - /* 192 */ 0xba8a8538be0d6675UL, 0x4b9074bb50818e23UL, - 0xcbdab89085d304c3UL, 0x61a24fe0e56192c4UL, - /* 193 */ 0xcb7615e6db525bcbUL, 0xdd7d8c35a567e4caUL, - 0xe6b4153acafcdd69UL, 0x2d668e097f3c9766UL, - /* 194 */ 0xa57e7e265ce55ef0UL, 0x5d9f4e527cd4b967UL, - 0xfbc83606492fd1e5UL, 0x090d52beb7c3f7aeUL, - /* 195 */ 0x09b9515a1e7b4d7cUL, 0x1f266a2599da44c0UL, - 0xa1c49548e2c55504UL, 0x7ef04287126f15ccUL, - /* 196 */ 0xfed1659dbd30ef15UL, 0x8b4ab9eec4e0277bUL, - 0x884d6236a5df3291UL, 0x1fd96ea6bf5cf788UL, - /* 197 */ 0x42a161981f190d9aUL, 0x61d849507e6052c1UL, - 0x9fe113bf285a2cd5UL, 0x7c22d676dbad85d8UL, - /* 198 */ 0x82e770ed2bfbd27dUL, 0x4c05b2ece996f5a5UL, - 0xcd40a9c2b0900150UL, 0x5895319213d9bf64UL, - /* 199 */ 0xe7cc5d703fea2e08UL, 0xb50c491258e2188cUL, - 0xcce30baa48205bf0UL, 0x537c659ccfa32d62UL, - /* 200 */ 0x37b6623a98cfc088UL, 0xfe9bed1fa4d6aca4UL, - 0x04d29b8e56a8d1b0UL, 0x725f71c40b519575UL, - /* 201 */ 0x28c7f89cd0339ce6UL, 0x8367b14469ddc18bUL, - 0x883ada83a6a1652cUL, 0x585f1974034d6c17UL, - /* 202 */ 0x89cfb266f1b19188UL, 0xe63b4863e7c35217UL, - 0xd88c9da6b4c0526aUL, 0x3e035c9df0954635UL, - /* 203 */ 0xdd9d5412fb45de9dUL, 0xdd684532e4cff40dUL, - 0x4b5c999b151d671cUL, 0x2d8c2cc811e7f690UL, - /* 204 */ 0x7f54be1d90055d40UL, 0xa464c5df464aaf40UL, - 0x33979624f0e917beUL, 0x2c018dc527356b30UL, - /* 205 */ 0xa5415024e330b3d4UL, 0x73ff3d96691652d3UL, - 0x94ec42c4ef9b59f1UL, 0x0747201618d08e5aUL, - /* 206 */ 0x4d6ca48aca411c53UL, 0x66415f2fcfa66119UL, - 0x9c4dd40051e227ffUL, 0x59810bc09a02f7ebUL, - /* 207 */ 0x2a7eb171b3dc101dUL, 0x441c5ab99ffef68eUL, - 0x32025c9b93b359eaUL, 0x5e8ce0a71e9d112fUL, - /* 208 */ 0xbfcccb92429503fdUL, 0xd271ba752f095d55UL, - 0x345ead5e972d091eUL, 0x18c8df11a83103baUL, - /* 209 */ 0x90cd949a9aed0f4cUL, 0xc5d1f4cb6660e37eUL, - 
0xb8cac52d56c52e0bUL, 0x6e42e400c5808e0dUL, - /* 210 */ 0xa3b46966eeaefd23UL, 0x0c4f1f0be39ecdcaUL, - 0x189dc8c9d683a51dUL, 0x51f27f054c09351bUL, - /* 211 */ 0x4c487ccd2a320682UL, 0x587ea95bb3df1c96UL, - 0xc8ccf79e555cb8e8UL, 0x547dc829a206d73dUL, - /* 212 */ 0xb822a6cd80c39b06UL, 0xe96d54732000d4c6UL, - 0x28535b6f91463b4dUL, 0x228f4660e2486e1dUL, - /* 213 */ 0x98799538de8d3abfUL, 0x8cd8330045ebca6eUL, - 0x79952a008221e738UL, 0x4322e1a7535cd2bbUL, - /* 214 */ 0xb114c11819d1801cUL, 0x2016e4d84f3f5ec7UL, - 0xdd0e2df409260f4cUL, 0x5ec362c0ae5f7266UL, - /* 215 */ 0xc0462b18b8b2b4eeUL, 0x7cc8d950274d1afbUL, - 0xf25f7105436b02d2UL, 0x43bbf8dcbff9ccd3UL, - /* 216 */ 0xb6ad1767a039e9dfUL, 0xb0714da8f69d3583UL, - 0x5e55fa18b42931f5UL, 0x4ed5558f33c60961UL, - /* 217 */ 0x1fe37901c647a5ddUL, 0x593ddf1f8081d357UL, - 0x0249a4fd813fd7a6UL, 0x69acca274e9caf61UL, - /* 218 */ 0x047ba3ea330721c9UL, 0x83423fc20e7e1ea0UL, - 0x1df4c0af01314a60UL, 0x09a62dab89289527UL, - /* 219 */ 0xa5b325a49cc6cb00UL, 0xe94b5dc654b56cb6UL, - 0x3be28779adc994a0UL, 0x4296e8f8ba3a4aadUL, - /* 220 */ 0x328689761e451eabUL, 0x2e4d598bff59594aUL, - 0x49b96853d7a7084aUL, 0x4980a319601420a8UL, - /* 221 */ 0x9565b9e12f552c42UL, 0x8a5318db7100fe96UL, - 0x05c90b4d43add0d7UL, 0x538b4cd66a5d4edaUL, - /* 222 */ 0xf4e94fc3e89f039fUL, 0x592c9af26f618045UL, - 0x08a36eb5fd4b9550UL, 0x25fffaf6c2ed1419UL, - /* 223 */ 0x34434459cc79d354UL, 0xeeecbfb4b1d5476bUL, - 0xddeb34a061615d99UL, 0x5129cecceb64b773UL, - /* 224 */ 0xee43215894993520UL, 0x772f9c7cf14c0b3bUL, - 0xd2e2fce306bedad5UL, 0x715f42b546f06a97UL, - /* 225 */ 0x434ecdceda5b5f1aUL, 0x0da17115a49741a9UL, - 0x680bd77c73edad2eUL, 0x487c02354edd9041UL, - /* 226 */ 0xb8efeff3a70ed9c4UL, 0x56a32aa3e857e302UL, - 0xdf3a68bd48a2a5a0UL, 0x07f650b73176c444UL, - /* 227 */ 0xe38b9b1626e0ccb1UL, 0x79e053c18b09fb36UL, - 0x56d90319c9f94964UL, 0x1ca941e7ac9ff5c4UL, - /* 228 */ 0x49c4df29162fa0bbUL, 0x8488cf3282b33305UL, - 0x95dfda14cabb437dUL, 0x3391f78264d5ad86UL, - /* 229 */ 
0x729ae06ae2b5095dUL, 0xd58a58d73259a946UL, - 0xe9834262d13921edUL, 0x27fedafaa54bb592UL, - /* 230 */ 0xa99dc5b829ad48bbUL, 0x5f025742499ee260UL, - 0x802c8ecd5d7513fdUL, 0x78ceb3ef3f6dd938UL, - /* 231 */ 0xc342f44f8a135d94UL, 0x7b9edb44828cdda3UL, - 0x9436d11a0537cfe7UL, 0x5064b164ec1ab4c8UL, - /* 232 */ 0x7020eccfd37eb2fcUL, 0x1f31ea3ed90d25fcUL, - 0x1b930d7bdfa1bb34UL, 0x5344467a48113044UL, - /* 233 */ 0x70073170f25e6dfbUL, 0xe385dc1a50114cc8UL, - 0x2348698ac8fc4f00UL, 0x2a77a55284dd40d8UL, - /* 234 */ 0xfe06afe0c98c6ce4UL, 0xc235df96dddfd6e4UL, - 0x1428d01e33bf1ed3UL, 0x785768ec9300bdafUL, - /* 235 */ 0x9702e57a91deb63bUL, 0x61bdb8bfe5ce8b80UL, - 0x645b426f3d1d58acUL, 0x4804a82227a557bcUL, - /* 236 */ 0x8e57048ab44d2601UL, 0x68d6501a4b3a6935UL, - 0xc39c9ec3f9e1c293UL, 0x4172f257d4de63e2UL, - /* 237 */ 0xd368b450330c6401UL, 0x040d3017418f2391UL, - 0x2c34bb6090b7d90dUL, 0x16f649228fdfd51fUL, - /* 238 */ 0xbea6818e2b928ef5UL, 0xe28ccf91cdc11e72UL, - 0x594aaa68e77a36cdUL, 0x313034806c7ffd0fUL, - /* 239 */ 0x8a9d27ac2249bd65UL, 0x19a3b464018e9512UL, - 0xc26ccff352b37ec7UL, 0x056f68341d797b21UL, - /* 240 */ 0x5e79d6757efd2327UL, 0xfabdbcb6553afe15UL, - 0xd3e7222c6eaf5a60UL, 0x7046c76d4dae743bUL, - /* 241 */ 0x660be872b18d4a55UL, 0x19992518574e1496UL, - 0xc103053a302bdcbbUL, 0x3ed8e9800b218e8eUL, - /* 242 */ 0x7b0b9239fa75e03eUL, 0xefe9fb684633c083UL, - 0x98a35fbe391a7793UL, 0x6065510fe2d0fe34UL, - /* 243 */ 0x55cb668548abad0cUL, 0xb4584548da87e527UL, - 0x2c43ecea0107c1ddUL, 0x526028809372de35UL, - /* 244 */ 0x3415c56af9213b1fUL, 0x5bee1a4d017e98dbUL, - 0x13f6b105b5cf709bUL, 0x5ff20e3482b29ab6UL, - /* 245 */ 0x0aa29c75cc2e6c90UL, 0xfc7d73ca3a70e206UL, - 0x899fc38fc4b5c515UL, 0x250386b124ffc207UL, - /* 246 */ 0x54ea28d5ae3d2b56UL, 0x9913149dd6de60ceUL, - 0x16694fc58f06d6c1UL, 0x46b23975eb018fc7UL, - /* 247 */ 0x470a6a0fb4b7b4e2UL, 0x5d92475a8f7253deUL, - 0xabeee5b52fbd3adbUL, 0x7fa20801a0806968UL, - /* 248 */ 0x76f3faf19f7714d2UL, 0xb3e840c12f4660c3UL, - 
0x0fb4cd8df212744eUL, 0x4b065a251d3a2dd2UL, - /* 249 */ 0x5cebde383d77cd4aUL, 0x6adf39df882c9cb1UL, - 0xa2dd242eb09af759UL, 0x3147c0e50e5f6422UL, - /* 250 */ 0x164ca5101d1350dbUL, 0xf8d13479c33fc962UL, - 0xe640ce4d13e5da08UL, 0x4bdee0c45061f8baUL, - /* 251 */ 0xd7c46dc1a4edb1c9UL, 0x5514d7b6437fd98aUL, - 0x58942f6bb2a1c00bUL, 0x2dffb2ab1d70710eUL, - /* 252 */ 0xccdfcf2fc18b6d68UL, 0xa8ebcba8b7806167UL, - 0x980697f95e2937e3UL, 0x02fbba1cd0126e8cUL -}; - -/* c is two 512-bit products: c0[0:7]=a0[0:3]*b0[0:3] and c1[8:15]=a1[4:7]*b1[4:7] - * a is two 256-bit integers: a0[0:3] and a1[4:7] - * b is two 256-bit integers: b0[0:3] and b1[4:7] - */ -static void mul2_256x256_integer_adx(u64 *const c, const u64 *const a, - const u64 *const b) -{ - asm volatile( - "xorl %%r14d, %%r14d ;" - "movq (%1), %%rdx; " /* A[0] */ - "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ - "xorl %%r10d, %%r10d ;" - "movq %%r8, (%0) ;" - "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ - "adox %%r10, %%r15 ;" - "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ - "adox %%r8, %%rax ;" - "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ - "adox %%r10, %%rbx ;" - /******************************************/ - "adox %%r14, %%rcx ;" - - "movq 8(%1), %%rdx; " /* A[1] */ - "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ - "adox %%r15, %%r8 ;" - "movq %%r8, 8(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ - "adox %%r10, %%r9 ;" - "adcx %%r9, %%rax ;" - "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ - "adox %%r8, %%r11 ;" - "adcx %%r11, %%rbx ;" - "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ - "adox %%r10, %%r13 ;" - "adcx %%r13, %%rcx ;" - /******************************************/ - "adox %%r14, %%r15 ;" - "adcx %%r14, %%r15 ;" - - "movq 16(%1), %%rdx; " /* A[2] */ - "xorl %%r10d, %%r10d ;" - "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ - "adox %%rax, %%r8 ;" - "movq %%r8, 16(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ - "adox %%r10, %%r9 ;" - "adcx %%r9, %%rbx ;" - "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ - 
"adox %%r8, %%r11 ;" - "adcx %%r11, %%rcx ;" - "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ - "adox %%r10, %%r13 ;" - "adcx %%r13, %%r15 ;" - /******************************************/ - "adox %%r14, %%rax ;" - "adcx %%r14, %%rax ;" - - "movq 24(%1), %%rdx; " /* A[3] */ - "xorl %%r10d, %%r10d ;" - "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ - "adox %%rbx, %%r8 ;" - "movq %%r8, 24(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ - "adox %%r10, %%r9 ;" - "adcx %%r9, %%rcx ;" - "movq %%rcx, 32(%0) ;" - "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ - "adox %%r8, %%r11 ;" - "adcx %%r11, %%r15 ;" - "movq %%r15, 40(%0) ;" - "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ - "adox %%r10, %%r13 ;" - "adcx %%r13, %%rax ;" - "movq %%rax, 48(%0) ;" - /******************************************/ - "adox %%r14, %%rbx ;" - "adcx %%r14, %%rbx ;" - "movq %%rbx, 56(%0) ;" - - "movq 32(%1), %%rdx; " /* C[0] */ - "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */ - "xorl %%r10d, %%r10d ;" - "movq %%r8, 64(%0);" - "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */ - "adox %%r10, %%r15 ;" - "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */ - "adox %%r8, %%rax ;" - "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */ - "adox %%r10, %%rbx ;" - /******************************************/ - "adox %%r14, %%rcx ;" - - "movq 40(%1), %%rdx; " /* C[1] */ - "xorl %%r10d, %%r10d ;" - "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */ - "adox %%r15, %%r8 ;" - "movq %%r8, 72(%0);" - "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */ - "adox %%r10, %%r9 ;" - "adcx %%r9, %%rax ;" - "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */ - "adox %%r8, %%r11 ;" - "adcx %%r11, %%rbx ;" - "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */ - "adox %%r10, %%r13 ;" - "adcx %%r13, %%rcx ;" - /******************************************/ - "adox %%r14, %%r15 ;" - "adcx %%r14, %%r15 ;" - - "movq 48(%1), %%rdx; " /* C[2] */ - "xorl %%r10d, %%r10d ;" - "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */ - "adox %%rax, %%r8 ;" - "movq %%r8, 80(%0);" - "mulx 40(%2), 
%%r10, %%r11; " /* C[2]*D[1] */ - "adox %%r10, %%r9 ;" - "adcx %%r9, %%rbx ;" - "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */ - "adox %%r8, %%r11 ;" - "adcx %%r11, %%rcx ;" - "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */ - "adox %%r10, %%r13 ;" - "adcx %%r13, %%r15 ;" - /******************************************/ - "adox %%r14, %%rax ;" - "adcx %%r14, %%rax ;" - - "movq 56(%1), %%rdx; " /* C[3] */ - "xorl %%r10d, %%r10d ;" - "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */ - "adox %%rbx, %%r8 ;" - "movq %%r8, 88(%0);" - "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */ - "adox %%r10, %%r9 ;" - "adcx %%r9, %%rcx ;" - "movq %%rcx, 96(%0) ;" - "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */ - "adox %%r8, %%r11 ;" - "adcx %%r11, %%r15 ;" - "movq %%r15, 104(%0) ;" - "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */ - "adox %%r10, %%r13 ;" - "adcx %%r13, %%rax ;" - "movq %%rax, 112(%0) ;" - /******************************************/ - "adox %%r14, %%rbx ;" - "adcx %%r14, %%rbx ;" - "movq %%rbx, 120(%0) ;" - : - : "r"(c), "r"(a), "r"(b) - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", - "%r10", "%r11", "%r13", "%r14", "%r15"); -} - -static void mul2_256x256_integer_bmi2(u64 *const c, const u64 *const a, - const u64 *const b) +static __always_inline u64 eq_mask(u64 a, u64 b) { - asm volatile( - "movq (%1), %%rdx; " /* A[0] */ - "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ - "movq %%r8, (%0) ;" - "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ - "addq %%r10, %%r15 ;" - "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ - "adcq %%r8, %%rax ;" - "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ - "adcq %%r10, %%rbx ;" - /******************************************/ - "adcq $0, %%rcx ;" - - "movq 8(%1), %%rdx; " /* A[1] */ - "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ - "addq %%r15, %%r8 ;" - "movq %%r8, 8(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ - "adcq %%r10, %%r9 ;" - "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ - "adcq %%r8, %%r11 ;" - "mulx 24(%2), %%r10, %%r15; " /* 
A[1]*B[3] */ - "adcq %%r10, %%r13 ;" - /******************************************/ - "adcq $0, %%r15 ;" - - "addq %%r9, %%rax ;" - "adcq %%r11, %%rbx ;" - "adcq %%r13, %%rcx ;" - "adcq $0, %%r15 ;" - - "movq 16(%1), %%rdx; " /* A[2] */ - "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ - "addq %%rax, %%r8 ;" - "movq %%r8, 16(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ - "adcq %%r10, %%r9 ;" - "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ - "adcq %%r8, %%r11 ;" - "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ - "adcq %%r10, %%r13 ;" - /******************************************/ - "adcq $0, %%rax ;" - - "addq %%r9, %%rbx ;" - "adcq %%r11, %%rcx ;" - "adcq %%r13, %%r15 ;" - "adcq $0, %%rax ;" - - "movq 24(%1), %%rdx; " /* A[3] */ - "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ - "addq %%rbx, %%r8 ;" - "movq %%r8, 24(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ - "adcq %%r10, %%r9 ;" - "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ - "adcq %%r8, %%r11 ;" - "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ - "adcq %%r10, %%r13 ;" - /******************************************/ - "adcq $0, %%rbx ;" - - "addq %%r9, %%rcx ;" - "movq %%rcx, 32(%0) ;" - "adcq %%r11, %%r15 ;" - "movq %%r15, 40(%0) ;" - "adcq %%r13, %%rax ;" - "movq %%rax, 48(%0) ;" - "adcq $0, %%rbx ;" - "movq %%rbx, 56(%0) ;" - - "movq 32(%1), %%rdx; " /* C[0] */ - "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */ - "movq %%r8, 64(%0) ;" - "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */ - "addq %%r10, %%r15 ;" - "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */ - "adcq %%r8, %%rax ;" - "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */ - "adcq %%r10, %%rbx ;" - /******************************************/ - "adcq $0, %%rcx ;" - - "movq 40(%1), %%rdx; " /* C[1] */ - "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */ - "addq %%r15, %%r8 ;" - "movq %%r8, 72(%0) ;" - "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */ - "adcq %%r10, %%r9 ;" - "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */ - "adcq %%r8, %%r11 ;" - "mulx 56(%2), %%r10, %%r15; " 
/* C[1]*D[3] */ - "adcq %%r10, %%r13 ;" - /******************************************/ - "adcq $0, %%r15 ;" - - "addq %%r9, %%rax ;" - "adcq %%r11, %%rbx ;" - "adcq %%r13, %%rcx ;" - "adcq $0, %%r15 ;" - - "movq 48(%1), %%rdx; " /* C[2] */ - "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */ - "addq %%rax, %%r8 ;" - "movq %%r8, 80(%0) ;" - "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */ - "adcq %%r10, %%r9 ;" - "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */ - "adcq %%r8, %%r11 ;" - "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */ - "adcq %%r10, %%r13 ;" - /******************************************/ - "adcq $0, %%rax ;" - - "addq %%r9, %%rbx ;" - "adcq %%r11, %%rcx ;" - "adcq %%r13, %%r15 ;" - "adcq $0, %%rax ;" - - "movq 56(%1), %%rdx; " /* C[3] */ - "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */ - "addq %%rbx, %%r8 ;" - "movq %%r8, 88(%0) ;" - "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */ - "adcq %%r10, %%r9 ;" - "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */ - "adcq %%r8, %%r11 ;" - "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */ - "adcq %%r10, %%r13 ;" - /******************************************/ - "adcq $0, %%rbx ;" - - "addq %%r9, %%rcx ;" - "movq %%rcx, 96(%0) ;" - "adcq %%r11, %%r15 ;" - "movq %%r15, 104(%0) ;" - "adcq %%r13, %%rax ;" - "movq %%rax, 112(%0) ;" - "adcq $0, %%rbx ;" - "movq %%rbx, 120(%0) ;" - : - : "r"(c), "r"(a), "r"(b) - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", - "%r10", "%r11", "%r13", "%r15"); + u64 x = a ^ b; + u64 minus_x = ~x + (u64)1U; + u64 x_or_minus_x = x | minus_x; + u64 xnx = x_or_minus_x >> (u32)63U; + return xnx - (u64)1U; } -static void sqr2_256x256_integer_adx(u64 *const c, const u64 *const a) +static __always_inline u64 gte_mask(u64 a, u64 b) { - asm volatile( - "movq (%1), %%rdx ;" /* A[0] */ - "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */ - "xorl %%r15d, %%r15d;" - "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */ - "adcx %%r14, %%r9 ;" - "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */ - "adcx %%rax, %%r10 ;" - "movq 24(%1), 
%%rdx ;" /* A[3] */ - "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */ - "adcx %%rcx, %%r11 ;" - "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */ - "adcx %%rax, %%rbx ;" - "movq 8(%1), %%rdx ;" /* A[1] */ - "adcx %%r15, %%r13 ;" - "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */ - "movq $0, %%r14 ;" - /******************************************/ - "adcx %%r15, %%r14 ;" - - "xorl %%r15d, %%r15d;" - "adox %%rax, %%r10 ;" - "adcx %%r8, %%r8 ;" - "adox %%rcx, %%r11 ;" - "adcx %%r9, %%r9 ;" - "adox %%r15, %%rbx ;" - "adcx %%r10, %%r10 ;" - "adox %%r15, %%r13 ;" - "adcx %%r11, %%r11 ;" - "adox %%r15, %%r14 ;" - "adcx %%rbx, %%rbx ;" - "adcx %%r13, %%r13 ;" - "adcx %%r14, %%r14 ;" - - "movq (%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ - /*******************/ - "movq %%rax, 0(%0) ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, 8(%0) ;" - "movq 8(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ - "adcq %%rax, %%r9 ;" - "movq %%r9, 16(%0) ;" - "adcq %%rcx, %%r10 ;" - "movq %%r10, 24(%0) ;" - "movq 16(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ - "adcq %%rax, %%r11 ;" - "movq %%r11, 32(%0) ;" - "adcq %%rcx, %%rbx ;" - "movq %%rbx, 40(%0) ;" - "movq 24(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ - "adcq %%rax, %%r13 ;" - "movq %%r13, 48(%0) ;" - "adcq %%rcx, %%r14 ;" - "movq %%r14, 56(%0) ;" - - - "movq 32(%1), %%rdx ;" /* B[0] */ - "mulx 40(%1), %%r8, %%r14 ;" /* B[1]*B[0] */ - "xorl %%r15d, %%r15d;" - "mulx 48(%1), %%r9, %%r10 ;" /* B[2]*B[0] */ - "adcx %%r14, %%r9 ;" - "mulx 56(%1), %%rax, %%rcx ;" /* B[3]*B[0] */ - "adcx %%rax, %%r10 ;" - "movq 56(%1), %%rdx ;" /* B[3] */ - "mulx 40(%1), %%r11, %%rbx ;" /* B[1]*B[3] */ - "adcx %%rcx, %%r11 ;" - "mulx 48(%1), %%rax, %%r13 ;" /* B[2]*B[3] */ - "adcx %%rax, %%rbx ;" - "movq 40(%1), %%rdx ;" /* B[1] */ - "adcx %%r15, %%r13 ;" - "mulx 48(%1), %%rax, %%rcx ;" /* B[2]*B[1] */ - "movq $0, %%r14 ;" - /******************************************/ - "adcx %%r15, %%r14 ;" - - "xorl %%r15d, %%r15d;" - 
"adox %%rax, %%r10 ;" - "adcx %%r8, %%r8 ;" - "adox %%rcx, %%r11 ;" - "adcx %%r9, %%r9 ;" - "adox %%r15, %%rbx ;" - "adcx %%r10, %%r10 ;" - "adox %%r15, %%r13 ;" - "adcx %%r11, %%r11 ;" - "adox %%r15, %%r14 ;" - "adcx %%rbx, %%rbx ;" - "adcx %%r13, %%r13 ;" - "adcx %%r14, %%r14 ;" - - "movq 32(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* B[0]^2 */ - /*******************/ - "movq %%rax, 64(%0) ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, 72(%0) ;" - "movq 40(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* B[1]^2 */ - "adcq %%rax, %%r9 ;" - "movq %%r9, 80(%0) ;" - "adcq %%rcx, %%r10 ;" - "movq %%r10, 88(%0) ;" - "movq 48(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* B[2]^2 */ - "adcq %%rax, %%r11 ;" - "movq %%r11, 96(%0) ;" - "adcq %%rcx, %%rbx ;" - "movq %%rbx, 104(%0) ;" - "movq 56(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* B[3]^2 */ - "adcq %%rax, %%r13 ;" - "movq %%r13, 112(%0) ;" - "adcq %%rcx, %%r14 ;" - "movq %%r14, 120(%0) ;" - : - : "r"(c), "r"(a) - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", - "%r10", "%r11", "%r13", "%r14", "%r15"); + u64 x = a; + u64 y = b; + u64 x_xor_y = x ^ y; + u64 x_sub_y = x - y; + u64 x_sub_y_xor_y = x_sub_y ^ y; + u64 q = x_xor_y | x_sub_y_xor_y; + u64 x_xor_q = x ^ q; + u64 x_xor_q_ = x_xor_q >> (u32)63U; + return x_xor_q_ - (u64)1U; } -static void sqr2_256x256_integer_bmi2(u64 *const c, const u64 *const a) +/* Computes the addition of four-element f1 with value in f2 + * and returns the carry (if any) */ +static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2) { - asm volatile( - "movq 8(%1), %%rdx ;" /* A[1] */ - "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */ - "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */ - "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */ - - "movq 16(%1), %%rdx ;" /* A[2] */ - "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */ - "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */ - - "addq %%rax, %%r9 ;" - "adcq %%rdx, %%r10 ;" - "adcq %%rcx, %%r11 ;" - "adcq %%r14, %%r15 ;" - "adcq $0, %%r13 ;" - 
"movq $0, %%r14 ;" - "adcq $0, %%r14 ;" - - "movq (%1), %%rdx ;" /* A[0] */ - "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */ - - "addq %%rax, %%r10 ;" - "adcq %%rcx, %%r11 ;" - "adcq $0, %%r15 ;" - "adcq $0, %%r13 ;" - "adcq $0, %%r14 ;" - - "shldq $1, %%r13, %%r14 ;" - "shldq $1, %%r15, %%r13 ;" - "shldq $1, %%r11, %%r15 ;" - "shldq $1, %%r10, %%r11 ;" - "shldq $1, %%r9, %%r10 ;" - "shldq $1, %%r8, %%r9 ;" - "shlq $1, %%r8 ;" - - /*******************/ - "mulx %%rdx, %%rax, %%rcx ; " /* A[0]^2 */ - /*******************/ - "movq %%rax, 0(%0) ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, 8(%0) ;" - "movq 8(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ; " /* A[1]^2 */ - "adcq %%rax, %%r9 ;" - "movq %%r9, 16(%0) ;" - "adcq %%rcx, %%r10 ;" - "movq %%r10, 24(%0) ;" - "movq 16(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ; " /* A[2]^2 */ - "adcq %%rax, %%r11 ;" - "movq %%r11, 32(%0) ;" - "adcq %%rcx, %%r15 ;" - "movq %%r15, 40(%0) ;" - "movq 24(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ; " /* A[3]^2 */ - "adcq %%rax, %%r13 ;" - "movq %%r13, 48(%0) ;" - "adcq %%rcx, %%r14 ;" - "movq %%r14, 56(%0) ;" - - "movq 40(%1), %%rdx ;" /* B[1] */ - "mulx 32(%1), %%r8, %%r9 ;" /* B[0]*B[1] */ - "mulx 48(%1), %%r10, %%r11 ;" /* B[2]*B[1] */ - "mulx 56(%1), %%rcx, %%r14 ;" /* B[3]*B[1] */ - - "movq 48(%1), %%rdx ;" /* B[2] */ - "mulx 56(%1), %%r15, %%r13 ;" /* B[3]*B[2] */ - "mulx 32(%1), %%rax, %%rdx ;" /* B[0]*B[2] */ - - "addq %%rax, %%r9 ;" - "adcq %%rdx, %%r10 ;" - "adcq %%rcx, %%r11 ;" - "adcq %%r14, %%r15 ;" - "adcq $0, %%r13 ;" - "movq $0, %%r14 ;" - "adcq $0, %%r14 ;" - - "movq 32(%1), %%rdx ;" /* B[0] */ - "mulx 56(%1), %%rax, %%rcx ;" /* B[0]*B[3] */ - - "addq %%rax, %%r10 ;" - "adcq %%rcx, %%r11 ;" - "adcq $0, %%r15 ;" - "adcq $0, %%r13 ;" - "adcq $0, %%r14 ;" - - "shldq $1, %%r13, %%r14 ;" - "shldq $1, %%r15, %%r13 ;" - "shldq $1, %%r11, %%r15 ;" - "shldq $1, %%r10, %%r11 ;" - "shldq $1, %%r9, %%r10 ;" - "shldq $1, %%r8, %%r9 ;" - "shlq $1, %%r8 ;" - - /*******************/ - "mulx 
%%rdx, %%rax, %%rcx ; " /* B[0]^2 */ - /*******************/ - "movq %%rax, 64(%0) ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, 72(%0) ;" - "movq 40(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ; " /* B[1]^2 */ - "adcq %%rax, %%r9 ;" - "movq %%r9, 80(%0) ;" - "adcq %%rcx, %%r10 ;" - "movq %%r10, 88(%0) ;" - "movq 48(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ; " /* B[2]^2 */ - "adcq %%rax, %%r11 ;" - "movq %%r11, 96(%0) ;" - "adcq %%rcx, %%r15 ;" - "movq %%r15, 104(%0) ;" - "movq 56(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ; " /* B[3]^2 */ - "adcq %%rax, %%r13 ;" - "movq %%r13, 112(%0) ;" - "adcq %%rcx, %%r14 ;" - "movq %%r14, 120(%0) ;" - : - : "r"(c), "r"(a) - : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", - "%r11", "%r13", "%r14", "%r15"); -} + u64 carry_r; -static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a) -{ asm volatile( - "movl $38, %%edx; " /* 2*c = 38 = 2^256 */ - "mulx 32(%1), %%r8, %%r10; " /* c*C[4] */ - "xorl %%ebx, %%ebx ;" - "adox (%1), %%r8 ;" - "mulx 40(%1), %%r9, %%r11; " /* c*C[5] */ - "adcx %%r10, %%r9 ;" - "adox 8(%1), %%r9 ;" - "mulx 48(%1), %%r10, %%rax; " /* c*C[6] */ - "adcx %%r11, %%r10 ;" - "adox 16(%1), %%r10 ;" - "mulx 56(%1), %%r11, %%rcx; " /* c*C[7] */ - "adcx %%rax, %%r11 ;" - "adox 24(%1), %%r11 ;" - /***************************************/ - "adcx %%rbx, %%rcx ;" - "adox %%rbx, %%rcx ;" - "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ - "adcx %%rcx, %%r8 ;" - "adcx %%rbx, %%r9 ;" - "movq %%r9, 8(%0) ;" - "adcx %%rbx, %%r10 ;" - "movq %%r10, 16(%0) ;" - "adcx %%rbx, %%r11 ;" - "movq %%r11, 24(%0) ;" - "mov $0, %%ecx ;" - "cmovc %%edx, %%ecx ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, (%0) ;" - - "mulx 96(%1), %%r8, %%r10; " /* c*C[4] */ - "xorl %%ebx, %%ebx ;" - "adox 64(%1), %%r8 ;" - "mulx 104(%1), %%r9, %%r11; " /* c*C[5] */ - "adcx %%r10, %%r9 ;" - "adox 72(%1), %%r9 ;" - "mulx 112(%1), %%r10, %%rax; " /* c*C[6] */ - "adcx %%r11, %%r10 ;" - "adox 80(%1), %%r10 ;" - "mulx 120(%1), %%r11, %%rcx; " /* 
c*C[7] */ - "adcx %%rax, %%r11 ;" - "adox 88(%1), %%r11 ;" - /****************************************/ - "adcx %%rbx, %%rcx ;" - "adox %%rbx, %%rcx ;" - "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ - "adcx %%rcx, %%r8 ;" - "adcx %%rbx, %%r9 ;" - "movq %%r9, 40(%0) ;" - "adcx %%rbx, %%r10 ;" - "movq %%r10, 48(%0) ;" - "adcx %%rbx, %%r11 ;" - "movq %%r11, 56(%0) ;" - "mov $0, %%ecx ;" - "cmovc %%edx, %%ecx ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, 32(%0) ;" - : - : "r"(c), "r"(a) - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", - "%r10", "%r11"); -} + /* Clear registers to propagate the carry bit */ + " xor %%r8, %%r8;" + " xor %%r9, %%r9;" + " xor %%r10, %%r10;" + " xor %%r11, %%r11;" + " xor %1, %1;" + + /* Begin addition chain */ + " addq 0(%3), %0;" + " movq %0, 0(%2);" + " adcxq 8(%3), %%r8;" + " movq %%r8, 8(%2);" + " adcxq 16(%3), %%r9;" + " movq %%r9, 16(%2);" + " adcxq 24(%3), %%r10;" + " movq %%r10, 24(%2);" + + /* Return the carry bit in a register */ + " adcx %%r11, %1;" + : "+&r" (f2), "=&r" (carry_r) + : "r" (out), "r" (f1) + : "%r8", "%r9", "%r10", "%r11", "memory", "cc" + ); -static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a) -{ - asm volatile( - "movl $38, %%edx ; " /* 2*c = 38 = 2^256 */ - "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ - "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ - "addq %%r10, %%r9 ;" - "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ - "adcq %%r11, %%r10 ;" - "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ - "adcq %%rax, %%r11 ;" - /***************************************/ - "adcq $0, %%rcx ;" - "addq (%1), %%r8 ;" - "adcq 8(%1), %%r9 ;" - "adcq 16(%1), %%r10 ;" - "adcq 24(%1), %%r11 ;" - "adcq $0, %%rcx ;" - "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ - "addq %%rcx, %%r8 ;" - "adcq $0, %%r9 ;" - "movq %%r9, 8(%0) ;" - "adcq $0, %%r10 ;" - "movq %%r10, 16(%0) ;" - "adcq $0, %%r11 ;" - "movq %%r11, 24(%0) ;" - "mov $0, %%ecx ;" - "cmovc %%edx, %%ecx ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, (%0) ;" - - "mulx 96(%1), 
%%r8, %%r10 ;" /* c*C[4] */ - "mulx 104(%1), %%r9, %%r11 ;" /* c*C[5] */ - "addq %%r10, %%r9 ;" - "mulx 112(%1), %%r10, %%rax ;" /* c*C[6] */ - "adcq %%r11, %%r10 ;" - "mulx 120(%1), %%r11, %%rcx ;" /* c*C[7] */ - "adcq %%rax, %%r11 ;" - /****************************************/ - "adcq $0, %%rcx ;" - "addq 64(%1), %%r8 ;" - "adcq 72(%1), %%r9 ;" - "adcq 80(%1), %%r10 ;" - "adcq 88(%1), %%r11 ;" - "adcq $0, %%rcx ;" - "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ - "addq %%rcx, %%r8 ;" - "adcq $0, %%r9 ;" - "movq %%r9, 40(%0) ;" - "adcq $0, %%r10 ;" - "movq %%r10, 48(%0) ;" - "adcq $0, %%r11 ;" - "movq %%r11, 56(%0) ;" - "mov $0, %%ecx ;" - "cmovc %%edx, %%ecx ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, 32(%0) ;" - : - : "r"(c), "r"(a) - : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", - "%r11"); + return carry_r; } -static void mul_256x256_integer_adx(u64 *const c, const u64 *const a, - const u64 *const b) +/* Computes the field addition of two field elements */ +static inline void fadd(u64 *out, const u64 *f1, const u64 *f2) { asm volatile( - "movq (%1), %%rdx; " /* A[0] */ - "mulx (%2), %%r8, %%r9; " /* A[0]*B[0] */ - "xorl %%r10d, %%r10d ;" - "movq %%r8, (%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[0]*B[1] */ - "adox %%r9, %%r10 ;" - "movq %%r10, 8(%0) ;" - "mulx 16(%2), %%r15, %%r13; " /* A[0]*B[2] */ - "adox %%r11, %%r15 ;" - "mulx 24(%2), %%r14, %%rdx; " /* A[0]*B[3] */ - "adox %%r13, %%r14 ;" - "movq $0, %%rax ;" - /******************************************/ - "adox %%rdx, %%rax ;" - - "movq 8(%1), %%rdx; " /* A[1] */ - "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ - "xorl %%r10d, %%r10d ;" - "adcx 8(%0), %%r8 ;" - "movq %%r8, 8(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ - "adox %%r9, %%r10 ;" - "adcx %%r15, %%r10 ;" - "movq %%r10, 16(%0) ;" - "mulx 16(%2), %%r15, %%r13; " /* A[1]*B[2] */ - "adox %%r11, %%r15 ;" - "adcx %%r14, %%r15 ;" - "movq $0, %%r8 ;" - "mulx 24(%2), %%r14, %%rdx; " /* A[1]*B[3] */ - "adox %%r13, %%r14 ;" - "adcx %%rax, 
%%r14 ;" - "movq $0, %%rax ;" - /******************************************/ - "adox %%rdx, %%rax ;" - "adcx %%r8, %%rax ;" - - "movq 16(%1), %%rdx; " /* A[2] */ - "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ - "xorl %%r10d, %%r10d ;" - "adcx 16(%0), %%r8 ;" - "movq %%r8, 16(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ - "adox %%r9, %%r10 ;" - "adcx %%r15, %%r10 ;" - "movq %%r10, 24(%0) ;" - "mulx 16(%2), %%r15, %%r13; " /* A[2]*B[2] */ - "adox %%r11, %%r15 ;" - "adcx %%r14, %%r15 ;" - "movq $0, %%r8 ;" - "mulx 24(%2), %%r14, %%rdx; " /* A[2]*B[3] */ - "adox %%r13, %%r14 ;" - "adcx %%rax, %%r14 ;" - "movq $0, %%rax ;" - /******************************************/ - "adox %%rdx, %%rax ;" - "adcx %%r8, %%rax ;" - - "movq 24(%1), %%rdx; " /* A[3] */ - "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ - "xorl %%r10d, %%r10d ;" - "adcx 24(%0), %%r8 ;" - "movq %%r8, 24(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ - "adox %%r9, %%r10 ;" - "adcx %%r15, %%r10 ;" - "movq %%r10, 32(%0) ;" - "mulx 16(%2), %%r15, %%r13; " /* A[3]*B[2] */ - "adox %%r11, %%r15 ;" - "adcx %%r14, %%r15 ;" - "movq %%r15, 40(%0) ;" - "movq $0, %%r8 ;" - "mulx 24(%2), %%r14, %%rdx; " /* A[3]*B[3] */ - "adox %%r13, %%r14 ;" - "adcx %%rax, %%r14 ;" - "movq %%r14, 48(%0) ;" - "movq $0, %%rax ;" - /******************************************/ - "adox %%rdx, %%rax ;" - "adcx %%r8, %%rax ;" - "movq %%rax, 56(%0) ;" - : - : "r"(c), "r"(a), "r"(b) - : "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", - "%r13", "%r14", "%r15"); + /* Compute the raw addition of f1 + f2 */ + " movq 0(%0), %%r8;" + " addq 0(%2), %%r8;" + " movq 8(%0), %%r9;" + " adcxq 8(%2), %%r9;" + " movq 16(%0), %%r10;" + " adcxq 16(%2), %%r10;" + " movq 24(%0), %%r11;" + " adcxq 24(%2), %%r11;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute carry*38 */ + " mov $0, %%rax;" + " mov $38, %0;" + " cmovc %0, %%rax;" + + /* Step 2: Add carry*38 to the original sum */ + " xor %%rcx, %%rcx;" + " add %%rax, 
%%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %0, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r" (f2) + : "r" (out), "r" (f1) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc" + ); } -static void mul_256x256_integer_bmi2(u64 *const c, const u64 *const a, - const u64 *const b) +/* Computes the field substraction of two field elements */ +static inline void fsub(u64 *out, const u64 *f1, const u64 *f2) { asm volatile( - "movq (%1), %%rdx; " /* A[0] */ - "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ - "movq %%r8, (%0) ;" - "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ - "addq %%r10, %%r15 ;" - "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ - "adcq %%r8, %%rax ;" - "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ - "adcq %%r10, %%rbx ;" - /******************************************/ - "adcq $0, %%rcx ;" - - "movq 8(%1), %%rdx; " /* A[1] */ - "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ - "addq %%r15, %%r8 ;" - "movq %%r8, 8(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ - "adcq %%r10, %%r9 ;" - "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ - "adcq %%r8, %%r11 ;" - "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ - "adcq %%r10, %%r13 ;" - /******************************************/ - "adcq $0, %%r15 ;" - - "addq %%r9, %%rax ;" - "adcq %%r11, %%rbx ;" - "adcq %%r13, %%rcx ;" - "adcq $0, %%r15 ;" - - "movq 16(%1), %%rdx; " /* A[2] */ - "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ - "addq %%rax, %%r8 ;" - "movq %%r8, 16(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ - "adcq %%r10, %%r9 ;" - "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ - "adcq %%r8, %%r11 ;" - "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ - "adcq %%r10, %%r13 ;" - /******************************************/ - "adcq $0, %%rax ;" - - "addq %%r9, %%rbx ;" - "adcq 
%%r11, %%rcx ;" - "adcq %%r13, %%r15 ;" - "adcq $0, %%rax ;" - - "movq 24(%1), %%rdx; " /* A[3] */ - "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ - "addq %%rbx, %%r8 ;" - "movq %%r8, 24(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ - "adcq %%r10, %%r9 ;" - "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ - "adcq %%r8, %%r11 ;" - "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ - "adcq %%r10, %%r13 ;" - /******************************************/ - "adcq $0, %%rbx ;" - - "addq %%r9, %%rcx ;" - "movq %%rcx, 32(%0) ;" - "adcq %%r11, %%r15 ;" - "movq %%r15, 40(%0) ;" - "adcq %%r13, %%rax ;" - "movq %%rax, 48(%0) ;" - "adcq $0, %%rbx ;" - "movq %%rbx, 56(%0) ;" - : - : "r"(c), "r"(a), "r"(b) - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", - "%r10", "%r11", "%r13", "%r15"); + /* Compute the raw subtraction of f1-f2 */ + " movq 0(%1), %%r8;" + " subq 0(%2), %%r8;" + " movq 8(%1), %%r9;" + " sbbq 8(%2), %%r9;" + " movq 16(%1), %%r10;" + " sbbq 16(%2), %%r10;" + " movq 24(%1), %%r11;" + " sbbq 24(%2), %%r11;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute carry*38 */ + " mov $0, %%rax;" + " mov $38, %%rcx;" + " cmovc %%rcx, %%rax;" + + /* Step 2: Subtract carry*38 from the original difference */ + " sub %%rax, %%r8;" + " sbb $0, %%r9;" + " sbb $0, %%r10;" + " sbb $0, %%r11;" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rcx, %%rax;" + " sub %%rax, %%r8;" + + /* Store the result */ + " movq %%r8, 0(%0);" + " movq %%r9, 8(%0);" + " movq %%r10, 16(%0);" + " movq %%r11, 24(%0);" + : + : "r" (out), "r" (f1), "r" (f2) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc" + ); } -static void sqr_256x256_integer_adx(u64 *const c, const u64 *const a) +/* Computes a field multiplication: out <- f1 * f2 + * Uses the 8-element buffer tmp for intermediate results */ +static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) { asm volatile( - "movq 
(%1), %%rdx ;" /* A[0] */ - "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */ - "xorl %%r15d, %%r15d;" - "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */ - "adcx %%r14, %%r9 ;" - "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */ - "adcx %%rax, %%r10 ;" - "movq 24(%1), %%rdx ;" /* A[3] */ - "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */ - "adcx %%rcx, %%r11 ;" - "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */ - "adcx %%rax, %%rbx ;" - "movq 8(%1), %%rdx ;" /* A[1] */ - "adcx %%r15, %%r13 ;" - "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */ - "movq $0, %%r14 ;" - /******************************************/ - "adcx %%r15, %%r14 ;" - - "xorl %%r15d, %%r15d;" - "adox %%rax, %%r10 ;" - "adcx %%r8, %%r8 ;" - "adox %%rcx, %%r11 ;" - "adcx %%r9, %%r9 ;" - "adox %%r15, %%rbx ;" - "adcx %%r10, %%r10 ;" - "adox %%r15, %%r13 ;" - "adcx %%r11, %%r11 ;" - "adox %%r15, %%r14 ;" - "adcx %%rbx, %%rbx ;" - "adcx %%r13, %%r13 ;" - "adcx %%r14, %%r14 ;" - - "movq (%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ - /*******************/ - "movq %%rax, 0(%0) ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, 8(%0) ;" - "movq 8(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ - "adcq %%rax, %%r9 ;" - "movq %%r9, 16(%0) ;" - "adcq %%rcx, %%r10 ;" - "movq %%r10, 24(%0) ;" - "movq 16(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ - "adcq %%rax, %%r11 ;" - "movq %%r11, 32(%0) ;" - "adcq %%rcx, %%rbx ;" - "movq %%rbx, 40(%0) ;" - "movq 24(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ - "adcq %%rax, %%r13 ;" - "movq %%r13, 48(%0) ;" - "adcq %%rcx, %%r14 ;" - "movq %%r14, 56(%0) ;" - : - : "r"(c), "r"(a) - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", - "%r10", "%r11", "%r13", "%r14", "%r15"); + /* Compute the raw multiplication: tmp <- src1 * src2 */ + + /* Compute src1[0] * src2 */ + " movq 0(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" + " mulxq 
16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" + /* Compute src1[1] * src2 */ + " movq 8(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);" + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + /* Compute src1[2] * src2 */ + " movq 16(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);" + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + /* Compute src1[3] * src2 */ + " movq 24(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);" + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);" + /* Line up pointers */ + " mov %0, %1;" + " mov %2, %0;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%1), %%r8, %%r13;" + " xor %3, %3;" + " adoxq 0(%1), %%r8;" + " mulxq 40(%1), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%1), %%r9;" 
+ " mulxq 48(%1), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%1), %%r10;" + " mulxq 56(%1), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%1), %%r11;" + " adcx %3, %%rax;" + " adox %3, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %3, %%r9;" + " movq %%r9, 8(%0);" + " adcx %3, %%r10;" + " movq %%r10, 16(%0);" + " adcx %3, %%r11;" + " movq %%r11, 24(%0);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%0);" + : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2) + : + : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc" + ); } -static void sqr_256x256_integer_bmi2(u64 *const c, const u64 *const a) +/* Computes two field multiplications: + * out[0] <- f1[0] * f2[0] + * out[1] <- f1[1] * f2[1] + * Uses the 16-element buffer tmp for intermediate results. */ +static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) { asm volatile( - "movq 8(%1), %%rdx ;" /* A[1] */ - "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */ - "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */ - "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */ - - "movq 16(%1), %%rdx ;" /* A[2] */ - "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */ - "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */ - - "addq %%rax, %%r9 ;" - "adcq %%rdx, %%r10 ;" - "adcq %%rcx, %%r11 ;" - "adcq %%r14, %%r15 ;" - "adcq $0, %%r13 ;" - "movq $0, %%r14 ;" - "adcq $0, %%r14 ;" - - "movq (%1), %%rdx ;" /* A[0] */ - "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */ - - "addq %%rax, %%r10 ;" - "adcq %%rcx, %%r11 ;" - "adcq $0, %%r15 ;" - "adcq $0, %%r13 ;" - "adcq $0, %%r14 ;" - - "shldq $1, %%r13, %%r14 ;" - "shldq $1, %%r15, %%r13 ;" - "shldq $1, %%r11, %%r15 ;" - "shldq $1, %%r10, %%r11 ;" - "shldq $1, %%r9, %%r10 ;" - "shldq $1, %%r8, %%r9 ;" - "shlq $1, %%r8 ;" - - /*******************/ - "mulx %%rdx, %%rax, %%rcx 
;" /* A[0]^2 */ - /*******************/ - "movq %%rax, 0(%0) ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, 8(%0) ;" - "movq 8(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ - "adcq %%rax, %%r9 ;" - "movq %%r9, 16(%0) ;" - "adcq %%rcx, %%r10 ;" - "movq %%r10, 24(%0) ;" - "movq 16(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ - "adcq %%rax, %%r11 ;" - "movq %%r11, 32(%0) ;" - "adcq %%rcx, %%r15 ;" - "movq %%r15, 40(%0) ;" - "movq 24(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ - "adcq %%rax, %%r13 ;" - "movq %%r13, 48(%0) ;" - "adcq %%rcx, %%r14 ;" - "movq %%r14, 56(%0) ;" - : - : "r"(c), "r"(a) - : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", - "%r11", "%r13", "%r14", "%r15"); + /* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */ + + /* Compute src1[0] * src2 */ + " movq 0(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" + /* Compute src1[1] * src2 */ + " movq 8(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);" + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + /* Compute src1[2] * src2 */ + " movq 16(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);" + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" + " mulxq 24(%3), 
%%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + /* Compute src1[3] * src2 */ + " movq 24(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);" + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);" + + /* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */ + + /* Compute src1[0] * src2 */ + " movq 32(%1), %%rdx;" + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);" + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);" + " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" + /* Compute src1[1] * src2 */ + " movq 40(%1), %%rdx;" + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);" + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);" + " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + /* Compute src1[2] * src2 */ + " movq 48(%1), %%rdx;" + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);" + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);" + " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx 
%%rax, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + /* Compute src1[3] * src2 */ + " movq 56(%1), %%rdx;" + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);" + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);" + " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 104(%0);" " mov $0, %%r8;" + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);" + /* Line up pointers */ + " mov %0, %1;" + " mov %2, %0;" + + /* Wrap the results back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%1), %%r8, %%r13;" + " xor %3, %3;" + " adoxq 0(%1), %%r8;" + " mulxq 40(%1), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%1), %%r9;" + " mulxq 48(%1), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%1), %%r10;" + " mulxq 56(%1), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%1), %%r11;" + " adcx %3, %%rax;" + " adox %3, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %3, %%r9;" + " movq %%r9, 8(%0);" + " adcx %3, %%r10;" + " movq %%r10, 16(%0);" + " adcx %3, %%r11;" + " movq %%r11, 24(%0);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%0);" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 96(%1), %%r8, %%r13;" + " xor %3, %3;" + " adoxq 64(%1), %%r8;" + " mulxq 104(%1), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 72(%1), %%r9;" + " mulxq 112(%1), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 80(%1), %%r10;" + " mulxq 120(%1), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%1), %%r11;" + " 
adcx %3, %%rax;" + " adox %3, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %3, %%r9;" + " movq %%r9, 40(%0);" + " adcx %3, %%r10;" + " movq %%r10, 48(%0);" + " adcx %3, %%r11;" + " movq %%r11, 56(%0);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 32(%0);" + : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2) + : + : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc" + ); } -static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a) +/* Computes the field multiplication of four-element f1 with value in f2 */ +static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2) { - asm volatile( - "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */ - "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ - "xorl %%ebx, %%ebx ;" - "adox (%1), %%r8 ;" - "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ - "adcx %%r10, %%r9 ;" - "adox 8(%1), %%r9 ;" - "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ - "adcx %%r11, %%r10 ;" - "adox 16(%1), %%r10 ;" - "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ - "adcx %%rax, %%r11 ;" - "adox 24(%1), %%r11 ;" - /***************************************/ - "adcx %%rbx, %%rcx ;" - "adox %%rbx, %%rcx ;" - "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ - "adcx %%rcx, %%r8 ;" - "adcx %%rbx, %%r9 ;" - "movq %%r9, 8(%0) ;" - "adcx %%rbx, %%r10 ;" - "movq %%r10, 16(%0) ;" - "adcx %%rbx, %%r11 ;" - "movq %%r11, 24(%0) ;" - "mov $0, %%ecx ;" - "cmovc %%edx, %%ecx ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, (%0) ;" - : - : "r"(c), "r"(a) - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", - "%r10", "%r11"); -} + register u64 f2_r asm("rdx") = f2; -static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) -{ asm volatile( - "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */ - "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ - "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] 
*/ - "addq %%r10, %%r9 ;" - "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ - "adcq %%r11, %%r10 ;" - "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ - "adcq %%rax, %%r11 ;" - /***************************************/ - "adcq $0, %%rcx ;" - "addq (%1), %%r8 ;" - "adcq 8(%1), %%r9 ;" - "adcq 16(%1), %%r10 ;" - "adcq 24(%1), %%r11 ;" - "adcq $0, %%rcx ;" - "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ - "addq %%rcx, %%r8 ;" - "adcq $0, %%r9 ;" - "movq %%r9, 8(%0) ;" - "adcq $0, %%r10 ;" - "movq %%r10, 16(%0) ;" - "adcq $0, %%r11 ;" - "movq %%r11, 24(%0) ;" - "mov $0, %%ecx ;" - "cmovc %%edx, %%ecx ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, (%0) ;" - : - : "r"(c), "r"(a) - : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", - "%r11"); + /* Compute the raw multiplication of f1*f2 */ + " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */ + " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */ + " add %%rcx, %%r9;" + " mov $0, %%rcx;" + " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */ + " adcx %%rbx, %%r10;" + " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */ + " adcx %%r13, %%r11;" + " adcx %%rcx, %%rax;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute carry*38 */ + " mov $38, %%rdx;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r" (f2_r) + : "r" (out), "r" (f1) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "memory", "cc" + ); } -static __always_inline void -add_eltfp25519_1w_adx(u64 *const c, const u64 *const a, const u64 *const b) +/* Computes (p1, p2) <- bit ? 
(p2, p1) : (p1, p2) in constant time */ +static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2) { asm volatile( - "mov $38, %%eax ;" - "xorl %%ecx, %%ecx ;" - "movq (%2), %%r8 ;" - "adcx (%1), %%r8 ;" - "movq 8(%2), %%r9 ;" - "adcx 8(%1), %%r9 ;" - "movq 16(%2), %%r10 ;" - "adcx 16(%1), %%r10 ;" - "movq 24(%2), %%r11 ;" - "adcx 24(%1), %%r11 ;" - "cmovc %%eax, %%ecx ;" - "xorl %%eax, %%eax ;" - "adcx %%rcx, %%r8 ;" - "adcx %%rax, %%r9 ;" - "movq %%r9, 8(%0) ;" - "adcx %%rax, %%r10 ;" - "movq %%r10, 16(%0) ;" - "adcx %%rax, %%r11 ;" - "movq %%r11, 24(%0) ;" - "mov $38, %%ecx ;" - "cmovc %%ecx, %%eax ;" - "addq %%rax, %%r8 ;" - "movq %%r8, (%0) ;" - : - : "r"(c), "r"(a), "r"(b) - : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); + /* Transfer bit into the carry flag: adding 2^64 - 1 sets CF iff bit != 0 */ + " add $18446744073709551615, %0;" + + /* cswap p1[0], p2[0] */ + " movq 0(%1), %%r8;" + " movq 0(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 0(%1);" + " movq %%r9, 0(%2);" + + /* cswap p1[1], p2[1] */ + " movq 8(%1), %%r8;" + " movq 8(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 8(%1);" + " movq %%r9, 8(%2);" + + /* cswap p1[2], p2[2] */ + " movq 16(%1), %%r8;" + " movq 16(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 16(%1);" + " movq %%r9, 16(%2);" + + /* cswap p1[3], p2[3] */ + " movq 24(%1), %%r8;" + " movq 24(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 24(%1);" + " movq %%r9, 24(%2);" + + /* cswap p1[4], p2[4] */ + " movq 32(%1), %%r8;" + " movq 32(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 32(%1);" + " movq %%r9, 32(%2);" + + /* cswap p1[5], p2[5] */ + " movq 40(%1), %%r8;" + " movq 40(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 40(%1);" + " 
movq %%r9, 
40(%2);" + + /* cswap p1[6], p2[6] */ + " movq 48(%1), %%r8;" + " movq 48(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 48(%1);" + " movq %%r9, 48(%2);" + + /* cswap p1[7], p2[7] */ + " movq 56(%1), %%r8;" + " movq 56(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 56(%1);" + " movq %%r9, 56(%2);" + : "+&r" (bit) + : "r" (p1), "r" (p2) + : "%r8", "%r9", "%r10", "memory", "cc" + ); } -static __always_inline void -add_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a, const u64 *const b) +/* Computes the square of a field element: out <- f * f + * Uses the 8-element buffer tmp for intermediate results */ +static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) { asm volatile( - "mov $38, %%eax ;" - "movq (%2), %%r8 ;" - "addq (%1), %%r8 ;" - "movq 8(%2), %%r9 ;" - "adcq 8(%1), %%r9 ;" - "movq 16(%2), %%r10 ;" - "adcq 16(%1), %%r10 ;" - "movq 24(%2), %%r11 ;" - "adcq 24(%1), %%r11 ;" - "mov $0, %%ecx ;" - "cmovc %%eax, %%ecx ;" - "addq %%rcx, %%r8 ;" - "adcq $0, %%r9 ;" - "movq %%r9, 8(%0) ;" - "adcq $0, %%r10 ;" - "movq %%r10, 16(%0) ;" - "adcq $0, %%r11 ;" - "movq %%r11, 24(%0) ;" - "mov $0, %%ecx ;" - "cmovc %%eax, %%ecx ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, (%0) ;" - : - : "r"(c), "r"(a), "r"(b) - : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); + /* Compute the raw multiplication: tmp <- f * f */ + + /* Step 1: Compute all partial products */ + " movq 0(%1), %%rdx;" /* f[0] */ + " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ + " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 24(%1), %%rdx;" /* f[3] */ + " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */ + " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ + " mulxq 16(%1), %%rax, %%rcx;" " 
mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15, %%r15;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 0(%0);" + " add %%rcx, %%r8;" " movq %%r8, 8(%0);" + " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" " movq %%r9, 16(%0);" + " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" + " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" " movq %%r11, 32(%0);" + " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);" + " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" + " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" + + /* Line up pointers */ + " mov %0, %1;" + " mov %2, %0;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%1), %%r8, %%r13;" + " xor %%rcx, %%rcx;" + " adoxq 0(%1), %%r8;" + " mulxq 40(%1), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%1), %%r9;" + " mulxq 48(%1), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%1), %%r10;" + " mulxq 56(%1), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%1), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%0);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%0);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%0);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, 
%%r8;" + " movq %%r8, 0(%0);" + : "+&r" (tmp), "+&r" (f), "+&r" (out) + : + : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc" + ); } -static __always_inline void -sub_eltfp25519_1w(u64 *const c, const u64 *const a, const u64 *const b) +/* Computes two field squarings: + * out[0] <- f[0] * f[0] + * out[1] <- f[1] * f[1] + * Uses the 16-element buffer tmp for intermediate results */ +static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) { asm volatile( - "mov $38, %%eax ;" - "movq (%1), %%r8 ;" - "subq (%2), %%r8 ;" - "movq 8(%1), %%r9 ;" - "sbbq 8(%2), %%r9 ;" - "movq 16(%1), %%r10 ;" - "sbbq 16(%2), %%r10 ;" - "movq 24(%1), %%r11 ;" - "sbbq 24(%2), %%r11 ;" - "mov $0, %%ecx ;" - "cmovc %%eax, %%ecx ;" - "subq %%rcx, %%r8 ;" - "sbbq $0, %%r9 ;" - "movq %%r9, 8(%0) ;" - "sbbq $0, %%r10 ;" - "movq %%r10, 16(%0) ;" - "sbbq $0, %%r11 ;" - "movq %%r11, 24(%0) ;" - "mov $0, %%ecx ;" - "cmovc %%eax, %%ecx ;" - "subq %%rcx, %%r8 ;" - "movq %%r8, (%0) ;" - : - : "r"(c), "r"(a), "r"(b) - : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); + /* Step 1: Compute all partial products */ + " movq 0(%1), %%rdx;" /* f[0] */ + " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ + " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 24(%1), %%rdx;" /* f[3] */ + " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */ + " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ + " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15, %%r15;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx 
%%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 0(%0);" + " add %%rcx, %%r8;" " movq %%r8, 8(%0);" + " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" " movq %%r9, 16(%0);" + " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" + " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" " movq %%r11, 32(%0);" + " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);" + " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" + " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" + + /* Step 1: Compute all partial products */ + " movq 32(%1), %%rdx;" /* f[0] */ + " mulxq 40(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ + " mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 56(%1), %%rdx;" /* f[3] */ + " mulxq 40(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */ + " movq 40(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ + " mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15, %%r15;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 32(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 64(%0);" + " add %%rcx, %%r8;" " movq %%r8, 72(%0);" + " movq 40(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" " movq %%r9, 80(%0);" + " adcx %%rcx, %%r10;" " 
movq %%r10, 88(%0);" + " movq 48(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" " movq %%r11, 96(%0);" + " adcx %%rcx, %%rbx;" " movq %%rbx, 104(%0);" + " movq 56(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" " movq %%r13, 112(%0);" + " adcx %%rcx, %%r14;" " movq %%r14, 120(%0);" + + /* Line up pointers */ + " mov %0, %1;" + " mov %2, %0;" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%1), %%r8, %%r13;" + " xor %%rcx, %%rcx;" + " adoxq 0(%1), %%r8;" + " mulxq 40(%1), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%1), %%r9;" + " mulxq 48(%1), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%1), %%r10;" + " mulxq 56(%1), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%1), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%0);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%0);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%0);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%0);" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 96(%1), %%r8, %%r13;" + " xor %%rcx, %%rcx;" + " adoxq 64(%1), %%r8;" + " mulxq 104(%1), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 72(%1), %%r9;" + " mulxq 112(%1), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 80(%1), %%r10;" + " mulxq 120(%1), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%1), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 40(%0);" + " adcx %%rcx, %%r10;" + " movq %%r10, 48(%0);" + " adcx %%rcx, %%r11;" + " movq %%r11, 56(%0);" + + /* Step 3: Fold the 
carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 32(%0);" + : "+&r" (tmp), "+&r" (f), "+&r" (out) + : + : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc" + ); } -/* Multiplication by a24 = (A+2)/4 = (486662+2)/4 = 121666 */ -static __always_inline void -mul_a24_eltfp25519_1w(u64 *const c, const u64 *const a) +static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2) { - const u64 a24 = 121666; - asm volatile( - "movq %2, %%rdx ;" - "mulx (%1), %%r8, %%r10 ;" - "mulx 8(%1), %%r9, %%r11 ;" - "addq %%r10, %%r9 ;" - "mulx 16(%1), %%r10, %%rax ;" - "adcq %%r11, %%r10 ;" - "mulx 24(%1), %%r11, %%rcx ;" - "adcq %%rax, %%r11 ;" - /**************************/ - "adcq $0, %%rcx ;" - "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19*/ - "imul %%rdx, %%rcx ;" - "addq %%rcx, %%r8 ;" - "adcq $0, %%r9 ;" - "movq %%r9, 8(%0) ;" - "adcq $0, %%r10 ;" - "movq %%r10, 16(%0) ;" - "adcq $0, %%r11 ;" - "movq %%r11, 24(%0) ;" - "mov $0, %%ecx ;" - "cmovc %%edx, %%ecx ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, (%0) ;" - : - : "r"(c), "r"(a), "r"(a24) - : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", - "%r11"); + u64 *nq = p01_tmp1; + u64 *nq_p1 = p01_tmp1 + (u32)8U; + u64 *tmp1 = p01_tmp1 + (u32)16U; + u64 *x1 = q; + u64 *x2 = nq; + u64 *z2 = nq + (u32)4U; + u64 *z3 = nq_p1 + (u32)4U; + u64 *a = tmp1; + u64 *b = tmp1 + (u32)4U; + u64 *ab = tmp1; + u64 *dc = tmp1 + (u32)8U; + u64 *x3; + u64 *z31; + u64 *d0; + u64 *c0; + u64 *a1; + u64 *b1; + u64 *d; + u64 *c; + u64 *ab1; + u64 *dc1; + fadd(a, x2, z2); + fsub(b, x2, z2); + x3 = nq_p1; + z31 = nq_p1 + (u32)4U; + d0 = dc; + c0 = dc + (u32)4U; + fadd(c0, x3, z31); + fsub(d0, x3, z31); + fmul2(dc, dc, ab, tmp2); + fadd(x3, d0, c0); + fsub(z31, d0, c0); + a1 = tmp1; + b1 = tmp1 + (u32)4U; + d = tmp1 + (u32)8U; + c = tmp1 + (u32)12U; + ab1 = tmp1; + dc1 = tmp1 + (u32)8U; + 
fsqr2(dc1, ab1, tmp2); + fsqr2(nq_p1, nq_p1, tmp2); + a1[0U] = c[0U]; + a1[1U] = c[1U]; + a1[2U] = c[2U]; + a1[3U] = c[3U]; + fsub(c, d, c); + fmul_scalar(b1, c, (u64)121665U); + fadd(b1, b1, d); + fmul2(nq, dc1, ab1, tmp2); + fmul(z3, z3, x1, tmp2); } -static void inv_eltfp25519_1w_adx(u64 *const c, const u64 *const a) +static void point_double(u64 *nq, u64 *tmp1, u64 *tmp2) { - struct { - eltfp25519_1w_buffer buffer; - eltfp25519_1w x0, x1, x2; - } __aligned(32) m; - u64 *T[4]; - - T[0] = m.x0; - T[1] = c; /* x^(-1) */ - T[2] = m.x1; - T[3] = m.x2; - - copy_eltfp25519_1w(T[1], a); - sqrn_eltfp25519_1w_adx(T[1], 1); - copy_eltfp25519_1w(T[2], T[1]); - sqrn_eltfp25519_1w_adx(T[2], 2); - mul_eltfp25519_1w_adx(T[0], a, T[2]); - mul_eltfp25519_1w_adx(T[1], T[1], T[0]); - copy_eltfp25519_1w(T[2], T[1]); - sqrn_eltfp25519_1w_adx(T[2], 1); - mul_eltfp25519_1w_adx(T[0], T[0], T[2]); - copy_eltfp25519_1w(T[2], T[0]); - sqrn_eltfp25519_1w_adx(T[2], 5); - mul_eltfp25519_1w_adx(T[0], T[0], T[2]); - copy_eltfp25519_1w(T[2], T[0]); - sqrn_eltfp25519_1w_adx(T[2], 10); - mul_eltfp25519_1w_adx(T[2], T[2], T[0]); - copy_eltfp25519_1w(T[3], T[2]); - sqrn_eltfp25519_1w_adx(T[3], 20); - mul_eltfp25519_1w_adx(T[3], T[3], T[2]); - sqrn_eltfp25519_1w_adx(T[3], 10); - mul_eltfp25519_1w_adx(T[3], T[3], T[0]); - copy_eltfp25519_1w(T[0], T[3]); - sqrn_eltfp25519_1w_adx(T[0], 50); - mul_eltfp25519_1w_adx(T[0], T[0], T[3]); - copy_eltfp25519_1w(T[2], T[0]); - sqrn_eltfp25519_1w_adx(T[2], 100); - mul_eltfp25519_1w_adx(T[2], T[2], T[0]); - sqrn_eltfp25519_1w_adx(T[2], 50); - mul_eltfp25519_1w_adx(T[2], T[2], T[3]); - sqrn_eltfp25519_1w_adx(T[2], 5); - mul_eltfp25519_1w_adx(T[1], T[1], T[2]); - - memzero_explicit(&m, sizeof(m)); + u64 *x2 = nq; + u64 *z2 = nq + (u32)4U; + u64 *a = tmp1; + u64 *b = tmp1 + (u32)4U; + u64 *d = tmp1 + (u32)8U; + u64 *c = tmp1 + (u32)12U; + u64 *ab = tmp1; + u64 *dc = tmp1 + (u32)8U; + fadd(a, x2, z2); + fsub(b, x2, z2); + fsqr2(dc, ab, tmp2); + a[0U] = c[0U]; + a[1U] 
= c[1U]; + a[2U] = c[2U]; + a[3U] = c[3U]; + fsub(c, d, c); + fmul_scalar(b, c, (u64)121665U); + fadd(b, b, d); + fmul2(nq, dc, ab, tmp2); } -static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) +static void montgomery_ladder(u64 *out, const u8 *key, u64 *init1) { - struct { - eltfp25519_1w_buffer buffer; - eltfp25519_1w x0, x1, x2; - } __aligned(32) m; - u64 *T[5]; - - T[0] = m.x0; - T[1] = c; /* x^(-1) */ - T[2] = m.x1; - T[3] = m.x2; - - copy_eltfp25519_1w(T[1], a); - sqrn_eltfp25519_1w_bmi2(T[1], 1); - copy_eltfp25519_1w(T[2], T[1]); - sqrn_eltfp25519_1w_bmi2(T[2], 2); - mul_eltfp25519_1w_bmi2(T[0], a, T[2]); - mul_eltfp25519_1w_bmi2(T[1], T[1], T[0]); - copy_eltfp25519_1w(T[2], T[1]); - sqrn_eltfp25519_1w_bmi2(T[2], 1); - mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]); - copy_eltfp25519_1w(T[2], T[0]); - sqrn_eltfp25519_1w_bmi2(T[2], 5); - mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]); - copy_eltfp25519_1w(T[2], T[0]); - sqrn_eltfp25519_1w_bmi2(T[2], 10); - mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]); - copy_eltfp25519_1w(T[3], T[2]); - sqrn_eltfp25519_1w_bmi2(T[3], 20); - mul_eltfp25519_1w_bmi2(T[3], T[3], T[2]); - sqrn_eltfp25519_1w_bmi2(T[3], 10); - mul_eltfp25519_1w_bmi2(T[3], T[3], T[0]); - copy_eltfp25519_1w(T[0], T[3]); - sqrn_eltfp25519_1w_bmi2(T[0], 50); - mul_eltfp25519_1w_bmi2(T[0], T[0], T[3]); - copy_eltfp25519_1w(T[2], T[0]); - sqrn_eltfp25519_1w_bmi2(T[2], 100); - mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]); - sqrn_eltfp25519_1w_bmi2(T[2], 50); - mul_eltfp25519_1w_bmi2(T[2], T[2], T[3]); - sqrn_eltfp25519_1w_bmi2(T[2], 5); - mul_eltfp25519_1w_bmi2(T[1], T[1], T[2]); - - memzero_explicit(&m, sizeof(m)); + u64 tmp2[16U] = { 0U }; + u64 p01_tmp1_swap[33U] = { 0U }; + u64 *p0 = p01_tmp1_swap; + u64 *p01 = p01_tmp1_swap; + u64 *p03 = p01; + u64 *p11 = p01 + (u32)8U; + u64 *x0; + u64 *z0; + u64 *p01_tmp1; + u64 *p01_tmp11; + u64 *nq10; + u64 *nq_p11; + u64 *swap1; + u64 sw0; + u64 *nq1; + u64 *tmp1; + memcpy(p11, init1, (u32)8U * sizeof(init1[0U])); + 
x0 = p03; + z0 = p03 + (u32)4U; + x0[0U] = (u64)1U; + x0[1U] = (u64)0U; + x0[2U] = (u64)0U; + x0[3U] = (u64)0U; + z0[0U] = (u64)0U; + z0[1U] = (u64)0U; + z0[2U] = (u64)0U; + z0[3U] = (u64)0U; + p01_tmp1 = p01_tmp1_swap; + p01_tmp11 = p01_tmp1_swap; + nq10 = p01_tmp1_swap; + nq_p11 = p01_tmp1_swap + (u32)8U; + swap1 = p01_tmp1_swap + (u32)32U; + cswap2((u64)1U, nq10, nq_p11); + point_add_and_double(init1, p01_tmp11, tmp2); + swap1[0U] = (u64)1U; + { + u32 i; + for (i = (u32)0U; i < (u32)251U; i = i + (u32)1U) { + u64 *p01_tmp12 = p01_tmp1_swap; + u64 *swap2 = p01_tmp1_swap + (u32)32U; + u64 *nq2 = p01_tmp12; + u64 *nq_p12 = p01_tmp12 + (u32)8U; + u64 bit = (u64)(key[((u32)253U - i) / (u32)8U] >> ((u32)253U - i) % (u32)8U & (u8)1U); + u64 sw = swap2[0U] ^ bit; + cswap2(sw, nq2, nq_p12); + point_add_and_double(init1, p01_tmp12, tmp2); + swap2[0U] = bit; + } + } + sw0 = swap1[0U]; + cswap2(sw0, nq10, nq_p11); + nq1 = p01_tmp1; + tmp1 = p01_tmp1 + (u32)16U; + point_double(nq1, tmp1, tmp2); + point_double(nq1, tmp1, tmp2); + point_double(nq1, tmp1, tmp2); + memcpy(out, p0, (u32)8U * sizeof(p0[0U])); + + memzero_explicit(tmp2, sizeof(tmp2)); + memzero_explicit(p01_tmp1_swap, sizeof(p01_tmp1_swap)); } -/* Given c, a 256-bit number, fred_eltfp25519_1w updates c - * with a number such that 0 <= C < 2**255-19. - */ -static __always_inline void fred_eltfp25519_1w(u64 *const c) +static void fsquare_times(u64 *o, const u64 *inp, u64 *tmp, u32 n1) { - u64 tmp0 = 38, tmp1 = 19; - asm volatile( - "btrq $63, %3 ;" /* Put bit 255 in carry flag and clear */ - "cmovncl %k5, %k4 ;" /* c[255] ? 38 : 19 */ - - /* Add either 19 or 38 to c */ - "addq %4, %0 ;" - "adcq $0, %1 ;" - "adcq $0, %2 ;" - "adcq $0, %3 ;" - - /* Test for bit 255 again; only triggered on overflow modulo 2^255-19 */ - "movl $0, %k4 ;" - "cmovnsl %k5, %k4 ;" /* c[255] ? 
0 : 19 */ - "btrq $63, %3 ;" /* Clear bit 255 */ - - /* Subtract 19 if necessary */ - "subq %4, %0 ;" - "sbbq $0, %1 ;" - "sbbq $0, %2 ;" - "sbbq $0, %3 ;" - - : "+r"(c[0]), "+r"(c[1]), "+r"(c[2]), "+r"(c[3]), "+r"(tmp0), - "+r"(tmp1) - : - : "memory", "cc"); + u32 i; + fsqr(o, inp, tmp); + for (i = (u32)0U; i < n1 - (u32)1U; i = i + (u32)1U) + fsqr(o, o, tmp); } -static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py) +static void finv(u64 *o, const u64 *i, u64 *tmp) { - u64 temp; - asm volatile( - "test %9, %9 ;" - "movq %0, %8 ;" - "cmovnzq %4, %0 ;" - "cmovnzq %8, %4 ;" - "movq %1, %8 ;" - "cmovnzq %5, %1 ;" - "cmovnzq %8, %5 ;" - "movq %2, %8 ;" - "cmovnzq %6, %2 ;" - "cmovnzq %8, %6 ;" - "movq %3, %8 ;" - "cmovnzq %7, %3 ;" - "cmovnzq %8, %7 ;" - : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]), - "+r"(py[0]), "+r"(py[1]), "+r"(py[2]), "+r"(py[3]), - "=r"(temp) - : "r"(bit) - : "cc" - ); + u64 t1[16U] = { 0U }; + u64 *a0 = t1; + u64 *b = t1 + (u32)4U; + u64 *c = t1 + (u32)8U; + u64 *t00 = t1 + (u32)12U; + u64 *tmp1 = tmp; + u64 *a; + u64 *t0; + fsquare_times(a0, i, tmp1, (u32)1U); + fsquare_times(t00, a0, tmp1, (u32)2U); + fmul(b, t00, i, tmp); + fmul(a0, b, a0, tmp); + fsquare_times(t00, a0, tmp1, (u32)1U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)5U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)10U); + fmul(c, t00, b, tmp); + fsquare_times(t00, c, tmp1, (u32)20U); + fmul(t00, t00, c, tmp); + fsquare_times(t00, t00, tmp1, (u32)10U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)50U); + fmul(c, t00, b, tmp); + fsquare_times(t00, c, tmp1, (u32)100U); + fmul(t00, t00, c, tmp); + fsquare_times(t00, t00, tmp1, (u32)50U); + fmul(t00, t00, b, tmp); + fsquare_times(t00, t00, tmp1, (u32)5U); + a = t1; + t0 = t1 + (u32)12U; + fmul(o, t0, a, tmp); } -static __always_inline void cselect(u8 bit, u64 *const px, const u64 *const py) +static void store_felem(u64 *b, u64 *f) { - asm volatile( - "test %4, %4 ;" 
- "cmovnzq %5, %0 ;" - "cmovnzq %6, %1 ;" - "cmovnzq %7, %2 ;" - "cmovnzq %8, %3 ;" - : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]) - : "r"(bit), "rm"(py[0]), "rm"(py[1]), "rm"(py[2]), "rm"(py[3]) - : "cc" - ); + u64 f30 = f[3U]; + u64 top_bit0 = f30 >> (u32)63U; + u64 carry0; + u64 f31; + u64 top_bit; + u64 carry; + u64 f0; + u64 f1; + u64 f2; + u64 f3; + u64 m0; + u64 m1; + u64 m2; + u64 m3; + u64 mask; + u64 f0_; + u64 f1_; + u64 f2_; + u64 f3_; + u64 o0; + u64 o1; + u64 o2; + u64 o3; + f[3U] = f30 & (u64)0x7fffffffffffffffU; + carry0 = add_scalar(f, f, (u64)19U * top_bit0); + f31 = f[3U]; + top_bit = f31 >> (u32)63U; + f[3U] = f31 & (u64)0x7fffffffffffffffU; + carry = add_scalar(f, f, (u64)19U * top_bit); + f0 = f[0U]; + f1 = f[1U]; + f2 = f[2U]; + f3 = f[3U]; + m0 = gte_mask(f0, (u64)0xffffffffffffffedU); + m1 = eq_mask(f1, (u64)0xffffffffffffffffU); + m2 = eq_mask(f2, (u64)0xffffffffffffffffU); + m3 = eq_mask(f3, (u64)0x7fffffffffffffffU); + mask = ((m0 & m1) & m2) & m3; + f0_ = f0 - (mask & (u64)0xffffffffffffffedU); + f1_ = f1 - (mask & (u64)0xffffffffffffffffU); + f2_ = f2 - (mask & (u64)0xffffffffffffffffU); + f3_ = f3 - (mask & (u64)0x7fffffffffffffffU); + o0 = f0_; + o1 = f1_; + o2 = f2_; + o3 = f3_; + b[0U] = o0; + b[1U] = o1; + b[2U] = o2; + b[3U] = o3; } -static void curve25519_adx(u8 shared[CURVE25519_KEY_SIZE], - const u8 private_key[CURVE25519_KEY_SIZE], - const u8 session_key[CURVE25519_KEY_SIZE]) +static void encode_point(u8 *o, const u64 *i) { - struct { - u64 buffer[4 * NUM_WORDS_ELTFP25519]; - u64 coordinates[4 * NUM_WORDS_ELTFP25519]; - u64 workspace[6 * NUM_WORDS_ELTFP25519]; - u8 session[CURVE25519_KEY_SIZE]; - u8 private[CURVE25519_KEY_SIZE]; - } __aligned(32) m; - - int i = 0, j = 0; - u64 prev = 0; - u64 *const X1 = (u64 *)m.session; - u64 *const key = (u64 *)m.private; - u64 *const Px = m.coordinates + 0; - u64 *const Pz = m.coordinates + 4; - u64 *const Qx = m.coordinates + 8; - u64 *const Qz = m.coordinates + 12; - u64 *const 
X2 = Qx; - u64 *const Z2 = Qz; - u64 *const X3 = Px; - u64 *const Z3 = Pz; - u64 *const X2Z2 = Qx; - u64 *const X3Z3 = Px; - - u64 *const A = m.workspace + 0; - u64 *const B = m.workspace + 4; - u64 *const D = m.workspace + 8; - u64 *const C = m.workspace + 12; - u64 *const DA = m.workspace + 16; - u64 *const CB = m.workspace + 20; - u64 *const AB = A; - u64 *const DC = D; - u64 *const DACB = DA; - - memcpy(m.private, private_key, sizeof(m.private)); - memcpy(m.session, session_key, sizeof(m.session)); - - curve25519_clamp_secret(m.private); - - /* As in the draft: - * When receiving such an array, implementations of curve25519 - * MUST mask the most-significant bit in the final byte. This - * is done to preserve compatibility with point formats which - * reserve the sign bit for use in other protocols and to - * increase resistance to implementation fingerprinting - */ - m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1; - - copy_eltfp25519_1w(Px, X1); - setzero_eltfp25519_1w(Pz); - setzero_eltfp25519_1w(Qx); - setzero_eltfp25519_1w(Qz); - - Pz[0] = 1; - Qx[0] = 1; - - /* main-loop */ - prev = 0; - j = 62; - for (i = 3; i >= 0; --i) { - while (j >= 0) { - u64 bit = (key[i] >> j) & 0x1; - u64 swap = bit ^ prev; - prev = bit; - - add_eltfp25519_1w_adx(A, X2, Z2); /* A = (X2+Z2) */ - sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */ - add_eltfp25519_1w_adx(C, X3, Z3); /* C = (X3+Z3) */ - sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */ - mul_eltfp25519_2w_adx(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */ - - cselect(swap, A, C); - cselect(swap, B, D); - - sqr_eltfp25519_2w_adx(AB); /* [AA|BB] = [A^2|B^2] */ - add_eltfp25519_1w_adx(X3, DA, CB); /* X3 = (DA+CB) */ - sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */ - sqr_eltfp25519_2w_adx(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */ - - copy_eltfp25519_1w(X2, B); /* X2 = B^2 */ - sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */ - - mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */ - add_eltfp25519_1w_adx(B, B, X2); /* B 
= a24*E+B */ - mul_eltfp25519_2w_adx(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */ - mul_eltfp25519_1w_adx(Z3, Z3, X1); /* Z3 = Z3*X1 */ - --j; - } - j = 63; - } - - inv_eltfp25519_1w_adx(A, Qz); - mul_eltfp25519_1w_adx((u64 *)shared, Qx, A); - fred_eltfp25519_1w((u64 *)shared); - - memzero_explicit(&m, sizeof(m)); + const u64 *x = i; + const u64 *z = i + (u32)4U; + u64 tmp[4U] = { 0U }; + u64 tmp_w[16U] = { 0U }; + finv(tmp, z, tmp_w); + fmul(tmp, tmp, x, tmp_w); + store_felem((u64 *)o, tmp); } -static void curve25519_adx_base(u8 session_key[CURVE25519_KEY_SIZE], - const u8 private_key[CURVE25519_KEY_SIZE]) +static void curve25519_ever64(u8 *out, const u8 *priv, const u8 *pub) { - struct { - u64 buffer[4 * NUM_WORDS_ELTFP25519]; - u64 coordinates[4 * NUM_WORDS_ELTFP25519]; - u64 workspace[4 * NUM_WORDS_ELTFP25519]; - u8 private[CURVE25519_KEY_SIZE]; - } __aligned(32) m; - - const int ite[4] = { 64, 64, 64, 63 }; - const int q = 3; - u64 swap = 1; - - int i = 0, j = 0, k = 0; - u64 *const key = (u64 *)m.private; - u64 *const Ur1 = m.coordinates + 0; - u64 *const Zr1 = m.coordinates + 4; - u64 *const Ur2 = m.coordinates + 8; - u64 *const Zr2 = m.coordinates + 12; - - u64 *const UZr1 = m.coordinates + 0; - u64 *const ZUr2 = m.coordinates + 8; - - u64 *const A = m.workspace + 0; - u64 *const B = m.workspace + 4; - u64 *const C = m.workspace + 8; - u64 *const D = m.workspace + 12; - - u64 *const AB = m.workspace + 0; - u64 *const CD = m.workspace + 8; - - const u64 *const P = table_ladder_8k; - - memcpy(m.private, private_key, sizeof(m.private)); - - curve25519_clamp_secret(m.private); - - setzero_eltfp25519_1w(Ur1); - setzero_eltfp25519_1w(Zr1); - setzero_eltfp25519_1w(Zr2); - Ur1[0] = 1; - Zr1[0] = 1; - Zr2[0] = 1; - - /* G-S */ - Ur2[3] = 0x1eaecdeee27cab34UL; - Ur2[2] = 0xadc7a0b9235d48e2UL; - Ur2[1] = 0xbbf095ae14b2edf8UL; - Ur2[0] = 0x7e94e1fec82faabdUL; - - /* main-loop */ - j = q; - for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) { - while (j < ite[i]) { - u64 
bit = (key[i] >> j) & 0x1; - k = (64 * i + j - q); - swap = swap ^ bit; - cswap(swap, Ur1, Ur2); - cswap(swap, Zr1, Zr2); - swap = bit; - /* Addition */ - sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ - add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */ - mul_eltfp25519_1w_adx(C, &P[4 * k], B); /* C = M0-B */ - sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */ - add_eltfp25519_1w_adx(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */ - sqr_eltfp25519_2w_adx(AB); /* A = A^2 | B = B^2 */ - mul_eltfp25519_2w_adx(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */ - ++j; + u64 init1[8U] = { 0U }; + u64 tmp[4U] = { 0U }; + u64 tmp3; + u64 *x; + u64 *z; + { + u32 i; + for (i = (u32)0U; i < (u32)4U; i = i + (u32)1U) { + u64 *os = tmp; + const u8 *bj = pub + i * (u32)8U; + u64 u = *(u64 *)bj; + u64 r = u; + u64 x0 = r; + os[i] = x0; } - j = 0; } - - /* Doubling */ - for (i = 0; i < q; ++i) { - add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */ - sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ - sqr_eltfp25519_2w_adx(AB); /* A = A**2 B = B**2 */ - copy_eltfp25519_1w(C, B); /* C = B */ - sub_eltfp25519_1w(B, A, B); /* B = A-B */ - mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */ - add_eltfp25519_1w_adx(D, D, C); /* D = D+C */ - mul_eltfp25519_2w_adx(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */ - } - - /* Convert to affine coordinates */ - inv_eltfp25519_1w_adx(A, Zr1); - mul_eltfp25519_1w_adx((u64 *)session_key, Ur1, A); - fred_eltfp25519_1w((u64 *)session_key); - - memzero_explicit(&m, sizeof(m)); + tmp3 = tmp[3U]; + tmp[3U] = tmp3 & (u64)0x7fffffffffffffffU; + x = init1; + z = init1 + (u32)4U; + z[0U] = (u64)1U; + z[1U] = (u64)0U; + z[2U] = (u64)0U; + z[3U] = (u64)0U; + x[0U] = tmp[0U]; + x[1U] = tmp[1U]; + x[2U] = tmp[2U]; + x[3U] = tmp[3U]; + montgomery_ladder(init1, priv, init1); + encode_point(out, init1); } -static void curve25519_bmi2(u8 shared[CURVE25519_KEY_SIZE], - const u8 private_key[CURVE25519_KEY_SIZE], - const u8 session_key[CURVE25519_KEY_SIZE]) 
-{ - struct { - u64 buffer[4 * NUM_WORDS_ELTFP25519]; - u64 coordinates[4 * NUM_WORDS_ELTFP25519]; - u64 workspace[6 * NUM_WORDS_ELTFP25519]; - u8 session[CURVE25519_KEY_SIZE]; - u8 private[CURVE25519_KEY_SIZE]; - } __aligned(32) m; - - int i = 0, j = 0; - u64 prev = 0; - u64 *const X1 = (u64 *)m.session; - u64 *const key = (u64 *)m.private; - u64 *const Px = m.coordinates + 0; - u64 *const Pz = m.coordinates + 4; - u64 *const Qx = m.coordinates + 8; - u64 *const Qz = m.coordinates + 12; - u64 *const X2 = Qx; - u64 *const Z2 = Qz; - u64 *const X3 = Px; - u64 *const Z3 = Pz; - u64 *const X2Z2 = Qx; - u64 *const X3Z3 = Px; - - u64 *const A = m.workspace + 0; - u64 *const B = m.workspace + 4; - u64 *const D = m.workspace + 8; - u64 *const C = m.workspace + 12; - u64 *const DA = m.workspace + 16; - u64 *const CB = m.workspace + 20; - u64 *const AB = A; - u64 *const DC = D; - u64 *const DACB = DA; - - memcpy(m.private, private_key, sizeof(m.private)); - memcpy(m.session, session_key, sizeof(m.session)); - - curve25519_clamp_secret(m.private); - - /* As in the draft: - * When receiving such an array, implementations of curve25519 - * MUST mask the most-significant bit in the final byte. 
This - * is done to preserve compatibility with point formats which - * reserve the sign bit for use in other protocols and to - * increase resistance to implementation fingerprinting - */ - m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1; - - copy_eltfp25519_1w(Px, X1); - setzero_eltfp25519_1w(Pz); - setzero_eltfp25519_1w(Qx); - setzero_eltfp25519_1w(Qz); - - Pz[0] = 1; - Qx[0] = 1; - - /* main-loop */ - prev = 0; - j = 62; - for (i = 3; i >= 0; --i) { - while (j >= 0) { - u64 bit = (key[i] >> j) & 0x1; - u64 swap = bit ^ prev; - prev = bit; - - add_eltfp25519_1w_bmi2(A, X2, Z2); /* A = (X2+Z2) */ - sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */ - add_eltfp25519_1w_bmi2(C, X3, Z3); /* C = (X3+Z3) */ - sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */ - mul_eltfp25519_2w_bmi2(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */ - - cselect(swap, A, C); - cselect(swap, B, D); - - sqr_eltfp25519_2w_bmi2(AB); /* [AA|BB] = [A^2|B^2] */ - add_eltfp25519_1w_bmi2(X3, DA, CB); /* X3 = (DA+CB) */ - sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */ - sqr_eltfp25519_2w_bmi2(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */ - - copy_eltfp25519_1w(X2, B); /* X2 = B^2 */ - sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */ - - mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */ - add_eltfp25519_1w_bmi2(B, B, X2); /* B = a24*E+B */ - mul_eltfp25519_2w_bmi2(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */ - mul_eltfp25519_1w_bmi2(Z3, Z3, X1); /* Z3 = Z3*X1 */ - --j; - } - j = 63; - } - - inv_eltfp25519_1w_bmi2(A, Qz); - mul_eltfp25519_1w_bmi2((u64 *)shared, Qx, A); - fred_eltfp25519_1w((u64 *)shared); +/* The below constants were generated using this sage script: + * + * #!/usr/bin/env sage + * import sys + * from sage.all import * + * def limbs(n): + * n = int(n) + * l = ((n >> 0) % 2^64, (n >> 64) % 2^64, (n >> 128) % 2^64, (n >> 192) % 2^64) + * return "0x%016xULL, 0x%016xULL, 0x%016xULL, 0x%016xULL" % l + * ec = EllipticCurve(GF(2^255 - 19), [0, 486662, 0, 1, 0]) + * p_minus_s = 
(ec.lift_x(9) - ec.lift_x(1))[0] + * print("static const u64 p_minus_s[] = { %s };\n" % limbs(p_minus_s)) + * print("static const u64 table_ladder[] = {") + * p = ec.lift_x(9) + * for i in range(252): + * l = (p[0] + p[2]) / (p[0] - p[2]) + * print(("\t%s" + ("," if i != 251 else "")) % limbs(l)) + * p = p * 2 + * print("};") + * + */ - memzero_explicit(&m, sizeof(m)); -} +static const u64 p_minus_s[] = { 0x816b1e0137d48290ULL, 0x440f6a51eb4d1207ULL, 0x52385f46dca2b71dULL, 0x215132111d8354cbULL }; + +static const u64 table_ladder[] = { + 0xfffffffffffffff3ULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x5fffffffffffffffULL, + 0x6b8220f416aafe96ULL, 0x82ebeb2b4f566a34ULL, 0xd5a9a5b075a5950fULL, 0x5142b2cf4b2488f4ULL, + 0x6aaebc750069680cULL, 0x89cf7820a0f99c41ULL, 0x2a58d9183b56d0f4ULL, 0x4b5aca80e36011a4ULL, + 0x329132348c29745dULL, 0xf4a2e616e1642fd7ULL, 0x1e45bb03ff67bc34ULL, 0x306912d0f42a9b4aULL, + 0xff886507e6af7154ULL, 0x04f50e13dfeec82fULL, 0xaa512fe82abab5ceULL, 0x174e251a68d5f222ULL, + 0xcf96700d82028898ULL, 0x1743e3370a2c02c5ULL, 0x379eec98b4e86eaaULL, 0x0c59888a51e0482eULL, + 0xfbcbf1d699b5d189ULL, 0xacaef0d58e9fdc84ULL, 0xc1c20d06231f7614ULL, 0x2938218da274f972ULL, + 0xf6af49beff1d7f18ULL, 0xcc541c22387ac9c2ULL, 0x96fcc9ef4015c56bULL, 0x69c1627c690913a9ULL, + 0x7a86fd2f4733db0eULL, 0xfdb8c4f29e087de9ULL, 0x095e4b1a8ea2a229ULL, 0x1ad7a7c829b37a79ULL, + 0x342d89cad17ea0c0ULL, 0x67bedda6cced2051ULL, 0x19ca31bf2bb42f74ULL, 0x3df7b4c84980acbbULL, + 0xa8c6444dc80ad883ULL, 0xb91e440366e3ab85ULL, 0xc215cda00164f6d8ULL, 0x3d867c6ef247e668ULL, + 0xc7dd582bcc3e658cULL, 0xfd2c4748ee0e5528ULL, 0xa0fd9b95cc9f4f71ULL, 0x7529d871b0675ddfULL, + 0xb8f568b42d3cbd78ULL, 0x1233011b91f3da82ULL, 0x2dce6ccd4a7c3b62ULL, 0x75e7fc8e9e498603ULL, + 0x2f4f13f1fcd0b6ecULL, 0xf1a8ca1f29ff7a45ULL, 0xc249c1a72981e29bULL, 0x6ebe0dbb8c83b56aULL, + 0x7114fa8d170bb222ULL, 0x65a2dcd5bf93935fULL, 0xbdc41f68b59c979aULL, 0x2f0eef79a2ce9289ULL, + 0x42ecbf0c083c37ceULL, 0x2930bc09ec496322ULL, 
0xf294b0c19cfeac0dULL, 0x3780aa4bedfabb80ULL, + 0x56c17d3e7cead929ULL, 0xe7cb4beb2e5722c5ULL, 0x0ce931732dbfe15aULL, 0x41b883c7621052f8ULL, + 0xdbf75ca0c3d25350ULL, 0x2936be086eb1e351ULL, 0xc936e03cb4a9b212ULL, 0x1d45bf82322225aaULL, + 0xe81ab1036a024cc5ULL, 0xe212201c304c9a72ULL, 0xc5d73fba6832b1fcULL, 0x20ffdb5a4d839581ULL, + 0xa283d367be5d0fadULL, 0x6c2b25ca8b164475ULL, 0x9d4935467caaf22eULL, 0x5166408eee85ff49ULL, + 0x3c67baa2fab4e361ULL, 0xb3e433c67ef35cefULL, 0x5259729241159b1cULL, 0x6a621892d5b0ab33ULL, + 0x20b74a387555cdcbULL, 0x532aa10e1208923fULL, 0xeaa17b7762281dd1ULL, 0x61ab3443f05c44bfULL, + 0x257a6c422324def8ULL, 0x131c6c1017e3cf7fULL, 0x23758739f630a257ULL, 0x295a407a01a78580ULL, + 0xf8c443246d5da8d9ULL, 0x19d775450c52fa5dULL, 0x2afcfc92731bf83dULL, 0x7d10c8e81b2b4700ULL, + 0xc8e0271f70baa20bULL, 0x993748867ca63957ULL, 0x5412efb3cb7ed4bbULL, 0x3196d36173e62975ULL, + 0xde5bcad141c7dffcULL, 0x47cc8cd2b395c848ULL, 0xa34cd942e11af3cbULL, 0x0256dbf2d04ecec2ULL, + 0x875ab7e94b0e667fULL, 0xcad4dd83c0850d10ULL, 0x47f12e8f4e72c79fULL, 0x5f1a87bb8c85b19bULL, + 0x7ae9d0b6437f51b8ULL, 0x12c7ce5518879065ULL, 0x2ade09fe5cf77aeeULL, 0x23a05a2f7d2c5627ULL, + 0x5908e128f17c169aULL, 0xf77498dd8ad0852dULL, 0x74b4c4ceab102f64ULL, 0x183abadd10139845ULL, + 0xb165ba8daa92aaacULL, 0xd5c5ef9599386705ULL, 0xbe2f8f0cf8fc40d1ULL, 0x2701e635ee204514ULL, + 0x629fa80020156514ULL, 0xf223868764a8c1ceULL, 0x5b894fff0b3f060eULL, 0x60d9944cf708a3faULL, + 0xaeea001a1c7a201fULL, 0xebf16a633ee2ce63ULL, 0x6f7709594c7a07e1ULL, 0x79b958150d0208cbULL, + 0x24b55e5301d410e7ULL, 0xe3a34edff3fdc84dULL, 0xd88768e4904032d8ULL, 0x131384427b3aaeecULL, + 0x8405e51286234f14ULL, 0x14dc4739adb4c529ULL, 0xb8a2b5b250634ffdULL, 0x2fe2a94ad8a7ff93ULL, + 0xec5c57efe843faddULL, 0x2843ce40f0bb9918ULL, 0xa4b561d6cf3d6305ULL, 0x743629bde8fb777eULL, + 0x343edd46bbaf738fULL, 0xed981828b101a651ULL, 0xa401760b882c797aULL, 0x1fc223e28dc88730ULL, + 0x48604e91fc0fba0eULL, 0xb637f78f052c6fa4ULL, 0x91ccac3d09e9239cULL, 
0x23f7eed4437a687cULL, + 0x5173b1118d9bd800ULL, 0x29d641b63189d4a7ULL, 0xfdbf177988bbc586ULL, 0x2959894fcad81df5ULL, + 0xaebc8ef3b4bbc899ULL, 0x4148995ab26992b9ULL, 0x24e20b0134f92cfbULL, 0x40d158894a05dee8ULL, + 0x46b00b1185af76f6ULL, 0x26bac77873187a79ULL, 0x3dc0bf95ab8fff5fULL, 0x2a608bd8945524d7ULL, + 0x26449588bd446302ULL, 0x7c4bc21c0388439cULL, 0x8e98a4f383bd11b2ULL, 0x26218d7bc9d876b9ULL, + 0xe3081542997c178aULL, 0x3c2d29a86fb6606fULL, 0x5c217736fa279374ULL, 0x7dde05734afeb1faULL, + 0x3bf10e3906d42babULL, 0xe4f7803e1980649cULL, 0xe6053bf89595bf7aULL, 0x394faf38da245530ULL, + 0x7a8efb58896928f4ULL, 0xfbc778e9cc6a113cULL, 0x72670ce330af596fULL, 0x48f222a81d3d6cf7ULL, + 0xf01fce410d72caa7ULL, 0x5a20ecc7213b5595ULL, 0x7bc21165c1fa1483ULL, 0x07f89ae31da8a741ULL, + 0x05d2c2b4c6830ff9ULL, 0xd43e330fc6316293ULL, 0xa5a5590a96d3a904ULL, 0x705edb91a65333b6ULL, + 0x048ee15e0bb9a5f7ULL, 0x3240cfca9e0aaf5dULL, 0x8f4b71ceedc4a40bULL, 0x621c0da3de544a6dULL, + 0x92872836a08c4091ULL, 0xce8375b010c91445ULL, 0x8a72eb524f276394ULL, 0x2667fcfa7ec83635ULL, + 0x7f4c173345e8752aULL, 0x061b47feee7079a5ULL, 0x25dd9afa9f86ff34ULL, 0x3780cef5425dc89cULL, + 0x1a46035a513bb4e9ULL, 0x3e1ef379ac575adaULL, 0xc78c5f1c5fa24b50ULL, 0x321a967634fd9f22ULL, + 0x946707b8826e27faULL, 0x3dca84d64c506fd0ULL, 0xc189218075e91436ULL, 0x6d9284169b3b8484ULL, + 0x3a67e840383f2ddfULL, 0x33eec9a30c4f9b75ULL, 0x3ec7c86fa783ef47ULL, 0x26ec449fbac9fbc4ULL, + 0x5c0f38cba09b9e7dULL, 0x81168cc762a3478cULL, 0x3e23b0d306fc121cULL, 0x5a238aa0a5efdcddULL, + 0x1ba26121c4ea43ffULL, 0x36f8c77f7c8832b5ULL, 0x88fbea0b0adcf99aULL, 0x5ca9938ec25bebf9ULL, + 0xd5436a5e51fccda0ULL, 0x1dbc4797c2cd893bULL, 0x19346a65d3224a08ULL, 0x0f5034e49b9af466ULL, + 0xf23c3967a1e0b96eULL, 0xe58b08fa867a4d88ULL, 0xfb2fabc6a7341679ULL, 0x2a75381eb6026946ULL, + 0xc80a3be4c19420acULL, 0x66b1f6c681f2b6dcULL, 0x7cf7036761e93388ULL, 0x25abbbd8a660a4c4ULL, + 0x91ea12ba14fd5198ULL, 0x684950fc4a3cffa9ULL, 0xf826842130f5ad28ULL, 0x3ea988f75301a441ULL, + 
0xc978109a695f8c6fULL, 0x1746eb4a0530c3f3ULL, 0x444d6d77b4459995ULL, 0x75952b8c054e5cc7ULL, + 0xa3703f7915f4d6aaULL, 0x66c346202f2647d8ULL, 0xd01469df811d644bULL, 0x77fea47d81a5d71fULL, + 0xc5e9529ef57ca381ULL, 0x6eeeb4b9ce2f881aULL, 0xb6e91a28e8009bd6ULL, 0x4b80be3e9afc3fecULL, + 0x7e3773c526aed2c5ULL, 0x1b4afcb453c9a49dULL, 0xa920bdd7baffb24dULL, 0x7c54699f122d400eULL, + 0xef46c8e14fa94bc8ULL, 0xe0b074ce2952ed5eULL, 0xbea450e1dbd885d5ULL, 0x61b68649320f712cULL, + 0x8a485f7309ccbdd1ULL, 0xbd06320d7d4d1a2dULL, 0x25232973322dbef4ULL, 0x445dc4758c17f770ULL, + 0xdb0434177cc8933cULL, 0xed6fe82175ea059fULL, 0x1efebefdc053db34ULL, 0x4adbe867c65daf99ULL, + 0x3acd71a2a90609dfULL, 0xe5e991856dd04050ULL, 0x1ec69b688157c23cULL, 0x697427f6885cfe4dULL, + 0xd7be7b9b65e1a851ULL, 0xa03d28d522c536ddULL, 0x28399d658fd2b645ULL, 0x49e5b7e17c2641e1ULL, + 0x6f8c3a98700457a4ULL, 0x5078f0a25ebb6778ULL, 0xd13c3ccbc382960fULL, 0x2e003258a7df84b1ULL, + 0x8ad1f39be6296a1cULL, 0xc1eeaa652a5fbfb2ULL, 0x33ee0673fd26f3cbULL, 0x59256173a69d2cccULL, + 0x41ea07aa4e18fc41ULL, 0xd9fc19527c87a51eULL, 0xbdaacb805831ca6fULL, 0x445b652dc916694fULL, + 0xce92a3a7f2172315ULL, 0x1edc282de11b9964ULL, 0xa1823aafe04c314aULL, 0x790a2d94437cf586ULL, + 0x71c447fb93f6e009ULL, 0x8922a56722845276ULL, 0xbf70903b204f5169ULL, 0x2f7a89891ba319feULL, + 0x02a08eb577e2140cULL, 0xed9a4ed4427bdcf4ULL, 0x5253ec44e4323cd1ULL, 0x3e88363c14e9355bULL, + 0xaa66c14277110b8cULL, 0x1ae0391610a23390ULL, 0x2030bd12c93fc2a2ULL, 0x3ee141579555c7abULL, + 0x9214de3a6d6e7d41ULL, 0x3ccdd88607f17efeULL, 0x674f1288f8e11217ULL, 0x5682250f329f93d0ULL, + 0x6cf00b136d2e396eULL, 0x6e4cf86f1014debfULL, 0x5930b1b5bfcc4e83ULL, 0x047069b48aba16b6ULL, + 0x0d4ce4ab69b20793ULL, 0xb24db91a97d0fb9eULL, 0xcdfa50f54e00d01dULL, 0x221b1085368bddb5ULL, + 0xe7e59468b1e3d8d2ULL, 0x53c56563bd122f93ULL, 0xeee8a903e0663f09ULL, 0x61efa662cbbe3d42ULL, + 0x2cf8ddddde6eab2aULL, 0x9bf80ad51435f231ULL, 0x5deadacec9f04973ULL, 0x29275b5d41d29b27ULL, + 0xcfde0f0895ebf14fULL, 
0xb9aab96b054905a7ULL, 0xcae80dd9a1c420fdULL, 0x0a63bf2f1673bbc7ULL, + 0x092f6e11958fbc8cULL, 0x672a81e804822fadULL, 0xcac8351560d52517ULL, 0x6f3f7722c8f192f8ULL, + 0xf8ba90ccc2e894b7ULL, 0x2c7557a438ff9f0dULL, 0x894d1d855ae52359ULL, 0x68e122157b743d69ULL, + 0xd87e5570cfb919f3ULL, 0x3f2cdecd95798db9ULL, 0x2121154710c0a2ceULL, 0x3c66a115246dc5b2ULL, + 0xcbedc562294ecb72ULL, 0xba7143c36a280b16ULL, 0x9610c2efd4078b67ULL, 0x6144735d946a4b1eULL, + 0x536f111ed75b3350ULL, 0x0211db8c2041d81bULL, 0xf93cb1000e10413cULL, 0x149dfd3c039e8876ULL, + 0xd479dde46b63155bULL, 0xb66e15e93c837976ULL, 0xdafde43b1f13e038ULL, 0x5fafda1a2e4b0b35ULL, + 0x3600bbdf17197581ULL, 0x3972050bbe3cd2c2ULL, 0x5938906dbdd5be86ULL, 0x34fce5e43f9b860fULL, + 0x75a8a4cd42d14d02ULL, 0x828dabc53441df65ULL, 0x33dcabedd2e131d3ULL, 0x3ebad76fb814d25fULL, + 0xd4906f566f70e10fULL, 0x5d12f7aa51690f5aULL, 0x45adb16e76cefcf2ULL, 0x01f768aead232999ULL, + 0x2b6cc77b6248febdULL, 0x3cd30628ec3aaffdULL, 0xce1c0b80d4ef486aULL, 0x4c3bff2ea6f66c23ULL, + 0x3f2ec4094aeaeb5fULL, 0x61b19b286e372ca7ULL, 0x5eefa966de2a701dULL, 0x23b20565de55e3efULL, + 0xe301ca5279d58557ULL, 0x07b2d4ce27c2874fULL, 0xa532cd8a9dcf1d67ULL, 0x2a52fee23f2bff56ULL, + 0x8624efb37cd8663dULL, 0xbbc7ac20ffbd7594ULL, 0x57b85e9c82d37445ULL, 0x7b3052cb86a6ec66ULL, + 0x3482f0ad2525e91eULL, 0x2cb68043d28edca0ULL, 0xaf4f6d052e1b003aULL, 0x185f8c2529781b0aULL, + 0xaa41de5bd80ce0d6ULL, 0x9407b2416853e9d6ULL, 0x563ec36e357f4c3aULL, 0x4cc4b8dd0e297bceULL, + 0xa2fc1a52ffb8730eULL, 0x1811f16e67058e37ULL, 0x10f9a366cddf4ee1ULL, 0x72f4a0c4a0b9f099ULL, + 0x8c16c06f663f4ea7ULL, 0x693b3af74e970fbaULL, 0x2102e7f1d69ec345ULL, 0x0ba53cbc968a8089ULL, + 0xca3d9dc7fea15537ULL, 0x4c6824bb51536493ULL, 0xb9886314844006b1ULL, 0x40d2a72ab454cc60ULL, + 0x5936a1b712570975ULL, 0x91b9d648debda657ULL, 0x3344094bb64330eaULL, 0x006ba10d12ee51d0ULL, + 0x19228468f5de5d58ULL, 0x0eb12f4c38cc05b0ULL, 0xa1039f9dd5601990ULL, 0x4502d4ce4fff0e0bULL, + 0xeb2054106837c189ULL, 0xd0f6544c6dd3b93cULL, 
0x40727064c416d74fULL, 0x6e15c6114b502ef0ULL, + 0x4df2a398cfb1a76bULL, 0x11256c7419f2f6b1ULL, 0x4a497962066e6043ULL, 0x705b3aab41355b44ULL, + 0x365ef536d797b1d8ULL, 0x00076bd622ddf0dbULL, 0x3bbf33b0e0575a88ULL, 0x3777aa05c8e4ca4dULL, + 0x392745c85578db5fULL, 0x6fda4149dbae5ae2ULL, 0xb1f0b00b8adc9867ULL, 0x09963437d36f1da3ULL, + 0x7e824e90a5dc3853ULL, 0xccb5f6641f135cbdULL, 0x6736d86c87ce8fccULL, 0x625f3ce26604249fULL, + 0xaf8ac8059502f63fULL, 0x0c05e70a2e351469ULL, 0x35292e9c764b6305ULL, 0x1a394360c7e23ac3ULL, + 0xd5c6d53251183264ULL, 0x62065abd43c2b74fULL, 0xb5fbf5d03b973f9bULL, 0x13a3da3661206e5eULL, + 0xc6bd5837725d94e5ULL, 0x18e30912205016c5ULL, 0x2088ce1570033c68ULL, 0x7fba1f495c837987ULL, + 0x5a8c7423f2f9079dULL, 0x1735157b34023fc5ULL, 0xe4f9b49ad2fab351ULL, 0x6691ff72c878e33cULL, + 0x122c2adedc5eff3eULL, 0xf8dd4bf1d8956cf4ULL, 0xeb86205d9e9e5bdaULL, 0x049b92b9d975c743ULL, + 0xa5379730b0f6c05aULL, 0x72a0ffacc6f3a553ULL, 0xb0032c34b20dcd6dULL, 0x470e9dbc88d5164aULL, + 0xb19cf10ca237c047ULL, 0xb65466711f6c81a2ULL, 0xb3321bd16dd80b43ULL, 0x48c14f600c5fbe8eULL, + 0x66451c264aa6c803ULL, 0xb66e3904a4fa7da6ULL, 0xd45f19b0b3128395ULL, 0x31602627c3c9bc10ULL, + 0x3120dc4832e4e10dULL, 0xeb20c46756c717f7ULL, 0x00f52e3f67280294ULL, 0x566d4fc14730c509ULL, + 0x7e3a5d40fd837206ULL, 0xc1e926dc7159547aULL, 0x216730fba68d6095ULL, 0x22e8c3843f69cea7ULL, + 0x33d074e8930e4b2bULL, 0xb6e4350e84d15816ULL, 0x5534c26ad6ba2365ULL, 0x7773c12f89f1f3f3ULL, + 0x8cba404da57962aaULL, 0x5b9897a81999ce56ULL, 0x508e862f121692fcULL, 0x3a81907fa093c291ULL, + 0x0dded0ff4725a510ULL, 0x10d8cc10673fc503ULL, 0x5b9d151c9f1f4e89ULL, 0x32a5c1d5cb09a44cULL, + 0x1e0aa442b90541fbULL, 0x5f85eb7cc1b485dbULL, 0xbee595ce8a9df2e5ULL, 0x25e496c722422236ULL, + 0x5edf3c46cd0fe5b9ULL, 0x34e75a7ed2a43388ULL, 0xe488de11d761e352ULL, 0x0e878a01a085545cULL, + 0xba493c77e021bb04ULL, 0x2b4d1843c7df899aULL, 0x9ea37a487ae80d67ULL, 0x67a9958011e41794ULL, + 0x4b58051a6697b065ULL, 0x47e33f7d8d6ba6d4ULL, 0xbb4da8d483ca46c1ULL, 
0x68becaa181c2db0dULL, + 0x8d8980e90b989aa5ULL, 0xf95eb14a2c93c99bULL, 0x51c6c7c4796e73a2ULL, 0x6e228363b5efb569ULL, + 0xc6bbc0b02dd624c8ULL, 0x777eb47dec8170eeULL, 0x3cde15a004cfafa9ULL, 0x1dc6bc087160bf9bULL, + 0x2e07e043eec34002ULL, 0x18e9fc677a68dc7fULL, 0xd8da03188bd15b9aULL, 0x48fbc3bb00568253ULL, + 0x57547d4cfb654ce1ULL, 0xd3565b82a058e2adULL, 0xf63eaf0bbf154478ULL, 0x47531ef114dfbb18ULL, + 0xe1ec630a4278c587ULL, 0x5507d546ca8e83f3ULL, 0x85e135c63adc0c2bULL, 0x0aa7efa85682844eULL, + 0x72691ba8b3e1f615ULL, 0x32b4e9701fbe3ffaULL, 0x97b6d92e39bb7868ULL, 0x2cfe53dea02e39e8ULL, + 0x687392cd85cd52b0ULL, 0x27ff66c910e29831ULL, 0x97134556a9832d06ULL, 0x269bb0360a84f8a0ULL, + 0x706e55457643f85cULL, 0x3734a48c9b597d1bULL, 0x7aee91e8c6efa472ULL, 0x5cd6abc198a9d9e0ULL, + 0x0e04de06cb3ce41aULL, 0xd8c6eb893402e138ULL, 0x904659bb686e3772ULL, 0x7215c371746ba8c8ULL, + 0xfd12a97eeae4a2d9ULL, 0x9514b7516394f2c5ULL, 0x266fd5809208f294ULL, 0x5c847085619a26b9ULL, + 0x52985410fed694eaULL, 0x3c905b934a2ed254ULL, 0x10bb47692d3be467ULL, 0x063b3d2d69e5e9e1ULL, + 0x472726eedda57debULL, 0xefb6c4ae10f41891ULL, 0x2b1641917b307614ULL, 0x117c554fc4f45b7cULL, + 0xc07cf3118f9d8812ULL, 0x01dbd82050017939ULL, 0xd7e803f4171b2827ULL, 0x1015e87487d225eaULL, + 0xc58de3fed23acc4dULL, 0x50db91c294a7be2dULL, 0x0b94d43d1c9cf457ULL, 0x6b1640fa6e37524aULL, + 0x692f346c5fda0d09ULL, 0x200b1c59fa4d3151ULL, 0xb8c46f760777a296ULL, 0x4b38395f3ffdfbcfULL, + 0x18d25e00be54d671ULL, 0x60d50582bec8aba6ULL, 0x87ad8f263b78b982ULL, 0x50fdf64e9cda0432ULL, + 0x90f567aac578dcf0ULL, 0xef1e9b0ef2a3133bULL, 0x0eebba9242d9de71ULL, 0x15473c9bf03101c7ULL, + 0x7c77e8ae56b78095ULL, 0xb678e7666e6f078eULL, 0x2da0b9615348ba1fULL, 0x7cf931c1ff733f0bULL, + 0x26b357f50a0a366cULL, 0xe9708cf42b87d732ULL, 0xc13aeea5f91cb2c0ULL, 0x35d90c991143bb4cULL, + 0x47c1c404a9a0d9dcULL, 0x659e58451972d251ULL, 0x3875a8c473b38c31ULL, 0x1fbd9ed379561f24ULL, + 0x11fabc6fd41ec28dULL, 0x7ef8dfe3cd2a2dcaULL, 0x72e73b5d8c404595ULL, 0x6135fa4954b72f27ULL, + 
0xccfc32a2de24b69cULL, 0x3f55698c1f095d88ULL, 0xbe3350ed5ac3f929ULL, 0x5e9bf806ca477eebULL, + 0xe9ce8fb63c309f68ULL, 0x5376f63565e1f9f4ULL, 0xd1afcfb35a6393f1ULL, 0x6632a1ede5623506ULL, + 0x0b7d6c390c2ded4cULL, 0x56cb3281df04cb1fULL, 0x66305a1249ecc3c7ULL, 0x5d588b60a38ca72aULL, + 0xa6ecbf78e8e5f42dULL, 0x86eeb44b3c8a3eecULL, 0xec219c48fbd21604ULL, 0x1aaf1af517c36731ULL, + 0xc306a2836769bde7ULL, 0x208280622b1e2adbULL, 0x8027f51ffbff94a6ULL, 0x76cfa1ce1124f26bULL, + 0x18eb00562422abb6ULL, 0xf377c4d58f8c29c3ULL, 0x4dbbc207f531561aULL, 0x0253b7f082128a27ULL, + 0x3d1f091cb62c17e0ULL, 0x4860e1abd64628a9ULL, 0x52d17436309d4253ULL, 0x356f97e13efae576ULL, + 0xd351e11aa150535bULL, 0x3e6b45bb1dd878ccULL, 0x0c776128bed92c98ULL, 0x1d34ae93032885b8ULL, + 0x4ba0488ca85ba4c3ULL, 0x985348c33c9ce6ceULL, 0x66124c6f97bda770ULL, 0x0f81a0290654124aULL, + 0x9ed09ca6569b86fdULL, 0x811009fd18af9a2dULL, 0xff08d03f93d8c20aULL, 0x52a148199faef26bULL, + 0x3e03f9dc2d8d1b73ULL, 0x4205801873961a70ULL, 0xc0d987f041a35970ULL, 0x07aa1f15a1c0d549ULL, + 0xdfd46ce08cd27224ULL, 0x6d0a024f934e4239ULL, 0x808a7a6399897b59ULL, 0x0a4556e9e13d95a2ULL, + 0xd21a991fe9c13045ULL, 0x9b0e8548fe7751b8ULL, 0x5da643cb4bf30035ULL, 0x77db28d63940f721ULL, + 0xfc5eeb614adc9011ULL, 0x5229419ae8c411ebULL, 0x9ec3e7787d1dcf74ULL, 0x340d053e216e4cb5ULL, + 0xcac7af39b48df2b4ULL, 0xc0faec2871a10a94ULL, 0x140a69245ca575edULL, 0x0cf1c37134273a4cULL, + 0xc8ee306ac224b8a5ULL, 0x57eaee7ccb4930b0ULL, 0xa1e806bdaacbe74fULL, 0x7d9a62742eeb657dULL, + 0x9eb6b6ef546c4830ULL, 0x885cca1fddb36e2eULL, 0xe6b9f383ef0d7105ULL, 0x58654fef9d2e0412ULL, + 0xa905c4ffbe0e8e26ULL, 0x942de5df9b31816eULL, 0x497d723f802e88e1ULL, 0x30684dea602f408dULL, + 0x21e5a278a3e6cb34ULL, 0xaefb6e6f5b151dc4ULL, 0xb30b8e049d77ca15ULL, 0x28c3c9cf53b98981ULL, + 0x287fb721556cdd2aULL, 0x0d317ca897022274ULL, 0x7468c7423a543258ULL, 0x4a7f11464eb5642fULL, + 0xa237a4774d193aa6ULL, 0xd865986ea92129a1ULL, 0x24c515ecf87c1a88ULL, 0x604003575f39f5ebULL, + 0x47b9f189570a9b27ULL, 
0x2b98cede465e4b78ULL, 0x026df551dbb85c20ULL, 0x74fcd91047e21901ULL, + 0x13e2a90a23c1bfa3ULL, 0x0cb0074e478519f6ULL, 0x5ff1cbbe3af6cf44ULL, 0x67fe5438be812dbeULL, + 0xd13cf64fa40f05b0ULL, 0x054dfb2f32283787ULL, 0x4173915b7f0d2aeaULL, 0x482f144f1f610d4eULL, + 0xf6210201b47f8234ULL, 0x5d0ae1929e70b990ULL, 0xdcd7f455b049567cULL, 0x7e93d0f1f0916f01ULL, + 0xdd79cbf18a7db4faULL, 0xbe8391bf6f74c62fULL, 0x027145d14b8291bdULL, 0x585a73ea2cbf1705ULL, + 0x485ca03e928a0db2ULL, 0x10fc01a5742857e7ULL, 0x2f482edbd6d551a7ULL, 0x0f0433b5048fdb8aULL, + 0x60da2e8dd7dc6247ULL, 0x88b4c9d38cd4819aULL, 0x13033ac001f66697ULL, 0x273b24fe3b367d75ULL, + 0xc6e8f66a31b3b9d4ULL, 0x281514a494df49d5ULL, 0xd1726fdfc8b23da7ULL, 0x4b3ae7d103dee548ULL, + 0xc6256e19ce4b9d7eULL, 0xff5c5cf186e3c61cULL, 0xacc63ca34b8ec145ULL, 0x74621888fee66574ULL, + 0x956f409645290a1eULL, 0xef0bf8e3263a962eULL, 0xed6a50eb5ec2647bULL, 0x0694283a9dca7502ULL, + 0x769b963643a2dcd1ULL, 0x42b7c8ea09fc5353ULL, 0x4f002aee13397eabULL, 0x63005e2c19b7d63aULL, + 0xca6736da63023beaULL, 0x966c7f6db12a99b7ULL, 0xace09390c537c5e1ULL, 0x0b696063a1aa89eeULL, + 0xebb03e97288c56e5ULL, 0x432a9f9f938c8be8ULL, 0xa6a5a93d5b717f71ULL, 0x1a5fb4c3e18f9d97ULL, + 0x1c94e7ad1c60cdceULL, 0xee202a43fc02c4a0ULL, 0x8dafe4d867c46a20ULL, 0x0a10263c8ac27b58ULL, + 0xd0dea9dfe4432a4aULL, 0x856af87bbe9277c5ULL, 0xce8472acc212c71aULL, 0x6f151b6d9bbb1e91ULL, + 0x26776c527ceed56aULL, 0x7d211cb7fbf8faecULL, 0x37ae66a6fd4609ccULL, 0x1f81b702d2770c42ULL, + 0x2fb0b057eac58392ULL, 0xe1dd89fe29744e9dULL, 0xc964f8eb17beb4f8ULL, 0x29571073c9a2d41eULL, + 0xa948a18981c0e254ULL, 0x2df6369b65b22830ULL, 0xa33eb2d75fcfd3c6ULL, 0x078cd6ec4199a01fULL, + 0x4a584a41ad900d2fULL, 0x32142b78e2c74c52ULL, 0x68c4e8338431c978ULL, 0x7f69ea9008689fc2ULL, + 0x52f2c81e46a38265ULL, 0xfd78072d04a832fdULL, 0x8cd7d5fa25359e94ULL, 0x4de71b7454cc29d2ULL, + 0x42eb60ad1eda6ac9ULL, 0x0aad37dfdbc09c3aULL, 0x81004b71e33cc191ULL, 0x44e6be345122803cULL, + 0x03fe8388ba1920dbULL, 0xf5d57c32150db008ULL, 
0x49c8c4281af60c29ULL, 0x21edb518de701aeeULL, + 0x7fb63e418f06dc99ULL, 0xa4460d99c166d7b8ULL, 0x24dd5248ce520a83ULL, 0x5ec3ad712b928358ULL, + 0x15022a5fbd17930fULL, 0xa4f64a77d82570e3ULL, 0x12bc8d6915783712ULL, 0x498194c0fc620abbULL, + 0x38a2d9d255686c82ULL, 0x785c6bd9193e21f0ULL, 0xe4d5c81ab24a5484ULL, 0x56307860b2e20989ULL, + 0x429d55f78b4d74c4ULL, 0x22f1834643350131ULL, 0x1e60c24598c71fffULL, 0x59f2f014979983efULL, + 0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL, + 0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL, + 0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL, + 0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL, + 0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL, + 0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL, + 0x42a161981f190d9aULL, 0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL, + 0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL, + 0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL, + 0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL, + 0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL, + 0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL, + 0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL, + 0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL, + 0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL, + 0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL, + 0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 
0x5e8ce0a71e9d112fULL, + 0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL, + 0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL, + 0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL, + 0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL, + 0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL, + 0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL, + 0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL, + 0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL, + 0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL, + 0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 0x69acca274e9caf61ULL, + 0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL, + 0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL, + 0x328689761e451eabULL, 0x2e4d598bff59594aULL, 0x49b96853d7a7084aULL, 0x4980a319601420a8ULL, + 0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL, + 0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL, + 0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL, + 0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL, + 0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL, + 0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL, + 0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL, + 0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL, + 
0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 0x27fedafaa54bb592ULL, + 0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL, + 0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL, + 0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL, + 0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL, + 0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL, + 0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL, + 0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL, + 0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL, + 0xbea6818e2b928ef5ULL, 0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL, + 0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL, + 0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL, + 0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL, + 0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL, + 0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL, + 0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL, + 0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL, + 0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL, + 0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL, + 0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL, + 0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL, + 0x164ca5101d1350dbULL, 
0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL, + 0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL, + 0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL +}; -static void curve25519_bmi2_base(u8 session_key[CURVE25519_KEY_SIZE], - const u8 private_key[CURVE25519_KEY_SIZE]) +static void curve25519_ever64_base(u8 *out, const u8 *priv) { - struct { - u64 buffer[4 * NUM_WORDS_ELTFP25519]; - u64 coordinates[4 * NUM_WORDS_ELTFP25519]; - u64 workspace[4 * NUM_WORDS_ELTFP25519]; - u8 private[CURVE25519_KEY_SIZE]; - } __aligned(32) m; - - const int ite[4] = { 64, 64, 64, 63 }; - const int q = 3; u64 swap = 1; - - int i = 0, j = 0, k = 0; - u64 *const key = (u64 *)m.private; - u64 *const Ur1 = m.coordinates + 0; - u64 *const Zr1 = m.coordinates + 4; - u64 *const Ur2 = m.coordinates + 8; - u64 *const Zr2 = m.coordinates + 12; - - u64 *const UZr1 = m.coordinates + 0; - u64 *const ZUr2 = m.coordinates + 8; - - u64 *const A = m.workspace + 0; - u64 *const B = m.workspace + 4; - u64 *const C = m.workspace + 8; - u64 *const D = m.workspace + 12; - - u64 *const AB = m.workspace + 0; - u64 *const CD = m.workspace + 8; - - const u64 *const P = table_ladder_8k; - - memcpy(m.private, private_key, sizeof(m.private)); - - curve25519_clamp_secret(m.private); - - setzero_eltfp25519_1w(Ur1); - setzero_eltfp25519_1w(Zr1); - setzero_eltfp25519_1w(Zr2); - Ur1[0] = 1; - Zr1[0] = 1; - Zr2[0] = 1; - - /* G-S */ - Ur2[3] = 0x1eaecdeee27cab34UL; - Ur2[2] = 0xadc7a0b9235d48e2UL; - Ur2[1] = 0xbbf095ae14b2edf8UL; - Ur2[0] = 0x7e94e1fec82faabdUL; - - /* main-loop */ - j = q; - for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) { - while (j < ite[i]) { - u64 bit = (key[i] >> j) & 0x1; - k = (64 * i + j - q); + int i, j, k; + u64 tmp[16 + 32 + 4]; + u64 *x1 = &tmp[0]; + u64 *z1 = &tmp[4]; + u64 *x2 = &tmp[8]; + u64 *z2 = &tmp[12]; + u64 *xz1 = &tmp[0]; + u64 *xz2 = &tmp[8]; + u64 *a = &tmp[0 + 16]; + u64 *b = 
&tmp[4 + 16]; + u64 *c = &tmp[8 + 16]; + u64 *ab = &tmp[0 + 16]; + u64 *abcd = &tmp[0 + 16]; + u64 *ef = &tmp[16 + 16]; + u64 *efgh = &tmp[16 + 16]; + u64 *key = &tmp[0 + 16 + 32]; + + memcpy(key, priv, 32); + ((u8 *)key)[0] &= 248; + ((u8 *)key)[31] = (((u8 *)key)[31] & 127) | 64; + + x1[0] = 1, x1[1] = x1[2] = x1[3] = 0; + z1[0] = 1, z1[1] = z1[2] = z1[3] = 0; + z2[0] = 1, z2[1] = z2[2] = z2[3] = 0; + memcpy(x2, p_minus_s, sizeof(p_minus_s)); + + j = 3; + for (i = 0; i < 4; ++i) { + while (j < (const int[]){ 64, 64, 64, 63 }[i]) { + u64 bit = (key[i] >> j) & 1; + k = (64 * i + j - 3); swap = swap ^ bit; - cswap(swap, Ur1, Ur2); - cswap(swap, Zr1, Zr2); + cswap2(swap, xz1, xz2); swap = bit; - /* Addition */ - sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ - add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */ - mul_eltfp25519_1w_bmi2(C, &P[4 * k], B);/* C = M0-B */ - sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */ - add_eltfp25519_1w_bmi2(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */ - sqr_eltfp25519_2w_bmi2(AB); /* A = A^2 | B = B^2 */ - mul_eltfp25519_2w_bmi2(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */ + fsub(b, x1, z1); + fadd(a, x1, z1); + fmul(c, &table_ladder[4 * k], b, ef); + fsub(b, a, c); + fadd(a, a, c); + fsqr2(ab, ab, efgh); + fmul2(xz1, xz2, ab, efgh); ++j; } j = 0; } - /* Doubling */ - for (i = 0; i < q; ++i) { - add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */ - sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ - sqr_eltfp25519_2w_bmi2(AB); /* A = A**2 B = B**2 */ - copy_eltfp25519_1w(C, B); /* C = B */ - sub_eltfp25519_1w(B, A, B); /* B = A-B */ - mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */ - add_eltfp25519_1w_bmi2(D, D, C); /* D = D+C */ - mul_eltfp25519_2w_bmi2(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */ - } + point_double(xz1, abcd, efgh); + point_double(xz1, abcd, efgh); + point_double(xz1, abcd, efgh); + encode_point(out, xz1); - /* Convert to affine coordinates */ - inv_eltfp25519_1w_bmi2(A, Zr1); - 
mul_eltfp25519_1w_bmi2((u64 *)session_key, Ur1, A); - fred_eltfp25519_1w((u64 *)session_key); - - memzero_explicit(&m, sizeof(m)); + memzero_explicit(tmp, sizeof(tmp)); } +static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2_adx); + void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE], const u8 basepoint[CURVE25519_KEY_SIZE]) { - if (static_branch_likely(&curve25519_use_adx)) - curve25519_adx(mypublic, secret, basepoint); - else if (static_branch_likely(&curve25519_use_bmi2)) - curve25519_bmi2(mypublic, secret, basepoint); + if (static_branch_likely(&curve25519_use_bmi2_adx)) + curve25519_ever64(mypublic, secret, basepoint); else curve25519_generic(mypublic, secret, basepoint); } @@ -2355,10 +1395,8 @@ EXPORT_SYMBOL(curve25519_arch); void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE]) { - if (static_branch_likely(&curve25519_use_adx)) - curve25519_adx_base(pub, secret); - else if (static_branch_likely(&curve25519_use_bmi2)) - curve25519_bmi2_base(pub, secret); + if (static_branch_likely(&curve25519_use_bmi2_adx)) + curve25519_ever64_base(pub, secret); else curve25519_generic(pub, secret, curve25519_base_point); } @@ -2449,12 +1487,11 @@ static struct kpp_alg curve25519_alg = { .max_size = curve25519_max_size, }; + static int __init curve25519_mod_init(void) { - if (boot_cpu_has(X86_FEATURE_BMI2)) - static_branch_enable(&curve25519_use_bmi2); - else if (boot_cpu_has(X86_FEATURE_ADX)) - static_branch_enable(&curve25519_use_adx); + if (boot_cpu_has(X86_FEATURE_BMI2) && boot_cpu_has(X86_FEATURE_ADX)) + static_branch_enable(&curve25519_use_bmi2_adx); else return 0; return IS_REACHABLE(CONFIG_CRYPTO_KPP) ? @@ -2474,3 +1511,4 @@ module_exit(curve25519_mod_exit); MODULE_ALIAS_CRYPTO("curve25519"); MODULE_ALIAS_CRYPTO("curve25519-x86"); MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Jason A. 
Donenfeld <Jason@zx2c4.com>"); diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 439367a8e95c..b1cd3535c525 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -821,8 +821,8 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size, struct af_alg_tsgl *sgl; struct af_alg_control con = {}; long copied = 0; - bool enc = 0; - bool init = 0; + bool enc = false; + bool init = false; int err = 0; if (msg->msg_controllen) { @@ -830,13 +830,13 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size, if (err) return err; - init = 1; + init = true; switch (con.op) { case ALG_OP_ENCRYPT: - enc = 1; + enc = true; break; case ALG_OP_DECRYPT: - enc = 0; + enc = false; break; default: return -EINVAL; diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 178f4cd75ef1..da1ffa4f7f8d 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -83,7 +83,7 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg, goto unlock; } - ctx->more = 0; + ctx->more = false; while (msg_data_left(msg)) { int len = msg_data_left(msg); @@ -211,7 +211,7 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, } if (!result || ctx->more) { - ctx->more = 0; + ctx->more = false; err = crypto_wait_req(crypto_ahash_final(&ctx->req), &ctx->wait); if (err) @@ -436,7 +436,7 @@ static int hash_accept_parent_nokey(void *private, struct sock *sk) ctx->result = NULL; ctx->len = len; - ctx->more = 0; + ctx->more = false; crypto_init_wait(&ctx->wait); ask->private = ctx; diff --git a/crypto/authencesn.c b/crypto/authencesn.c index 589008146fce..149b70df2a91 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -458,7 +458,7 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl, inst->alg.encrypt = crypto_authenc_esn_encrypt; inst->alg.decrypt = crypto_authenc_esn_decrypt; - inst->free = crypto_authenc_esn_free, + inst->free = crypto_authenc_esn_free; err = aead_register_instance(tmpl, inst); if (err) { diff 
--git a/crypto/ccm.c b/crypto/ccm.c index 241ecdc5c4e0..d1fb01bbc814 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -717,7 +717,6 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl, struct aead_instance *inst; struct crypto_aead_spawn *spawn; struct aead_alg *alg; - const char *ccm_name; int err; algt = crypto_get_attr_type(tb); @@ -729,19 +728,15 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl, mask = crypto_requires_sync(algt->type, algt->mask); - ccm_name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(ccm_name)) - return PTR_ERR(ccm_name); - inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return -ENOMEM; spawn = aead_instance_ctx(inst); err = crypto_grab_aead(spawn, aead_crypto_instance(inst), - ccm_name, 0, mask); + crypto_attr_alg_name(tb[1]), 0, mask); if (err) - goto out_free_inst; + goto err_free_inst; alg = crypto_spawn_aead_alg(spawn); @@ -749,11 +744,11 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl, /* We only support 16-byte blocks. */ if (crypto_aead_alg_ivsize(alg) != 16) - goto out_drop_alg; + goto err_free_inst; /* Not a stream cipher? 
*/ if (alg->base.cra_blocksize != 1) - goto out_drop_alg; + goto err_free_inst; err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, @@ -762,7 +757,7 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl, snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "rfc4309(%s)", alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) - goto out_drop_alg; + goto err_free_inst; inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = alg->base.cra_priority; @@ -786,17 +781,11 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl, inst->free = crypto_rfc4309_free; err = aead_register_instance(tmpl, inst); - if (err) - goto out_drop_alg; - -out: + if (err) { +err_free_inst: + crypto_rfc4309_free(inst); + } return err; - -out_drop_alg: - crypto_drop_aead(spawn); -out_free_inst: - kfree(inst); - goto out; } static int crypto_cbcmac_digest_setkey(struct crypto_shash *parent, diff --git a/crypto/cryptd.c b/crypto/cryptd.c index d94c75c840a5..283212262adb 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -369,7 +369,6 @@ static int cryptd_create_skcipher(struct crypto_template *tmpl, struct skcipherd_instance_ctx *ctx; struct skcipher_instance *inst; struct skcipher_alg *alg; - const char *name; u32 type; u32 mask; int err; @@ -379,10 +378,6 @@ static int cryptd_create_skcipher(struct crypto_template *tmpl, cryptd_check_internal(tb, &type, &mask); - name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(name)) - return PTR_ERR(name); - inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) return -ENOMEM; @@ -391,14 +386,14 @@ static int cryptd_create_skcipher(struct crypto_template *tmpl, ctx->queue = queue; err = crypto_grab_skcipher(&ctx->spawn, skcipher_crypto_instance(inst), - name, type, mask); + crypto_attr_alg_name(tb[1]), type, mask); if (err) - goto out_free_inst; + goto err_free_inst; alg = crypto_spawn_skcipher_alg(&ctx->spawn); err = 
cryptd_init_instance(skcipher_crypto_instance(inst), &alg->base); if (err) - goto out_drop_skcipher; + goto err_free_inst; inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC | (alg->base.cra_flags & CRYPTO_ALG_INTERNAL); @@ -421,10 +416,8 @@ static int cryptd_create_skcipher(struct crypto_template *tmpl, err = skcipher_register_instance(tmpl, inst); if (err) { -out_drop_skcipher: - crypto_drop_skcipher(&ctx->spawn); -out_free_inst: - kfree(inst); +err_free_inst: + cryptd_skcipher_free(inst); } return err; } @@ -694,8 +687,7 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, err = ahash_register_instance(tmpl, inst); if (err) { err_free_inst: - crypto_drop_shash(&ctx->spawn); - kfree(inst); + cryptd_hash_free(inst); } return err; } @@ -833,17 +825,12 @@ static int cryptd_create_aead(struct crypto_template *tmpl, struct aead_instance_ctx *ctx; struct aead_instance *inst; struct aead_alg *alg; - const char *name; u32 type = 0; u32 mask = CRYPTO_ALG_ASYNC; int err; cryptd_check_internal(tb, &type, &mask); - name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(name)) - return PTR_ERR(name); - inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) return -ENOMEM; @@ -852,14 +839,14 @@ static int cryptd_create_aead(struct crypto_template *tmpl, ctx->queue = queue; err = crypto_grab_aead(&ctx->aead_spawn, aead_crypto_instance(inst), - name, type, mask); + crypto_attr_alg_name(tb[1]), type, mask); if (err) - goto out_free_inst; + goto err_free_inst; alg = crypto_spawn_aead_alg(&ctx->aead_spawn); err = cryptd_init_instance(aead_crypto_instance(inst), &alg->base); if (err) - goto out_drop_aead; + goto err_free_inst; inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC | (alg->base.cra_flags & CRYPTO_ALG_INTERNAL); @@ -879,10 +866,8 @@ static int cryptd_create_aead(struct crypto_template *tmpl, err = aead_register_instance(tmpl, inst); if (err) { -out_drop_aead: - crypto_drop_aead(&ctx->aead_spawn); -out_free_inst: - kfree(inst); +err_free_inst: + 
cryptd_aead_free(inst); } return err; } diff --git a/crypto/ctr.c b/crypto/ctr.c index a8feab621c6c..31ac4ae598e1 100644 --- a/crypto/ctr.c +++ b/crypto/ctr.c @@ -260,7 +260,6 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl, struct skcipher_instance *inst; struct skcipher_alg *alg; struct crypto_skcipher_spawn *spawn; - const char *cipher_name; u32 mask; int err; @@ -272,10 +271,6 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl, if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask) return -EINVAL; - cipher_name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(cipher_name)) - return PTR_ERR(cipher_name); - inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return -ENOMEM; @@ -287,7 +282,7 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl, spawn = skcipher_instance_ctx(inst); err = crypto_grab_skcipher(spawn, skcipher_crypto_instance(inst), - cipher_name, 0, mask); + crypto_attr_alg_name(tb[1]), 0, mask); if (err) goto err_free_inst; @@ -296,20 +291,20 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl, /* We only support 16-byte blocks. */ err = -EINVAL; if (crypto_skcipher_alg_ivsize(alg) != CTR_RFC3686_BLOCK_SIZE) - goto err_drop_spawn; + goto err_free_inst; /* Not a stream cipher? 
*/ if (alg->base.cra_blocksize != 1) - goto err_drop_spawn; + goto err_free_inst; err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "rfc3686(%s)", alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME) - goto err_drop_spawn; + goto err_free_inst; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "rfc3686(%s)", alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) - goto err_drop_spawn; + goto err_free_inst; inst->alg.base.cra_priority = alg->base.cra_priority; inst->alg.base.cra_blocksize = 1; @@ -336,17 +331,11 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl, inst->free = crypto_rfc3686_free; err = skcipher_register_instance(tmpl, inst); - if (err) - goto err_drop_spawn; - -out: - return err; - -err_drop_spawn: - crypto_drop_skcipher(spawn); + if (err) { err_free_inst: - kfree(inst); - goto out; + crypto_rfc3686_free(inst); + } + return err; } static struct crypto_template crypto_ctr_tmpls[] = { diff --git a/crypto/cts.c b/crypto/cts.c index 48188adc8e91..5e005c4f0221 100644 --- a/crypto/cts.c +++ b/crypto/cts.c @@ -327,7 +327,6 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb) struct skcipher_instance *inst; struct crypto_attr_type *algt; struct skcipher_alg *alg; - const char *cipher_name; u32 mask; int err; @@ -340,10 +339,6 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb) mask = crypto_requires_sync(algt->type, algt->mask); - cipher_name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(cipher_name)) - return PTR_ERR(cipher_name); - inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return -ENOMEM; @@ -351,7 +346,7 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb) spawn = skcipher_instance_ctx(inst); err = crypto_grab_skcipher(spawn, skcipher_crypto_instance(inst), - cipher_name, 0, mask); + crypto_attr_alg_name(tb[1]), 0, mask); if (err) goto err_free_inst; @@ -359,15 +354,15 @@ static int 
crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb) err = -EINVAL; if (crypto_skcipher_alg_ivsize(alg) != alg->base.cra_blocksize) - goto err_drop_spawn; + goto err_free_inst; if (strncmp(alg->base.cra_name, "cbc(", 4)) - goto err_drop_spawn; + goto err_free_inst; err = crypto_inst_setname(skcipher_crypto_instance(inst), "cts", &alg->base); if (err) - goto err_drop_spawn; + goto err_free_inst; inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = alg->base.cra_priority; @@ -391,17 +386,11 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb) inst->free = crypto_cts_free; err = skcipher_register_instance(tmpl, inst); - if (err) - goto err_drop_spawn; - -out: - return err; - -err_drop_spawn: - crypto_drop_skcipher(spawn); + if (err) { err_free_inst: - kfree(inst); - goto out; + crypto_cts_free(inst); + } + return err; } static struct crypto_template crypto_cts_tmpl = { diff --git a/crypto/gcm.c b/crypto/gcm.c index 8e5c0ac65661..0103d28c541e 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -840,7 +840,6 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl, struct aead_instance *inst; struct crypto_aead_spawn *spawn; struct aead_alg *alg; - const char *ccm_name; int err; algt = crypto_get_attr_type(tb); @@ -852,19 +851,15 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl, mask = crypto_requires_sync(algt->type, algt->mask); - ccm_name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(ccm_name)) - return PTR_ERR(ccm_name); - inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return -ENOMEM; spawn = aead_instance_ctx(inst); err = crypto_grab_aead(spawn, aead_crypto_instance(inst), - ccm_name, 0, mask); + crypto_attr_alg_name(tb[1]), 0, mask); if (err) - goto out_free_inst; + goto err_free_inst; alg = crypto_spawn_aead_alg(spawn); @@ -872,11 +867,11 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl, /* Underlying IV size must be 
12. */ if (crypto_aead_alg_ivsize(alg) != GCM_AES_IV_SIZE) - goto out_drop_alg; + goto err_free_inst; /* Not a stream cipher? */ if (alg->base.cra_blocksize != 1) - goto out_drop_alg; + goto err_free_inst; err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, @@ -885,7 +880,7 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl, snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "rfc4106(%s)", alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) - goto out_drop_alg; + goto err_free_inst; inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = alg->base.cra_priority; @@ -909,17 +904,11 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl, inst->free = crypto_rfc4106_free; err = aead_register_instance(tmpl, inst); - if (err) - goto out_drop_alg; - -out: + if (err) { +err_free_inst: + crypto_rfc4106_free(inst); + } return err; - -out_drop_alg: - crypto_drop_aead(spawn); -out_free_inst: - kfree(inst); - goto out; } static int crypto_rfc4543_setkey(struct crypto_aead *parent, const u8 *key, @@ -1071,10 +1060,8 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl, struct crypto_attr_type *algt; u32 mask; struct aead_instance *inst; - struct crypto_aead_spawn *spawn; struct aead_alg *alg; struct crypto_rfc4543_instance_ctx *ctx; - const char *ccm_name; int err; algt = crypto_get_attr_type(tb); @@ -1086,32 +1073,27 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl, mask = crypto_requires_sync(algt->type, algt->mask); - ccm_name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(ccm_name)) - return PTR_ERR(ccm_name); - inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) return -ENOMEM; ctx = aead_instance_ctx(inst); - spawn = &ctx->aead; - err = crypto_grab_aead(spawn, aead_crypto_instance(inst), - ccm_name, 0, mask); + err = crypto_grab_aead(&ctx->aead, aead_crypto_instance(inst), + crypto_attr_alg_name(tb[1]), 0, mask); if (err) - goto 
out_free_inst; + goto err_free_inst; - alg = crypto_spawn_aead_alg(spawn); + alg = crypto_spawn_aead_alg(&ctx->aead); err = -EINVAL; /* Underlying IV size must be 12. */ if (crypto_aead_alg_ivsize(alg) != GCM_AES_IV_SIZE) - goto out_drop_alg; + goto err_free_inst; /* Not a stream cipher? */ if (alg->base.cra_blocksize != 1) - goto out_drop_alg; + goto err_free_inst; err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, @@ -1120,7 +1102,7 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl, snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "rfc4543(%s)", alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) - goto out_drop_alg; + goto err_free_inst; inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = alg->base.cra_priority; @@ -1141,20 +1123,14 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl, inst->alg.encrypt = crypto_rfc4543_encrypt; inst->alg.decrypt = crypto_rfc4543_decrypt; - inst->free = crypto_rfc4543_free, + inst->free = crypto_rfc4543_free; err = aead_register_instance(tmpl, inst); - if (err) - goto out_drop_alg; - -out: + if (err) { +err_free_inst: + crypto_rfc4543_free(inst); + } return err; - -out_drop_alg: - crypto_drop_aead(spawn); -out_free_inst: - kfree(inst); - goto out; } static struct crypto_template crypto_gcm_tmpls[] = { diff --git a/crypto/geniv.c b/crypto/geniv.c index dbcc640274cd..6a90c52d49ad 100644 --- a/crypto/geniv.c +++ b/crypto/geniv.c @@ -41,7 +41,6 @@ static void aead_geniv_free(struct aead_instance *inst) struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl, struct rtattr **tb, u32 type, u32 mask) { - const char *name; struct crypto_aead_spawn *spawn; struct crypto_attr_type *algt; struct aead_instance *inst; @@ -57,10 +56,6 @@ struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl, if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask) return ERR_PTR(-EINVAL); - name = crypto_attr_alg_name(tb[1]); 
- if (IS_ERR(name)) - return ERR_CAST(name); - inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return ERR_PTR(-ENOMEM); @@ -71,7 +66,7 @@ struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl, mask |= crypto_requires_sync(algt->type, algt->mask); err = crypto_grab_aead(spawn, aead_crypto_instance(inst), - name, type, mask); + crypto_attr_alg_name(tb[1]), type, mask); if (err) goto err_free_inst; @@ -82,17 +77,17 @@ struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl, err = -EINVAL; if (ivsize < sizeof(u64)) - goto err_drop_alg; + goto err_free_inst; err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", tmpl->name, alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME) - goto err_drop_alg; + goto err_free_inst; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", tmpl->name, alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) - goto err_drop_alg; + goto err_free_inst; inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = alg->base.cra_priority; @@ -111,10 +106,8 @@ struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl, out: return inst; -err_drop_alg: - crypto_drop_aead(spawn); err_free_inst: - kfree(inst); + aead_geniv_free(inst); inst = ERR_PTR(err); goto out; } diff --git a/crypto/lrw.c b/crypto/lrw.c index 63c485c0d8a6..376d7ed3f1f8 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -343,15 +343,15 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) err = -EINVAL; if (alg->base.cra_blocksize != LRW_BLOCK_SIZE) - goto err_drop_spawn; + goto err_free_inst; if (crypto_skcipher_alg_ivsize(alg)) - goto err_drop_spawn; + goto err_free_inst; err = crypto_inst_setname(skcipher_crypto_instance(inst), "lrw", &alg->base); if (err) - goto err_drop_spawn; + goto err_free_inst; err = -EINVAL; cipher_name = alg->base.cra_name; @@ -364,20 +364,20 @@ static int create(struct crypto_template *tmpl, struct 
rtattr **tb) len = strlcpy(ecb_name, cipher_name + 4, sizeof(ecb_name)); if (len < 2 || len >= sizeof(ecb_name)) - goto err_drop_spawn; + goto err_free_inst; if (ecb_name[len - 1] != ')') - goto err_drop_spawn; + goto err_free_inst; ecb_name[len - 1] = 0; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "lrw(%s)", ecb_name) >= CRYPTO_MAX_ALG_NAME) { err = -ENAMETOOLONG; - goto err_drop_spawn; + goto err_free_inst; } } else - goto err_drop_spawn; + goto err_free_inst; inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = alg->base.cra_priority; @@ -403,17 +403,11 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) inst->free = free; err = skcipher_register_instance(tmpl, inst); - if (err) - goto err_drop_spawn; - -out: - return err; - -err_drop_spawn: - crypto_drop_skcipher(spawn); + if (err) { err_free_inst: - kfree(inst); - goto out; + free(inst); + } + return err; } static struct crypto_template crypto_tmpl = { diff --git a/crypto/md5.c b/crypto/md5.c index 22dc60bc0437..72c0c46fb5ee 100644 --- a/crypto/md5.c +++ b/crypto/md5.c @@ -23,9 +23,6 @@ #include <linux/types.h> #include <asm/byteorder.h> -#define MD5_DIGEST_WORDS 4 -#define MD5_MESSAGE_BYTES 64 - const u8 md5_zero_message_hash[MD5_DIGEST_SIZE] = { 0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04, 0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e, diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index 1b632139a8c1..8bddc65cd509 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -232,17 +232,12 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb, struct crypto_attr_type *algt; struct aead_instance *inst; struct aead_alg *alg; - const char *name; int err; algt = crypto_get_attr_type(tb); if (IS_ERR(algt)) return PTR_ERR(algt); - name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(name)) - return PTR_ERR(name); - inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) return -ENOMEM; @@ -252,21 +247,21 
@@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb, ctx = aead_instance_ctx(inst); ctx->psenc = padata_alloc_shell(pencrypt); if (!ctx->psenc) - goto out_free_inst; + goto err_free_inst; ctx->psdec = padata_alloc_shell(pdecrypt); if (!ctx->psdec) - goto out_free_psenc; + goto err_free_inst; err = crypto_grab_aead(&ctx->spawn, aead_crypto_instance(inst), - name, 0, 0); + crypto_attr_alg_name(tb[1]), 0, 0); if (err) - goto out_free_psdec; + goto err_free_inst; alg = crypto_spawn_aead_alg(&ctx->spawn); err = pcrypt_init_instance(aead_crypto_instance(inst), &alg->base); if (err) - goto out_drop_aead; + goto err_free_inst; inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC; @@ -286,21 +281,11 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb, inst->free = pcrypt_free; err = aead_register_instance(tmpl, inst); - if (err) - goto out_drop_aead; - -out: + if (err) { +err_free_inst: + pcrypt_free(inst); + } return err; - -out_drop_aead: - crypto_drop_aead(&ctx->spawn); -out_free_psdec: - padata_free_shell(ctx->psdec); -out_free_psenc: - padata_free_shell(ctx->psenc); -out_free_inst: - kfree(inst); - goto out; } static int pcrypt_create(struct crypto_template *tmpl, struct rtattr **tb) diff --git a/crypto/proc.c b/crypto/proc.c index 7b91557adccb..08d8c2bc7e62 100644 --- a/crypto/proc.c +++ b/crypto/proc.c @@ -60,7 +60,7 @@ static int c_show(struct seq_file *m, void *p) goto out; } - switch (alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL)) { + switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_CIPHER: seq_printf(m, "type : cipher\n"); seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); diff --git a/crypto/rng.c b/crypto/rng.c index 1e21231f71c9..1490d210f1a1 100644 --- a/crypto/rng.c +++ b/crypto/rng.c @@ -37,12 +37,16 @@ int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen) crypto_stats_get(alg); if (!seed && slen) { buf = kmalloc(slen, GFP_KERNEL); - if (!buf) 
+ if (!buf) { + crypto_alg_put(alg); return -ENOMEM; + } err = get_random_bytes_wait(buf, slen); - if (err) + if (err) { + crypto_alg_put(alg); goto out; + } seed = buf; } diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 176b63afec8d..d31031de51bc 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -596,14 +596,11 @@ static void pkcs1pad_free(struct akcipher_instance *inst) static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) { - const struct rsa_asn1_template *digest_info; struct crypto_attr_type *algt; u32 mask; struct akcipher_instance *inst; struct pkcs1pad_inst_ctx *ctx; - struct crypto_akcipher_spawn *spawn; struct akcipher_alg *rsa_alg; - const char *rsa_alg_name; const char *hash_name; int err; @@ -616,60 +613,49 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) mask = crypto_requires_sync(algt->type, algt->mask); - rsa_alg_name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(rsa_alg_name)) - return PTR_ERR(rsa_alg_name); - - hash_name = crypto_attr_alg_name(tb[2]); - if (IS_ERR(hash_name)) - hash_name = NULL; - - if (hash_name) { - digest_info = rsa_lookup_asn1(hash_name); - if (!digest_info) - return -EINVAL; - } else - digest_info = NULL; - inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) return -ENOMEM; ctx = akcipher_instance_ctx(inst); - spawn = &ctx->spawn; - ctx->digest_info = digest_info; - err = crypto_grab_akcipher(spawn, akcipher_crypto_instance(inst), - rsa_alg_name, 0, mask); + err = crypto_grab_akcipher(&ctx->spawn, akcipher_crypto_instance(inst), + crypto_attr_alg_name(tb[1]), 0, mask); if (err) - goto out_free_inst; + goto err_free_inst; - rsa_alg = crypto_spawn_akcipher_alg(spawn); + rsa_alg = crypto_spawn_akcipher_alg(&ctx->spawn); err = -ENAMETOOLONG; - - if (!hash_name) { + hash_name = crypto_attr_alg_name(tb[2]); + if (IS_ERR(hash_name)) { if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)", rsa_alg->base.cra_name) 
>= CRYPTO_MAX_ALG_NAME) - goto out_drop_alg; + goto err_free_inst; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)", rsa_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) - goto out_drop_alg; + goto err_free_inst; } else { + ctx->digest_info = rsa_lookup_asn1(hash_name); + if (!ctx->digest_info) { + err = -EINVAL; + goto err_free_inst; + } + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s,%s)", rsa_alg->base.cra_name, hash_name) >= CRYPTO_MAX_ALG_NAME) - goto out_drop_alg; + goto err_free_inst; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s,%s)", rsa_alg->base.cra_driver_name, hash_name) >= CRYPTO_MAX_ALG_NAME) - goto out_drop_alg; + goto err_free_inst; } inst->alg.base.cra_flags = rsa_alg->base.cra_flags & CRYPTO_ALG_ASYNC; @@ -691,15 +677,10 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) inst->free = pkcs1pad_free; err = akcipher_register_instance(tmpl, inst); - if (err) - goto out_drop_alg; - - return 0; - -out_drop_alg: - crypto_drop_akcipher(spawn); -out_free_inst: - kfree(inst); + if (err) { +err_free_inst: + pkcs1pad_free(inst); + } return err; } diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index f42f486e90e8..ba0b7702f2e9 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1514,8 +1514,8 @@ static void test_skcipher_speed(const char *algo, int enc, unsigned int secs, return; } - pr_info("\ntesting speed of async %s (%s) %s\n", algo, - get_driver_name(crypto_skcipher, tfm), e); + pr_info("\ntesting speed of %s %s (%s) %s\n", async ? 
"async" : "sync", + algo, get_driver_name(crypto_skcipher, tfm), e); req = skcipher_request_alloc(tfm, GFP_KERNEL); if (!req) { diff --git a/crypto/testmgr.c b/crypto/testmgr.c index ccb3d60729fc..6863f911fcee 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -91,10 +91,11 @@ struct aead_test_suite { unsigned int einval_allowed : 1; /* - * Set if the algorithm intentionally ignores the last 8 bytes of the - * AAD buffer during decryption. + * Set if this algorithm requires that the IV be located at the end of + * the AAD buffer, in addition to being given in the normal way. The + * behavior when the two IV copies differ is implementation-defined. */ - unsigned int esp_aad : 1; + unsigned int aad_iv : 1; }; struct cipher_test_suite { @@ -2167,9 +2168,10 @@ struct aead_extra_tests_ctx { * here means the full ciphertext including the authentication tag. The * authentication tag (and hence also the ciphertext) is assumed to be nonempty. */ -static void mutate_aead_message(struct aead_testvec *vec, bool esp_aad) +static void mutate_aead_message(struct aead_testvec *vec, bool aad_iv, + unsigned int ivsize) { - const unsigned int aad_tail_size = esp_aad ? 8 : 0; + const unsigned int aad_tail_size = aad_iv ? ivsize : 0; const unsigned int authsize = vec->clen - vec->plen; if (prandom_u32() % 2 == 0 && vec->alen > aad_tail_size) { @@ -2207,6 +2209,9 @@ static void generate_aead_message(struct aead_request *req, /* Generate the AAD. */ generate_random_bytes((u8 *)vec->assoc, vec->alen); + if (suite->aad_iv && vec->alen >= ivsize) + /* Avoid implementation-defined behavior. */ + memcpy((u8 *)vec->assoc + vec->alen - ivsize, vec->iv, ivsize); if (inauthentic && prandom_u32() % 2 == 0) { /* Generate a random ciphertext. */ @@ -2242,7 +2247,7 @@ static void generate_aead_message(struct aead_request *req, * Mutate the authentic (ciphertext, AAD) pair to get an * inauthentic one. 
*/ - mutate_aead_message(vec, suite->esp_aad); + mutate_aead_message(vec, suite->aad_iv, ivsize); } vec->novrfy = 1; if (suite->einval_allowed) @@ -2507,11 +2512,11 @@ static int test_aead_extra(const char *driver, goto out; } - err = test_aead_inauthentic_inputs(ctx); + err = test_aead_vs_generic_impl(ctx); if (err) goto out; - err = test_aead_vs_generic_impl(ctx); + err = test_aead_inauthentic_inputs(ctx); out: kfree(ctx->vec.key); kfree(ctx->vec.iv); @@ -5229,7 +5234,7 @@ static const struct alg_test_desc alg_test_descs[] = { .aead = { ____VECS(aes_gcm_rfc4106_tv_template), .einval_allowed = 1, - .esp_aad = 1, + .aad_iv = 1, } } }, { @@ -5241,7 +5246,7 @@ static const struct alg_test_desc alg_test_descs[] = { .aead = { ____VECS(aes_ccm_rfc4309_tv_template), .einval_allowed = 1, - .esp_aad = 1, + .aad_iv = 1, } } }, { @@ -5252,6 +5257,7 @@ static const struct alg_test_desc alg_test_descs[] = { .aead = { ____VECS(aes_gcm_rfc4543_tv_template), .einval_allowed = 1, + .aad_iv = 1, } } }, { @@ -5267,7 +5273,7 @@ static const struct alg_test_desc alg_test_descs[] = { .aead = { ____VECS(rfc7539esp_tv_template), .einval_allowed = 1, - .esp_aad = 1, + .aad_iv = 1, } } }, { diff --git a/crypto/xts.c b/crypto/xts.c index 29efa15f1495..dbdd8af629e6 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -379,15 +379,15 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) err = -EINVAL; if (alg->base.cra_blocksize != XTS_BLOCK_SIZE) - goto err_drop_spawn; + goto err_free_inst; if (crypto_skcipher_alg_ivsize(alg)) - goto err_drop_spawn; + goto err_free_inst; err = crypto_inst_setname(skcipher_crypto_instance(inst), "xts", &alg->base); if (err) - goto err_drop_spawn; + goto err_free_inst; err = -EINVAL; cipher_name = alg->base.cra_name; @@ -400,20 +400,20 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) len = strlcpy(ctx->name, cipher_name + 4, sizeof(ctx->name)); if (len < 2 || len >= sizeof(ctx->name)) - goto err_drop_spawn; + goto 
err_free_inst; if (ctx->name[len - 1] != ')') - goto err_drop_spawn; + goto err_free_inst; ctx->name[len - 1] = 0; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "xts(%s)", ctx->name) >= CRYPTO_MAX_ALG_NAME) { err = -ENAMETOOLONG; - goto err_drop_spawn; + goto err_free_inst; } } else - goto err_drop_spawn; + goto err_free_inst; inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = alg->base.cra_priority; @@ -437,17 +437,11 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) inst->free = free; err = skcipher_register_instance(tmpl, inst); - if (err) - goto err_drop_spawn; - -out: - return err; - -err_drop_spawn: - crypto_drop_skcipher(&ctx->spawn); + if (err) { err_free_inst: - kfree(inst); - goto out; + free(inst); + } + return err; } static struct crypto_template crypto_tmpl = { diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index c78d10ea641f..40526da5c6a6 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -26,6 +26,8 @@ */ #define FSL_MC_DEFAULT_DMA_MASK (~0ULL) +static struct fsl_mc_version mc_version; + /** * struct fsl_mc - Private data of a "fsl,qoriq-mc" platform device * @root_mc_bus_dev: fsl-mc device representing the root DPRC @@ -55,20 +57,6 @@ struct fsl_mc_addr_translation_range { }; /** - * struct mc_version - * @major: Major version number: incremented on API compatibility changes - * @minor: Minor version number: incremented on API additions (that are - * backward compatible); reset when major version is incremented - * @revision: Internal revision number: incremented on implementation changes - * and/or bug fixes that have no impact on API - */ -struct mc_version { - u32 major; - u32 minor; - u32 revision; -}; - -/** * fsl_mc_bus_match - device to driver matching callback * @dev: the fsl-mc device to match against * @drv: the device driver to search for matching fsl-mc object type @@ -338,7 +326,7 @@ 
EXPORT_SYMBOL_GPL(fsl_mc_driver_unregister); */ static int mc_get_version(struct fsl_mc_io *mc_io, u32 cmd_flags, - struct mc_version *mc_ver_info) + struct fsl_mc_version *mc_ver_info) { struct fsl_mc_command cmd = { 0 }; struct dpmng_rsp_get_version *rsp_params; @@ -364,6 +352,20 @@ static int mc_get_version(struct fsl_mc_io *mc_io, } /** + * fsl_mc_get_version - function to retrieve the MC f/w version information + * + * Return: mc version when called after fsl-mc-bus probe; NULL otherwise. + */ +struct fsl_mc_version *fsl_mc_get_version(void) +{ + if (mc_version.major) + return &mc_version; + + return NULL; +} +EXPORT_SYMBOL_GPL(fsl_mc_get_version); + +/** * fsl_mc_get_root_dprc - function to traverse to the root dprc */ static void fsl_mc_get_root_dprc(struct device *dev, @@ -862,7 +864,6 @@ static int fsl_mc_bus_probe(struct platform_device *pdev) int container_id; phys_addr_t mc_portal_phys_addr; u32 mc_portal_size; - struct mc_version mc_version; struct resource res; mc = devm_kzalloc(&pdev->dev, sizeof(*mc), GFP_KERNEL); diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 914e293ba62b..9bc46da8d77a 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -244,7 +244,8 @@ config HW_RANDOM_MXC_RNGA config HW_RANDOM_IMX_RNGC tristate "Freescale i.MX RNGC Random Number Generator" - depends on ARCH_MXC + depends on HAS_IOMEM && HAVE_CLK + depends on SOC_IMX25 || COMPILE_TEST default HW_RANDOM ---help--- This driver provides kernel-side support for the Random Number @@ -466,6 +467,13 @@ config HW_RANDOM_NPCM If unsure, say Y. +config HW_RANDOM_KEYSTONE + depends on ARCH_KEYSTONE || COMPILE_TEST + default HW_RANDOM + tristate "TI Keystone NETCP SA Hardware random number generator" + help + This option enables Keystone's hardware random generator. + endif # HW_RANDOM config UML_RANDOM @@ -482,10 +490,3 @@ config UML_RANDOM (check your distro, or download from http://sourceforge.net/projects/gkernel/). 
rngd periodically reads /dev/hwrng and injects the entropy into /dev/random. - -config HW_RANDOM_KEYSTONE - depends on ARCH_KEYSTONE || COMPILE_TEST - default HW_RANDOM - tristate "TI Keystone NETCP SA Hardware random number generator" - help - This option enables Keystone's hardware random generator. diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c index 30cf00f8e9a0..9c47e431ce90 100644 --- a/drivers/char/hw_random/imx-rngc.c +++ b/drivers/char/hw_random/imx-rngc.c @@ -18,12 +18,22 @@ #include <linux/completion.h> #include <linux/io.h> +#define RNGC_VER_ID 0x0000 #define RNGC_COMMAND 0x0004 #define RNGC_CONTROL 0x0008 #define RNGC_STATUS 0x000C #define RNGC_ERROR 0x0010 #define RNGC_FIFO 0x0014 +/* the fields in the ver id register */ +#define RNGC_TYPE_SHIFT 28 +#define RNGC_VER_MAJ_SHIFT 8 + +/* the rng_type field */ +#define RNGC_TYPE_RNGB 0x1 +#define RNGC_TYPE_RNGC 0x2 + + #define RNGC_CMD_CLR_ERR 0x00000020 #define RNGC_CMD_CLR_INT 0x00000010 #define RNGC_CMD_SEED 0x00000002 @@ -31,6 +41,7 @@ #define RNGC_CTRL_MASK_ERROR 0x00000040 #define RNGC_CTRL_MASK_DONE 0x00000020 +#define RNGC_CTRL_AUTO_SEED 0x00000010 #define RNGC_STATUS_ERROR 0x00010000 #define RNGC_STATUS_FIFO_LEVEL_MASK 0x00000f00 @@ -100,15 +111,11 @@ static int imx_rngc_self_test(struct imx_rngc *rngc) writel(cmd | RNGC_CMD_SELF_TEST, rngc->base + RNGC_COMMAND); ret = wait_for_completion_timeout(&rngc->rng_op_done, RNGC_TIMEOUT); - if (!ret) { - imx_rngc_irq_mask_clear(rngc); + imx_rngc_irq_mask_clear(rngc); + if (!ret) return -ETIMEDOUT; - } - if (rngc->err_reg != 0) - return -EIO; - - return 0; + return rngc->err_reg ? 
-EIO : 0; } static int imx_rngc_read(struct hwrng *rng, void *data, size_t max, bool wait) @@ -165,17 +172,17 @@ static irqreturn_t imx_rngc_irq(int irq, void *priv) static int imx_rngc_init(struct hwrng *rng) { struct imx_rngc *rngc = container_of(rng, struct imx_rngc, rng); - u32 cmd; + u32 cmd, ctrl; int ret; /* clear error */ cmd = readl(rngc->base + RNGC_COMMAND); writel(cmd | RNGC_CMD_CLR_ERR, rngc->base + RNGC_COMMAND); + imx_rngc_irq_unmask(rngc); + /* create seed, repeat while there is some statistical error */ do { - imx_rngc_irq_unmask(rngc); - /* seed creation */ cmd = readl(rngc->base + RNGC_COMMAND); writel(cmd | RNGC_CMD_SEED, rngc->base + RNGC_COMMAND); @@ -184,13 +191,42 @@ static int imx_rngc_init(struct hwrng *rng) RNGC_TIMEOUT); if (!ret) { - imx_rngc_irq_mask_clear(rngc); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto err; } } while (rngc->err_reg == RNGC_ERROR_STATUS_STAT_ERR); - return rngc->err_reg ? -EIO : 0; + if (rngc->err_reg) { + ret = -EIO; + goto err; + } + + /* + * enable automatic seeding, the rngc creates a new seed automatically + * after serving 2^20 random 160-bit words + */ + ctrl = readl(rngc->base + RNGC_CONTROL); + ctrl |= RNGC_CTRL_AUTO_SEED; + writel(ctrl, rngc->base + RNGC_CONTROL); + + /* + * if initialisation was successful, we keep the interrupt + * unmasked until imx_rngc_cleanup is called + * we mask the interrupt ourselves if we return an error + */ + return 0; + +err: + imx_rngc_irq_mask_clear(rngc); + return ret; +} + +static void imx_rngc_cleanup(struct hwrng *rng) +{ + struct imx_rngc *rngc = container_of(rng, struct imx_rngc, rng); + + imx_rngc_irq_mask_clear(rngc); } static int imx_rngc_probe(struct platform_device *pdev) @@ -198,6 +234,8 @@ static int imx_rngc_probe(struct platform_device *pdev) struct imx_rngc *rngc; int ret; int irq; + u32 ver_id; + u8 rng_type; rngc = devm_kzalloc(&pdev->dev, sizeof(*rngc), GFP_KERNEL); if (!rngc) @@ -223,6 +261,17 @@ static int imx_rngc_probe(struct platform_device 
*pdev) if (ret) return ret; + ver_id = readl(rngc->base + RNGC_VER_ID); + rng_type = ver_id >> RNGC_TYPE_SHIFT; + /* + * This driver supports only RNGC and RNGB. (There's a different + * driver for RNGA.) + */ + if (rng_type != RNGC_TYPE_RNGC && rng_type != RNGC_TYPE_RNGB) { + ret = -ENODEV; + goto err; + } + ret = devm_request_irq(&pdev->dev, irq, imx_rngc_irq, 0, pdev->name, (void *)rngc); if (ret) { @@ -235,6 +284,7 @@ static int imx_rngc_probe(struct platform_device *pdev) rngc->rng.name = pdev->name; rngc->rng.init = imx_rngc_init; rngc->rng.read = imx_rngc_read; + rngc->rng.cleanup = imx_rngc_cleanup; rngc->dev = &pdev->dev; platform_set_drvdata(pdev, rngc); @@ -244,18 +294,21 @@ static int imx_rngc_probe(struct platform_device *pdev) if (self_test) { ret = imx_rngc_self_test(rngc); if (ret) { - dev_err(rngc->dev, "FSL RNGC self test failed.\n"); + dev_err(rngc->dev, "self test failed\n"); goto err; } } ret = hwrng_register(&rngc->rng); if (ret) { - dev_err(&pdev->dev, "FSL RNGC registering failed (%d)\n", ret); + dev_err(&pdev->dev, "hwrng registration failed\n"); goto err; } - dev_info(&pdev->dev, "Freescale RNGC registered.\n"); + dev_info(&pdev->dev, + "Freescale RNG%c registered (HW revision %d.%02d)\n", + rng_type == RNGC_TYPE_RNGB ? 
'B' : 'C', + (ver_id >> RNGC_VER_MAJ_SHIFT) & 0xff, ver_id & 0xff); return 0; err: diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c index e08a8887e718..a431c5cbe2be 100644 --- a/drivers/char/hw_random/omap3-rom-rng.c +++ b/drivers/char/hw_random/omap3-rom-rng.c @@ -18,6 +18,7 @@ #include <linux/workqueue.h> #include <linux/clk.h> #include <linux/err.h> +#include <linux/io.h> #include <linux/of.h> #include <linux/of_device.h> #include <linux/platform_device.h> diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index c2767ed54dfe..2c887e4d005a 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -233,20 +233,6 @@ config CRYPTO_CRC32_S390 It is available with IBM z13 or later. -config CRYPTO_DEV_MARVELL_CESA - tristate "Marvell's Cryptographic Engine driver" - depends on PLAT_ORION || ARCH_MVEBU - select CRYPTO_LIB_AES - select CRYPTO_LIB_DES - select CRYPTO_SKCIPHER - select CRYPTO_HASH - select SRAM - help - This driver allows you to utilize the Cryptographic Engines and - Security Accelerator (CESA) which can be found on MVEBU and ORION - platforms. - This driver supports CPU offload through DMA transfers. - config CRYPTO_DEV_NIAGARA2 tristate "Niagara2 Stream Processing Unit driver" select CRYPTO_LIB_DES @@ -606,6 +592,7 @@ config CRYPTO_DEV_MXS_DCP source "drivers/crypto/qat/Kconfig" source "drivers/crypto/cavium/cpt/Kconfig" source "drivers/crypto/cavium/nitrox/Kconfig" +source "drivers/crypto/marvell/Kconfig" config CRYPTO_DEV_CAVIUM_ZIP tristate "Cavium ZIP driver" @@ -685,6 +672,29 @@ choice endchoice +config CRYPTO_DEV_QCE_SW_MAX_LEN + int "Default maximum request size to use software for AES" + depends on CRYPTO_DEV_QCE && CRYPTO_DEV_QCE_SKCIPHER + default 512 + help + This sets the default maximum request size to perform AES requests + using software instead of the crypto engine. It can be changed by + setting the aes_sw_max_len parameter. 
+ + Small blocks are processed faster in software than hardware. + Considering the 256-bit ciphers, software is 2-3 times faster than + qce at 256-bytes, 30% faster at 512, and about even at 768-bytes. + With 128-bit keys, the break-even point would be around 1024-bytes. + + The default is set a little lower, to 512 bytes, to balance the + cost in CPU usage. The minimum recommended setting is 16-bytes + (1 AES block), since AES-GCM will fail if you set it lower. + Setting this to zero will send all requests to the hardware. + + Note that 192-bit keys are not supported by the hardware and are + always processed by the software fallback, and all DES requests + are done by the hardware. + config CRYPTO_DEV_QCOM_RNG tristate "Qualcomm Random Number Generator Driver" depends on ARCH_QCOM || COMPILE_TEST @@ -731,6 +741,18 @@ config CRYPTO_DEV_ROCKCHIP This driver interfaces with the hardware crypto accelerator. Supporting cbc/ecb chainmode, and aes/des/des3_ede cipher mode. +config CRYPTO_DEV_ZYNQMP_AES + tristate "Support for Xilinx ZynqMP AES hw accelerator" + depends on ZYNQMP_FIRMWARE || COMPILE_TEST + select CRYPTO_AES + select CRYPTO_ENGINE + select CRYPTO_AEAD + help + Xilinx ZynqMP has AES-GCM engine used for symmetric key + encryption and decryption. This driver interfaces with AES hw + accelerator. Select this if you want to use the ZynqMP module + for AES algorithms. 
+ config CRYPTO_DEV_MEDIATEK tristate "MediaTek's EIP97 Cryptographic Engine driver" depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 40229d499476..944ed7226e37 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -18,7 +18,7 @@ obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o -obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell/ +obj-$(CONFIG_CRYPTO_DEV_MARVELL) += marvell/ obj-$(CONFIG_CRYPTO_DEV_MEDIATEK) += mediatek/ obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o @@ -47,5 +47,6 @@ obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/ obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/ obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) += axis/ +obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_AES) += xilinx/ obj-y += hisilicon/ obj-$(CONFIG_CRYPTO_DEV_AMLOGIC_GXL) += amlogic/ diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c index f72346a44e69..3e4e4bbda34c 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c @@ -565,10 +565,8 @@ static int sun8i_ce_probe(struct platform_device *pdev) /* Get Non Secure IRQ */ irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(ce->dev, "Cannot get CryptoEngine Non-secure IRQ\n"); + if (irq < 0) return irq; - } ce->reset = devm_reset_control_get(&pdev->dev, NULL); if (IS_ERR(ce->reset)) { diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h index 8f8404c84a4d..0e9eac397e1b 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h @@ -214,7 +214,7 @@ struct sun8i_cipher_tfm_ctx { * this template * @alg: one of sub struct must be 
used * @stat_req: number of request done on this template - * @stat_fb: total of all data len done on this template + * @stat_fb: number of request which has fallbacked */ struct sun8i_ce_alg_template { u32 type; diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h index b5f855f3de10..29c44f279112 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h @@ -186,7 +186,7 @@ struct sun8i_cipher_tfm_ctx { * this template * @alg: one of sub struct must be used * @stat_req: number of request done on this template - * @stat_fb: total of all data len done on this template + * @stat_fb: number of request which has fallbacked */ struct sun8i_ss_alg_template { u32 type; diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c index 1d3355913b40..e8e8281e027d 100644 --- a/drivers/crypto/atmel-i2c.c +++ b/drivers/crypto/atmel-i2c.c @@ -176,7 +176,8 @@ static int atmel_i2c_wakeup(struct i2c_client *client) * device is idle, asleep or during waking up. Don't check for error * when waking up the device. */ - i2c_master_send(client, i2c_priv->wake_token, i2c_priv->wake_token_sz); + i2c_transfer_buffer_flags(client, i2c_priv->wake_token, + i2c_priv->wake_token_sz, I2C_M_IGNORE_NAK); /* * Wait to wake the device. 
Typical execution times for ecdh and genkey diff --git a/drivers/crypto/bcm/util.c b/drivers/crypto/bcm/util.c index cd7504101acd..2b304fc78059 100644 --- a/drivers/crypto/bcm/util.c +++ b/drivers/crypto/bcm/util.c @@ -366,88 +366,88 @@ static ssize_t spu_debugfs_read(struct file *filp, char __user *ubuf, ipriv = filp->private_data; out_offset = 0; - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Number of SPUs.........%u\n", ipriv->spu.num_spu); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Current sessions.......%u\n", atomic_read(&ipriv->session_count)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Session count..........%u\n", atomic_read(&ipriv->stream_count)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Cipher setkey..........%u\n", atomic_read(&ipriv->setkey_cnt[SPU_OP_CIPHER])); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Cipher Ops.............%u\n", atomic_read(&ipriv->op_counts[SPU_OP_CIPHER])); for (alg = 0; alg < CIPHER_ALG_LAST; alg++) { for (mode = 0; mode < CIPHER_MODE_LAST; mode++) { op_cnt = atomic_read(&ipriv->cipher_cnt[alg][mode]); if (op_cnt) { - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, " %-13s%11u\n", spu_alg_name(alg, mode), op_cnt); } } } - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Hash Ops...............%u\n", atomic_read(&ipriv->op_counts[SPU_OP_HASH])); for (alg = 0; alg < HASH_ALG_LAST; alg++) { op_cnt = 
atomic_read(&ipriv->hash_cnt[alg]); if (op_cnt) { - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, " %-13s%11u\n", hash_alg_name[alg], op_cnt); } } - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "HMAC setkey............%u\n", atomic_read(&ipriv->setkey_cnt[SPU_OP_HMAC])); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "HMAC Ops...............%u\n", atomic_read(&ipriv->op_counts[SPU_OP_HMAC])); for (alg = 0; alg < HASH_ALG_LAST; alg++) { op_cnt = atomic_read(&ipriv->hmac_cnt[alg]); if (op_cnt) { - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, " %-13s%11u\n", hash_alg_name[alg], op_cnt); } } - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "AEAD setkey............%u\n", atomic_read(&ipriv->setkey_cnt[SPU_OP_AEAD])); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "AEAD Ops...............%u\n", atomic_read(&ipriv->op_counts[SPU_OP_AEAD])); for (alg = 0; alg < AEAD_TYPE_LAST; alg++) { op_cnt = atomic_read(&ipriv->aead_cnt[alg]); if (op_cnt) { - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, " %-13s%11u\n", aead_alg_name[alg], op_cnt); } } - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Bytes of req data......%llu\n", (u64)atomic64_read(&ipriv->bytes_out)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Bytes of resp data.....%llu\n", 
(u64)atomic64_read(&ipriv->bytes_in)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Mailbox full...........%u\n", atomic_read(&ipriv->mb_no_spc)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Mailbox send failures..%u\n", atomic_read(&ipriv->mb_send_fail)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Check ICV errors.......%u\n", atomic_read(&ipriv->bad_icv)); if (ipriv->spu.spu_type == SPU_TYPE_SPUM) @@ -455,7 +455,7 @@ static ssize_t spu_debugfs_read(struct file *filp, char __user *ubuf, spu_ofifo_ctrl = ioread32(ipriv->spu.reg_vbase[i] + SPU_OFIFO_CTRL); fifo_len = spu_ofifo_ctrl & SPU_FIFO_WATERMARK; - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "SPU %d output FIFO high water.....%u\n", i, fifo_len); diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index fac5b2e26610..a62f228be6da 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -13,6 +13,7 @@ config CRYPTO_DEV_FSL_CAAM depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE select SOC_BUS select CRYPTO_DEV_FSL_CAAM_COMMON + imply FSL_MC_BUS help Enables the driver module for Freescale's Cryptographic Accelerator and Assurance Module (CAAM), also known as the SEC version 4 (SEC4). 
@@ -33,6 +34,7 @@ config CRYPTO_DEV_FSL_CAAM_DEBUG menuconfig CRYPTO_DEV_FSL_CAAM_JR tristate "Freescale CAAM Job Ring driver backend" + select CRYPTO_ENGINE default y help Enables the driver module for Job Rings which are part of diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index ef1a65f4fc92..b7bb7c30adeb 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -56,6 +56,7 @@ #include "sg_sw_sec4.h" #include "key_gen.h" #include "caamalg_desc.h" +#include <crypto/engine.h> /* * crypto alg @@ -101,6 +102,7 @@ struct caam_skcipher_alg { * per-session context */ struct caam_ctx { + struct crypto_engine_ctx enginectx; u32 sh_desc_enc[DESC_MAX_USED_LEN]; u32 sh_desc_dec[DESC_MAX_USED_LEN]; u8 key[CAAM_MAX_KEY_SIZE]; @@ -114,6 +116,14 @@ struct caam_ctx { unsigned int authsize; }; +struct caam_skcipher_req_ctx { + struct skcipher_edesc *edesc; +}; + +struct caam_aead_req_ctx { + struct aead_edesc *edesc; +}; + static int aead_null_set_sh_desc(struct crypto_aead *aead) { struct caam_ctx *ctx = crypto_aead_ctx(aead); @@ -858,6 +868,7 @@ static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key, * @mapped_src_nents: number of segments in input h/w link table * @mapped_dst_nents: number of segments in output h/w link table * @sec4_sg_bytes: length of dma mapped sec4_sg space + * @bklog: stored to determine if the request needs backlog * @sec4_sg_dma: bus physical mapped address of h/w link table * @sec4_sg: pointer to h/w link table * @hw_desc: the h/w job descriptor followed by any referenced link tables @@ -868,6 +879,7 @@ struct aead_edesc { int mapped_src_nents; int mapped_dst_nents; int sec4_sg_bytes; + bool bklog; dma_addr_t sec4_sg_dma; struct sec4_sg_entry *sec4_sg; u32 hw_desc[]; @@ -881,6 +893,7 @@ struct aead_edesc { * @mapped_dst_nents: number of segments in output h/w link table * @iv_dma: dma address of iv for checking continuity and link table * @sec4_sg_bytes: length of dma mapped 
sec4_sg space + * @bklog: stored to determine if the request needs backlog * @sec4_sg_dma: bus physical mapped address of h/w link table * @sec4_sg: pointer to h/w link table * @hw_desc: the h/w job descriptor followed by any referenced link tables @@ -893,9 +906,10 @@ struct skcipher_edesc { int mapped_dst_nents; dma_addr_t iv_dma; int sec4_sg_bytes; + bool bklog; dma_addr_t sec4_sg_dma; struct sec4_sg_entry *sec4_sg; - u32 hw_desc[0]; + u32 hw_desc[]; }; static void caam_unmap(struct device *dev, struct scatterlist *src, @@ -941,37 +955,18 @@ static void skcipher_unmap(struct device *dev, struct skcipher_edesc *edesc, edesc->sec4_sg_dma, edesc->sec4_sg_bytes); } -static void aead_encrypt_done(struct device *jrdev, u32 *desc, u32 err, - void *context) -{ - struct aead_request *req = context; - struct aead_edesc *edesc; - int ecode = 0; - - dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - - edesc = container_of(desc, struct aead_edesc, hw_desc[0]); - - if (err) - ecode = caam_jr_strstatus(jrdev, err); - - aead_unmap(jrdev, edesc, req); - - kfree(edesc); - - aead_request_complete(req, ecode); -} - -static void aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err, - void *context) +static void aead_crypt_done(struct device *jrdev, u32 *desc, u32 err, + void *context) { struct aead_request *req = context; + struct caam_aead_req_ctx *rctx = aead_request_ctx(req); + struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev); struct aead_edesc *edesc; int ecode = 0; dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - edesc = container_of(desc, struct aead_edesc, hw_desc[0]); + edesc = rctx->edesc; if (err) ecode = caam_jr_strstatus(jrdev, err); @@ -980,61 +975,30 @@ static void aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err, kfree(edesc); - aead_request_complete(req, ecode); -} - -static void skcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err, - void *context) -{ - struct skcipher_request *req = context; - struct 
skcipher_edesc *edesc; - struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - int ivsize = crypto_skcipher_ivsize(skcipher); - int ecode = 0; - - dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - - edesc = container_of(desc, struct skcipher_edesc, hw_desc[0]); - - if (err) - ecode = caam_jr_strstatus(jrdev, err); - - skcipher_unmap(jrdev, edesc, req); - /* - * The crypto API expects us to set the IV (req->iv) to the last - * ciphertext block (CBC mode) or last counter (CTR mode). - * This is used e.g. by the CTS mode. + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. */ - if (ivsize && !ecode) { - memcpy(req->iv, (u8 *)edesc->sec4_sg + edesc->sec4_sg_bytes, - ivsize); - print_hex_dump_debug("dstiv @"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->iv, - edesc->src_nents > 1 ? 100 : ivsize, 1); - } - - caam_dump_sg("dst @" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->dst, - edesc->dst_nents > 1 ? 
100 : req->cryptlen, 1); - - kfree(edesc); - - skcipher_request_complete(req, ecode); + if (!edesc->bklog) + aead_request_complete(req, ecode); + else + crypto_finalize_aead_request(jrp->engine, req, ecode); } -static void skcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err, - void *context) +static void skcipher_crypt_done(struct device *jrdev, u32 *desc, u32 err, + void *context) { struct skcipher_request *req = context; struct skcipher_edesc *edesc; + struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); + struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev); int ivsize = crypto_skcipher_ivsize(skcipher); int ecode = 0; dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - edesc = container_of(desc, struct skcipher_edesc, hw_desc[0]); + edesc = rctx->edesc; if (err) ecode = caam_jr_strstatus(jrdev, err); @@ -1060,7 +1024,14 @@ static void skcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err, kfree(edesc); - skcipher_request_complete(req, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. + */ + if (!edesc->bklog) + skcipher_request_complete(req, ecode); + else + crypto_finalize_skcipher_request(jrp->engine, req, ecode); } /* @@ -1306,6 +1277,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, struct crypto_aead *aead = crypto_aead_reqtfm(req); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; + struct caam_aead_req_ctx *rctx = aead_request_ctx(req); gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
GFP_KERNEL : GFP_ATOMIC; int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0; @@ -1406,6 +1378,9 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, edesc->mapped_dst_nents = mapped_dst_nents; edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) + desc_bytes; + + rctx->edesc = edesc; + *all_contig_ptr = !(mapped_src_nents > 1); sec4_sg_index = 0; @@ -1436,41 +1411,34 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, return edesc; } -static int gcm_encrypt(struct aead_request *req) +static int aead_enqueue_req(struct device *jrdev, struct aead_request *req) { - struct aead_edesc *edesc; - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); - struct device *jrdev = ctx->jrdev; - bool all_contig; - u32 *desc; - int ret = 0; - - /* allocate extended descriptor */ - edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig, true); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); - - /* Create and submit job descriptor */ - init_gcm_job(req, edesc, all_contig, true); + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); + struct caam_aead_req_ctx *rctx = aead_request_ctx(req); + struct aead_edesc *edesc = rctx->edesc; + u32 *desc = edesc->hw_desc; + int ret; - print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, - desc_bytes(edesc->hw_desc), 1); + /* + * Only the backlog request are sent to crypto-engine since the others + * can be handled by CAAM, if free, especially since JR has up to 1024 + * entries (more than the 10 entries from crypto-engine). 
+ */ + if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + ret = crypto_transfer_aead_request_to_engine(jrpriv->engine, + req); + else + ret = caam_jr_enqueue(jrdev, desc, aead_crypt_done, req); - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) { aead_unmap(jrdev, edesc, req); - kfree(edesc); + kfree(rctx->edesc); } return ret; } -static int chachapoly_encrypt(struct aead_request *req) +static inline int chachapoly_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); @@ -1478,180 +1446,130 @@ static int chachapoly_encrypt(struct aead_request *req) struct device *jrdev = ctx->jrdev; bool all_contig; u32 *desc; - int ret; edesc = aead_edesc_alloc(req, CHACHAPOLY_DESC_JOB_IO_LEN, &all_contig, - true); + encrypt); if (IS_ERR(edesc)) return PTR_ERR(edesc); desc = edesc->hw_desc; - init_chachapoly_job(req, edesc, all_contig, true); + init_chachapoly_job(req, edesc, all_contig, encrypt); print_hex_dump_debug("chachapoly jobdesc@" __stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - aead_unmap(jrdev, edesc, req); - kfree(edesc); - } - - return ret; + return aead_enqueue_req(jrdev, req); } -static int chachapoly_decrypt(struct aead_request *req) +static int chachapoly_encrypt(struct aead_request *req) { - struct aead_edesc *edesc; - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); - struct device *jrdev = ctx->jrdev; - bool all_contig; - u32 *desc; - int ret; - - edesc = aead_edesc_alloc(req, CHACHAPOLY_DESC_JOB_IO_LEN, &all_contig, - false); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); - - desc = edesc->hw_desc; - - init_chachapoly_job(req, edesc, all_contig, false); - 
print_hex_dump_debug("chachapoly jobdesc@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), - 1); - - ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - aead_unmap(jrdev, edesc, req); - kfree(edesc); - } - - return ret; + return chachapoly_crypt(req, true); } -static int ipsec_gcm_encrypt(struct aead_request *req) +static int chachapoly_decrypt(struct aead_request *req) { - return crypto_ipsec_check_assoclen(req->assoclen) ? : gcm_encrypt(req); + return chachapoly_crypt(req, false); } -static int aead_encrypt(struct aead_request *req) +static inline int aead_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; bool all_contig; - u32 *desc; - int ret = 0; /* allocate extended descriptor */ edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN, - &all_contig, true); + &all_contig, encrypt); if (IS_ERR(edesc)) return PTR_ERR(edesc); /* Create and submit job descriptor */ - init_authenc_job(req, edesc, all_contig, true); + init_authenc_job(req, edesc, all_contig, encrypt); print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, desc_bytes(edesc->hw_desc), 1); - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - aead_unmap(jrdev, edesc, req); - kfree(edesc); - } + return aead_enqueue_req(jrdev, req); +} - return ret; +static int aead_encrypt(struct aead_request *req) +{ + return aead_crypt(req, true); } -static int gcm_decrypt(struct aead_request *req) +static int aead_decrypt(struct aead_request *req) { - struct aead_edesc *edesc; - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); - struct device *jrdev = ctx->jrdev; - bool all_contig; - u32 *desc; 
- int ret = 0; + return aead_crypt(req, false); +} - /* allocate extended descriptor */ - edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig, false); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); +static int aead_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct aead_request *req = aead_request_cast(areq); + struct caam_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); + struct caam_aead_req_ctx *rctx = aead_request_ctx(req); + u32 *desc = rctx->edesc->hw_desc; + int ret; - /* Create and submit job descriptor*/ - init_gcm_job(req, edesc, all_contig, false); + rctx->edesc->bklog = true; - print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, - desc_bytes(edesc->hw_desc), 1); + ret = caam_jr_enqueue(ctx->jrdev, desc, aead_crypt_done, req); - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; + if (ret != -EINPROGRESS) { + aead_unmap(ctx->jrdev, rctx->edesc, req); + kfree(rctx->edesc); } else { - aead_unmap(jrdev, edesc, req); - kfree(edesc); + ret = 0; } return ret; } -static int ipsec_gcm_decrypt(struct aead_request *req) -{ - return crypto_ipsec_check_assoclen(req->assoclen) ? 
: gcm_decrypt(req); -} - -static int aead_decrypt(struct aead_request *req) +static inline int gcm_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; bool all_contig; - u32 *desc; - int ret = 0; - - caam_dump_sg("dec src@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->src, - req->assoclen + req->cryptlen, 1); /* allocate extended descriptor */ - edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN, - &all_contig, false); + edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig, + encrypt); if (IS_ERR(edesc)) return PTR_ERR(edesc); - /* Create and submit job descriptor*/ - init_authenc_job(req, edesc, all_contig, false); + /* Create and submit job descriptor */ + init_gcm_job(req, edesc, all_contig, encrypt); print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, desc_bytes(edesc->hw_desc), 1); - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - aead_unmap(jrdev, edesc, req); - kfree(edesc); - } + return aead_enqueue_req(jrdev, req); +} - return ret; +static int gcm_encrypt(struct aead_request *req) +{ + return gcm_crypt(req, true); +} + +static int gcm_decrypt(struct aead_request *req) +{ + return gcm_crypt(req, false); +} + +static int ipsec_gcm_encrypt(struct aead_request *req) +{ + return crypto_ipsec_check_assoclen(req->assoclen) ? : gcm_encrypt(req); +} + +static int ipsec_gcm_decrypt(struct aead_request *req) +{ + return crypto_ipsec_check_assoclen(req->assoclen) ? 
: gcm_decrypt(req); } /* @@ -1662,6 +1580,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, { struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req); struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; @@ -1760,6 +1679,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, edesc->sec4_sg_bytes = sec4_sg_bytes; edesc->sec4_sg = (struct sec4_sg_entry *)((u8 *)edesc->hw_desc + desc_bytes); + rctx->edesc = edesc; /* Make sure IV is located in a DMAable area */ if (ivsize) { @@ -1815,49 +1735,35 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, return edesc; } -static int skcipher_encrypt(struct skcipher_request *req) +static int skcipher_do_one_req(struct crypto_engine *engine, void *areq) { - struct skcipher_edesc *edesc; - struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); - struct device *jrdev = ctx->jrdev; - u32 *desc; - int ret = 0; - - if (!req->cryptlen) - return 0; - - /* allocate extended descriptor */ - edesc = skcipher_edesc_alloc(req, DESC_JOB_IO_LEN * CAAM_CMD_SZ); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); - - /* Create and submit job descriptor*/ - init_skcipher_job(req, edesc, true); + struct skcipher_request *req = skcipher_request_cast(areq); + struct caam_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req); + u32 *desc = rctx->edesc->hw_desc; + int ret; - print_hex_dump_debug("skcipher jobdesc@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, - desc_bytes(edesc->hw_desc), 1); + rctx->edesc->bklog = true; - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, skcipher_encrypt_done, 
req); + ret = caam_jr_enqueue(ctx->jrdev, desc, skcipher_crypt_done, req); - if (!ret) { - ret = -EINPROGRESS; + if (ret != -EINPROGRESS) { + skcipher_unmap(ctx->jrdev, rctx->edesc, req); + kfree(rctx->edesc); } else { - skcipher_unmap(jrdev, edesc, req); - kfree(edesc); + ret = 0; } return ret; } -static int skcipher_decrypt(struct skcipher_request *req) +static inline int skcipher_crypt(struct skcipher_request *req, bool encrypt) { struct skcipher_edesc *edesc; struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); struct device *jrdev = ctx->jrdev; + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); u32 *desc; int ret = 0; @@ -1870,17 +1776,25 @@ static int skcipher_decrypt(struct skcipher_request *req) return PTR_ERR(edesc); /* Create and submit job descriptor*/ - init_skcipher_job(req, edesc, false); - desc = edesc->hw_desc; + init_skcipher_job(req, edesc, encrypt); print_hex_dump_debug("skcipher jobdesc@" __stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, desc_bytes(edesc->hw_desc), 1); - ret = caam_jr_enqueue(jrdev, desc, skcipher_decrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { + desc = edesc->hw_desc; + /* + * Only the backlog request are sent to crypto-engine since the others + * can be handled by CAAM, if free, especially since JR has up to 1024 + * entries (more than the 10 entries from crypto-engine). 
+ */ + if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + ret = crypto_transfer_skcipher_request_to_engine(jrpriv->engine, + req); + else + ret = caam_jr_enqueue(jrdev, desc, skcipher_crypt_done, req); + + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) { skcipher_unmap(jrdev, edesc, req); kfree(edesc); } @@ -1888,6 +1802,16 @@ static int skcipher_decrypt(struct skcipher_request *req) return ret; } +static int skcipher_encrypt(struct skcipher_request *req) +{ + return skcipher_crypt(req, true); +} + +static int skcipher_decrypt(struct skcipher_request *req) +{ + return skcipher_crypt(req, false); +} + static struct caam_skcipher_alg driver_algs[] = { { .skcipher = { @@ -3391,6 +3315,8 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam, { dma_addr_t dma_addr; struct caam_drv_private *priv; + const size_t sh_desc_enc_offset = offsetof(struct caam_ctx, + sh_desc_enc); ctx->jrdev = caam_jr_alloc(); if (IS_ERR(ctx->jrdev)) { @@ -3406,7 +3332,8 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam, dma_addr = dma_map_single_attrs(ctx->jrdev, ctx->sh_desc_enc, offsetof(struct caam_ctx, - sh_desc_enc_dma), + sh_desc_enc_dma) - + sh_desc_enc_offset, ctx->dir, DMA_ATTR_SKIP_CPU_SYNC); if (dma_mapping_error(ctx->jrdev, dma_addr)) { dev_err(ctx->jrdev, "unable to map key, shared descriptors\n"); @@ -3416,8 +3343,10 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam, ctx->sh_desc_enc_dma = dma_addr; ctx->sh_desc_dec_dma = dma_addr + offsetof(struct caam_ctx, - sh_desc_dec); - ctx->key_dma = dma_addr + offsetof(struct caam_ctx, key); + sh_desc_dec) - + sh_desc_enc_offset; + ctx->key_dma = dma_addr + offsetof(struct caam_ctx, key) - + sh_desc_enc_offset; /* copy descriptor header template value */ ctx->cdata.algtype = OP_TYPE_CLASS1_ALG | caam->class1_alg_type; @@ -3431,6 +3360,11 @@ static int caam_cra_init(struct crypto_skcipher *tfm) struct skcipher_alg *alg = crypto_skcipher_alg(tfm); struct 
caam_skcipher_alg *caam_alg = container_of(alg, typeof(*caam_alg), skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_skcipher_set_reqsize(tfm, sizeof(struct caam_skcipher_req_ctx)); + + ctx->enginectx.op.do_one_request = skcipher_do_one_req; return caam_init_common(crypto_skcipher_ctx(tfm), &caam_alg->caam, false); @@ -3443,13 +3377,18 @@ static int caam_aead_init(struct crypto_aead *tfm) container_of(alg, struct caam_aead_alg, aead); struct caam_ctx *ctx = crypto_aead_ctx(tfm); + crypto_aead_set_reqsize(tfm, sizeof(struct caam_aead_req_ctx)); + + ctx->enginectx.op.do_one_request = aead_do_one_req; + return caam_init_common(ctx, &caam_alg->caam, !caam_alg->caam.nodkp); } static void caam_exit_common(struct caam_ctx *ctx) { dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_enc_dma, - offsetof(struct caam_ctx, sh_desc_enc_dma), + offsetof(struct caam_ctx, sh_desc_enc_dma) - + offsetof(struct caam_ctx, sh_desc_enc), ctx->dir, DMA_ATTR_SKIP_CPU_SYNC); caam_jr_free(ctx->jrdev); } diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c index aa9ccca67045..d6c58184bb57 100644 --- a/drivers/crypto/caam/caamalg_desc.c +++ b/drivers/crypto/caam/caamalg_desc.c @@ -1379,6 +1379,9 @@ void cnstr_shdsc_skcipher_encap(u32 * const desc, struct alginfo *cdata, const u32 ctx1_iv_off) { u32 *key_jump_cmd; + u32 options = cdata->algtype | OP_ALG_AS_INIT | OP_ALG_ENCRYPT; + bool is_chacha20 = ((cdata->algtype & OP_ALG_ALGSEL_MASK) == + OP_ALG_ALGSEL_CHACHA20); init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); /* Skip if already shared */ @@ -1417,14 +1420,15 @@ void cnstr_shdsc_skcipher_encap(u32 * const desc, struct alginfo *cdata, LDST_OFFSET_SHIFT)); /* Load operation */ - append_operation(desc, cdata->algtype | OP_ALG_AS_INIT | - OP_ALG_ENCRYPT); + if (is_chacha20) + options |= OP_ALG_AS_FINALIZE; + append_operation(desc, options); /* Perform operation */ skcipher_append_src_dst(desc); /* Store IV */ - if (ivsize) + if 
(!is_chacha20 && ivsize) append_seq_store(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); @@ -1451,6 +1455,8 @@ void cnstr_shdsc_skcipher_decap(u32 * const desc, struct alginfo *cdata, const u32 ctx1_iv_off) { u32 *key_jump_cmd; + bool is_chacha20 = ((cdata->algtype & OP_ALG_ALGSEL_MASK) == + OP_ALG_ALGSEL_CHACHA20); init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); /* Skip if already shared */ @@ -1499,7 +1505,7 @@ void cnstr_shdsc_skcipher_decap(u32 * const desc, struct alginfo *cdata, skcipher_append_src_dst(desc); /* Store IV */ - if (ivsize) + if (!is_chacha20 && ivsize) append_seq_store(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); @@ -1518,7 +1524,13 @@ EXPORT_SYMBOL(cnstr_shdsc_skcipher_decap); */ void cnstr_shdsc_xts_skcipher_encap(u32 * const desc, struct alginfo *cdata) { - __be64 sector_size = cpu_to_be64(512); + /* + * Set sector size to a big value, practically disabling + * sector size segmentation in xts implementation. We cannot + * take full advantage of this HW feature with existing + * crypto API / dm-crypt SW architecture. + */ + __be64 sector_size = cpu_to_be64(BIT(15)); u32 *key_jump_cmd; init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); @@ -1571,7 +1583,13 @@ EXPORT_SYMBOL(cnstr_shdsc_xts_skcipher_encap); */ void cnstr_shdsc_xts_skcipher_decap(u32 * const desc, struct alginfo *cdata) { - __be64 sector_size = cpu_to_be64(512); + /* + * Set sector size to a big value, practically disabling + * sector size segmentation in xts implementation. We cannot + * take full advantage of this HW feature with existing + * crypto API / dm-crypt SW architecture. 
+ */ + __be64 sector_size = cpu_to_be64(BIT(15)); u32 *key_jump_cmd; init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c index 4a29e0ef9d63..27e36bdf6163 100644 --- a/drivers/crypto/caam/caamalg_qi.c +++ b/drivers/crypto/caam/caamalg_qi.c @@ -783,7 +783,7 @@ struct aead_edesc { unsigned int assoclen; dma_addr_t assoclen_dma; struct caam_drv_req drv_req; - struct qm_sg_entry sgt[0]; + struct qm_sg_entry sgt[]; }; /* @@ -803,7 +803,7 @@ struct skcipher_edesc { int qm_sg_bytes; dma_addr_t qm_sg_dma; struct caam_drv_req drv_req; - struct qm_sg_entry sgt[0]; + struct qm_sg_entry sgt[]; }; static struct caam_drv_ctx *get_drv_ctx(struct caam_ctx *ctx, diff --git a/drivers/crypto/caam/caamalg_qi2.h b/drivers/crypto/caam/caamalg_qi2.h index 706736776b47..f29cb7bd7dd3 100644 --- a/drivers/crypto/caam/caamalg_qi2.h +++ b/drivers/crypto/caam/caamalg_qi2.h @@ -114,7 +114,7 @@ struct aead_edesc { dma_addr_t qm_sg_dma; unsigned int assoclen; dma_addr_t assoclen_dma; - struct dpaa2_sg_entry sgt[0]; + struct dpaa2_sg_entry sgt[]; }; /* @@ -132,7 +132,7 @@ struct skcipher_edesc { dma_addr_t iv_dma; int qm_sg_bytes; dma_addr_t qm_sg_dma; - struct dpaa2_sg_entry sgt[0]; + struct dpaa2_sg_entry sgt[]; }; /* @@ -146,7 +146,7 @@ struct ahash_edesc { dma_addr_t qm_sg_dma; int src_nents; int qm_sg_bytes; - struct dpaa2_sg_entry sgt[0]; + struct dpaa2_sg_entry sgt[]; }; /** diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 8d9143407fc5..943bc0296267 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -65,6 +65,7 @@ #include "sg_sw_sec4.h" #include "key_gen.h" #include "caamhash_desc.h" +#include <crypto/engine.h> #define CAAM_CRA_PRIORITY 3000 @@ -86,6 +87,7 @@ static struct list_head hash_list; /* ahash per-session context */ struct caam_hash_ctx { + struct crypto_engine_ctx enginectx; u32 sh_desc_update[DESC_HASH_MAX_USED_LEN] 
____cacheline_aligned; u32 sh_desc_update_first[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned; u32 sh_desc_fin[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned; @@ -111,9 +113,12 @@ struct caam_hash_state { int buflen; int next_buflen; u8 caam_ctx[MAX_CTX_LEN] ____cacheline_aligned; - int (*update)(struct ahash_request *req); + int (*update)(struct ahash_request *req) ____cacheline_aligned; int (*final)(struct ahash_request *req); int (*finup)(struct ahash_request *req); + struct ahash_edesc *edesc; + void (*ahash_op_done)(struct device *jrdev, u32 *desc, u32 err, + void *context); }; struct caam_export_state { @@ -395,7 +400,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key, init_completion(&result.completion); ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result); - if (!ret) { + if (ret == -EINPROGRESS) { /* in progress */ wait_for_completion(&result.completion); ret = result.err; @@ -521,6 +526,7 @@ static int acmac_setkey(struct crypto_ahash *ahash, const u8 *key, * @sec4_sg_dma: physical mapped address of h/w link table * @src_nents: number of segments in input scatterlist * @sec4_sg_bytes: length of dma mapped sec4_sg space + * @bklog: stored to determine if the request needs backlog * @hw_desc: the h/w job descriptor followed by any referenced link tables * @sec4_sg: h/w link table */ @@ -528,8 +534,9 @@ struct ahash_edesc { dma_addr_t sec4_sg_dma; int src_nents; int sec4_sg_bytes; + bool bklog; u32 hw_desc[DESC_JOB_IO_LEN_MAX / sizeof(u32)] ____cacheline_aligned; - struct sec4_sg_entry sec4_sg[0]; + struct sec4_sg_entry sec4_sg[]; }; static inline void ahash_unmap(struct device *dev, @@ -565,10 +572,11 @@ static inline void ahash_unmap_ctx(struct device *dev, ahash_unmap(dev, edesc, req, dst_len); } -static void ahash_done(struct device *jrdev, u32 *desc, u32 err, - void *context) +static inline void ahash_done_cpy(struct device *jrdev, u32 *desc, u32 err, + void *context, enum dma_data_direction dir) { struct ahash_request 
*req = context; + struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev); struct ahash_edesc *edesc; struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); int digestsize = crypto_ahash_digestsize(ahash); @@ -578,11 +586,12 @@ static void ahash_done(struct device *jrdev, u32 *desc, u32 err, dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); + edesc = state->edesc; + if (err) ecode = caam_jr_strstatus(jrdev, err); - ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); + ahash_unmap_ctx(jrdev, edesc, req, digestsize, dir); memcpy(req->result, state->caam_ctx, digestsize); kfree(edesc); @@ -590,81 +599,33 @@ static void ahash_done(struct device *jrdev, u32 *desc, u32 err, DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, ctx->ctx_len, 1); - req->base.complete(&req->base, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. + */ + if (!edesc->bklog) + req->base.complete(&req->base, ecode); + else + crypto_finalize_hash_request(jrp->engine, req, ecode); } -static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err, - void *context) +static void ahash_done(struct device *jrdev, u32 *desc, u32 err, + void *context) { - struct ahash_request *req = context; - struct ahash_edesc *edesc; - struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); - int digestsize = crypto_ahash_digestsize(ahash); - int ecode = 0; - - dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - - edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); - if (err) - ecode = caam_jr_strstatus(jrdev, err); - - ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL); - kfree(edesc); - - scatterwalk_map_and_copy(state->buf, req->src, - req->nbytes - state->next_buflen, - state->next_buflen, 0); - state->buflen = state->next_buflen; 
- - print_hex_dump_debug("buf@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, state->buf, - state->buflen, 1); - - print_hex_dump_debug("ctx@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, - ctx->ctx_len, 1); - if (req->result) - print_hex_dump_debug("result@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->result, - digestsize, 1); - - req->base.complete(&req->base, ecode); + ahash_done_cpy(jrdev, desc, err, context, DMA_FROM_DEVICE); } static void ahash_done_ctx_src(struct device *jrdev, u32 *desc, u32 err, void *context) { - struct ahash_request *req = context; - struct ahash_edesc *edesc; - struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - int digestsize = crypto_ahash_digestsize(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - int ecode = 0; - - dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - - edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); - if (err) - ecode = caam_jr_strstatus(jrdev, err); - - ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_BIDIRECTIONAL); - memcpy(req->result, state->caam_ctx, digestsize); - kfree(edesc); - - print_hex_dump_debug("ctx@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, - ctx->ctx_len, 1); - - req->base.complete(&req->base, ecode); + ahash_done_cpy(jrdev, desc, err, context, DMA_BIDIRECTIONAL); } -static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, - void *context) +static inline void ahash_done_switch(struct device *jrdev, u32 *desc, u32 err, + void *context, enum dma_data_direction dir) { struct ahash_request *req = context; + struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev); struct ahash_edesc *edesc; struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); @@ -674,11 +635,11 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, 
dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); + edesc = state->edesc; if (err) ecode = caam_jr_strstatus(jrdev, err); - ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_FROM_DEVICE); + ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, dir); kfree(edesc); scatterwalk_map_and_copy(state->buf, req->src, @@ -698,18 +659,42 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, DUMP_PREFIX_ADDRESS, 16, 4, req->result, digestsize, 1); - req->base.complete(&req->base, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. + */ + if (!edesc->bklog) + req->base.complete(&req->base, ecode); + else + crypto_finalize_hash_request(jrp->engine, req, ecode); + +} + +static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err, + void *context) +{ + ahash_done_switch(jrdev, desc, err, context, DMA_BIDIRECTIONAL); +} + +static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, + void *context) +{ + ahash_done_switch(jrdev, desc, err, context, DMA_FROM_DEVICE); } /* * Allocate an enhanced descriptor, which contains the hardware descriptor * and space for hardware scatter table containing sg_num entries. */ -static struct ahash_edesc *ahash_edesc_alloc(struct caam_hash_ctx *ctx, +static struct ahash_edesc *ahash_edesc_alloc(struct ahash_request *req, int sg_num, u32 *sh_desc, - dma_addr_t sh_desc_dma, - gfp_t flags) + dma_addr_t sh_desc_dma) { + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = ahash_request_ctx(req); + gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
+ GFP_KERNEL : GFP_ATOMIC; struct ahash_edesc *edesc; unsigned int sg_size = sg_num * sizeof(struct sec4_sg_entry); @@ -719,6 +704,8 @@ static struct ahash_edesc *ahash_edesc_alloc(struct caam_hash_ctx *ctx, return NULL; } + state->edesc = edesc; + init_job_desc_shared(edesc->hw_desc, sh_desc_dma, desc_len(sh_desc), HDR_SHARE_DEFER | HDR_REVERSE); @@ -761,6 +748,62 @@ static int ahash_edesc_add_src(struct caam_hash_ctx *ctx, return 0; } +static int ahash_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct ahash_request *req = ahash_request_cast(areq); + struct caam_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct caam_hash_state *state = ahash_request_ctx(req); + struct device *jrdev = ctx->jrdev; + u32 *desc = state->edesc->hw_desc; + int ret; + + state->edesc->bklog = true; + + ret = caam_jr_enqueue(jrdev, desc, state->ahash_op_done, req); + + if (ret != -EINPROGRESS) { + ahash_unmap(jrdev, state->edesc, req, 0); + kfree(state->edesc); + } else { + ret = 0; + } + + return ret; +} + +static int ahash_enqueue_req(struct device *jrdev, + void (*cbk)(struct device *jrdev, u32 *desc, + u32 err, void *context), + struct ahash_request *req, + int dst_len, enum dma_data_direction dir) +{ + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); + struct caam_hash_state *state = ahash_request_ctx(req); + struct ahash_edesc *edesc = state->edesc; + u32 *desc = edesc->hw_desc; + int ret; + + state->ahash_op_done = cbk; + + /* + * Only the backlog request are sent to crypto-engine since the others + * can be handled by CAAM, if free, especially since JR has up to 1024 + * entries (more than the 10 entries from crypto-engine). 
+ */ + if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + ret = crypto_transfer_hash_request_to_engine(jrpriv->engine, + req); + else + ret = caam_jr_enqueue(jrdev, desc, cbk, req); + + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) { + ahash_unmap_ctx(jrdev, edesc, req, dst_len, dir); + kfree(edesc); + } + + return ret; +} + /* submit update job descriptor */ static int ahash_update_ctx(struct ahash_request *req) { @@ -768,8 +811,6 @@ static int ahash_update_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; u8 *buf = state->buf; int *buflen = &state->buflen; int *next_buflen = &state->next_buflen; @@ -823,8 +864,8 @@ static int ahash_update_ctx(struct ahash_request *req) * allocate space for base edesc and hw desc commands, * link tables */ - edesc = ahash_edesc_alloc(ctx, pad_nents, ctx->sh_desc_update, - ctx->sh_desc_update_dma, flags); + edesc = ahash_edesc_alloc(req, pad_nents, ctx->sh_desc_update, + ctx->sh_desc_update_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -870,11 +911,8 @@ static int ahash_update_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_bi, req); - if (ret) - goto unmap_ctx; - - ret = -EINPROGRESS; + ret = ahash_enqueue_req(jrdev, ahash_done_bi, req, + ctx->ctx_len, DMA_BIDIRECTIONAL); } else if (*next_buflen) { scatterwalk_map_and_copy(buf + *buflen, req->src, 0, req->nbytes, 0); @@ -898,8 +936,6 @@ static int ahash_final_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
- GFP_KERNEL : GFP_ATOMIC; int buflen = state->buflen; u32 *desc; int sec4_sg_bytes; @@ -911,8 +947,8 @@ static int ahash_final_ctx(struct ahash_request *req) sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, 4, ctx->sh_desc_fin, - ctx->sh_desc_fin_dma, flags); + edesc = ahash_edesc_alloc(req, 4, ctx->sh_desc_fin, + ctx->sh_desc_fin_dma); if (!edesc) return -ENOMEM; @@ -947,11 +983,8 @@ static int ahash_final_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req); - if (ret) - goto unmap_ctx; - - return -EINPROGRESS; + return ahash_enqueue_req(jrdev, ahash_done_ctx_src, req, + digestsize, DMA_BIDIRECTIONAL); unmap_ctx: ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_BIDIRECTIONAL); kfree(edesc); @@ -964,8 +997,6 @@ static int ahash_finup_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; int buflen = state->buflen; u32 *desc; int sec4_sg_src_index; @@ -994,9 +1025,8 @@ static int ahash_finup_ctx(struct ahash_request *req) sec4_sg_src_index = 1 + (buflen ? 
1 : 0); /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents, - ctx->sh_desc_fin, ctx->sh_desc_fin_dma, - flags); + edesc = ahash_edesc_alloc(req, sec4_sg_src_index + mapped_nents, + ctx->sh_desc_fin, ctx->sh_desc_fin_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -1027,11 +1057,8 @@ static int ahash_finup_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req); - if (ret) - goto unmap_ctx; - - return -EINPROGRESS; + return ahash_enqueue_req(jrdev, ahash_done_ctx_src, req, + digestsize, DMA_BIDIRECTIONAL); unmap_ctx: ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_BIDIRECTIONAL); kfree(edesc); @@ -1044,8 +1071,6 @@ static int ahash_digest(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; u32 *desc; int digestsize = crypto_ahash_digestsize(ahash); int src_nents, mapped_nents; @@ -1072,9 +1097,8 @@ static int ahash_digest(struct ahash_request *req) } /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, mapped_nents > 1 ? mapped_nents : 0, - ctx->sh_desc_digest, ctx->sh_desc_digest_dma, - flags); + edesc = ahash_edesc_alloc(req, mapped_nents > 1 ? 
mapped_nents : 0, + ctx->sh_desc_digest, ctx->sh_desc_digest_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -1103,15 +1127,8 @@ static int ahash_digest(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); - kfree(edesc); - } - - return ret; + return ahash_enqueue_req(jrdev, ahash_done, req, digestsize, + DMA_FROM_DEVICE); } /* submit ahash final if it the first job descriptor */ @@ -1121,8 +1138,6 @@ static int ahash_final_no_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; u8 *buf = state->buf; int buflen = state->buflen; u32 *desc; @@ -1131,8 +1146,8 @@ static int ahash_final_no_ctx(struct ahash_request *req) int ret; /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, 0, ctx->sh_desc_digest, - ctx->sh_desc_digest_dma, flags); + edesc = ahash_edesc_alloc(req, 0, ctx->sh_desc_digest, + ctx->sh_desc_digest_dma); if (!edesc) return -ENOMEM; @@ -1157,20 +1172,12 @@ static int ahash_final_no_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); - kfree(edesc); - } - - return ret; + return ahash_enqueue_req(jrdev, ahash_done, req, + digestsize, DMA_FROM_DEVICE); unmap: ahash_unmap(jrdev, edesc, req, digestsize); kfree(edesc); return -ENOMEM; - } /* submit ahash update if it the first job descriptor after update */ @@ -1180,8 +1187,6 @@ static int 
ahash_update_no_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; u8 *buf = state->buf; int *buflen = &state->buflen; int *next_buflen = &state->next_buflen; @@ -1234,10 +1239,9 @@ static int ahash_update_no_ctx(struct ahash_request *req) * allocate space for base edesc and hw desc commands, * link tables */ - edesc = ahash_edesc_alloc(ctx, pad_nents, + edesc = ahash_edesc_alloc(req, pad_nents, ctx->sh_desc_update_first, - ctx->sh_desc_update_first_dma, - flags); + ctx->sh_desc_update_first_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -1273,11 +1277,10 @@ static int ahash_update_no_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req); - if (ret) - goto unmap_ctx; - - ret = -EINPROGRESS; + ret = ahash_enqueue_req(jrdev, ahash_done_ctx_dst, req, + ctx->ctx_len, DMA_TO_DEVICE); + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) + return ret; state->update = ahash_update_ctx; state->finup = ahash_finup_ctx; state->final = ahash_final_ctx; @@ -1305,8 +1308,6 @@ static int ahash_finup_no_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
- GFP_KERNEL : GFP_ATOMIC; int buflen = state->buflen; u32 *desc; int sec4_sg_bytes, sec4_sg_src_index, src_nents, mapped_nents; @@ -1336,9 +1337,8 @@ static int ahash_finup_no_ctx(struct ahash_request *req) sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents, - ctx->sh_desc_digest, ctx->sh_desc_digest_dma, - flags); + edesc = ahash_edesc_alloc(req, sec4_sg_src_index + mapped_nents, + ctx->sh_desc_digest, ctx->sh_desc_digest_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -1368,15 +1368,8 @@ static int ahash_finup_no_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); - kfree(edesc); - } - - return ret; + return ahash_enqueue_req(jrdev, ahash_done, req, + digestsize, DMA_FROM_DEVICE); unmap: ahash_unmap(jrdev, edesc, req, digestsize); kfree(edesc); @@ -1391,8 +1384,6 @@ static int ahash_update_first(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; u8 *buf = state->buf; int *buflen = &state->buflen; int *next_buflen = &state->next_buflen; @@ -1440,11 +1431,10 @@ static int ahash_update_first(struct ahash_request *req) * allocate space for base edesc and hw desc commands, * link tables */ - edesc = ahash_edesc_alloc(ctx, mapped_nents > 1 ? + edesc = ahash_edesc_alloc(req, mapped_nents > 1 ? 
mapped_nents : 0, ctx->sh_desc_update_first, - ctx->sh_desc_update_first_dma, - flags); + ctx->sh_desc_update_first_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -1467,11 +1457,10 @@ static int ahash_update_first(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req); - if (ret) - goto unmap_ctx; - - ret = -EINPROGRESS; + ret = ahash_enqueue_req(jrdev, ahash_done_ctx_dst, req, + ctx->ctx_len, DMA_TO_DEVICE); + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) + return ret; state->update = ahash_update_ctx; state->finup = ahash_finup_ctx; state->final = ahash_final_ctx; @@ -1774,6 +1763,8 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) HASH_MSG_LEN + SHA256_DIGEST_SIZE, HASH_MSG_LEN + 64, HASH_MSG_LEN + SHA512_DIGEST_SIZE }; + const size_t sh_desc_update_offset = offsetof(struct caam_hash_ctx, + sh_desc_update); dma_addr_t dma_addr; struct caam_drv_private *priv; @@ -1826,7 +1817,8 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) } dma_addr = dma_map_single_attrs(ctx->jrdev, ctx->sh_desc_update, - offsetof(struct caam_hash_ctx, key), + offsetof(struct caam_hash_ctx, key) - + sh_desc_update_offset, ctx->dir, DMA_ATTR_SKIP_CPU_SYNC); if (dma_mapping_error(ctx->jrdev, dma_addr)) { dev_err(ctx->jrdev, "unable to map shared descriptors\n"); @@ -1844,11 +1836,16 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) ctx->sh_desc_update_dma = dma_addr; ctx->sh_desc_update_first_dma = dma_addr + offsetof(struct caam_hash_ctx, - sh_desc_update_first); + sh_desc_update_first) - + sh_desc_update_offset; ctx->sh_desc_fin_dma = dma_addr + offsetof(struct caam_hash_ctx, - sh_desc_fin); + sh_desc_fin) - + sh_desc_update_offset; ctx->sh_desc_digest_dma = dma_addr + offsetof(struct caam_hash_ctx, - sh_desc_digest); + sh_desc_digest) - + sh_desc_update_offset; + + ctx->enginectx.op.do_one_request = ahash_do_one_req; 
crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct caam_hash_state)); @@ -1865,7 +1862,8 @@ static void caam_hash_cra_exit(struct crypto_tfm *tfm) struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_update_dma, - offsetof(struct caam_hash_ctx, key), + offsetof(struct caam_hash_ctx, key) - + offsetof(struct caam_hash_ctx, sh_desc_update), ctx->dir, DMA_ATTR_SKIP_CPU_SYNC); if (ctx->key_dir != DMA_NONE) dma_unmap_single_attrs(ctx->jrdev, ctx->adata.key_dma, diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 6619c512ef1a..4fcae37a2e33 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -117,76 +117,69 @@ static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc, static void rsa_pub_done(struct device *dev, u32 *desc, u32 err, void *context) { struct akcipher_request *req = context; + struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); + struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); struct rsa_edesc *edesc; int ecode = 0; if (err) ecode = caam_jr_strstatus(dev, err); - edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + edesc = req_ctx->edesc; rsa_pub_unmap(dev, edesc, req); rsa_io_unmap(dev, edesc, req); kfree(edesc); - akcipher_request_complete(req, ecode); -} - -static void rsa_priv_f1_done(struct device *dev, u32 *desc, u32 err, - void *context) -{ - struct akcipher_request *req = context; - struct rsa_edesc *edesc; - int ecode = 0; - - if (err) - ecode = caam_jr_strstatus(dev, err); - - edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); - - rsa_priv_f1_unmap(dev, edesc, req); - rsa_io_unmap(dev, edesc, req); - kfree(edesc); - - akcipher_request_complete(req, ecode); -} - -static void rsa_priv_f2_done(struct device *dev, u32 *desc, u32 err, - void *context) -{ - struct akcipher_request *req = context; - struct rsa_edesc *edesc; - int ecode = 0; - - if (err) - ecode = caam_jr_strstatus(dev, err); - 
- edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); - - rsa_priv_f2_unmap(dev, edesc, req); - rsa_io_unmap(dev, edesc, req); - kfree(edesc); - - akcipher_request_complete(req, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. + */ + if (!edesc->bklog) + akcipher_request_complete(req, ecode); + else + crypto_finalize_akcipher_request(jrp->engine, req, ecode); } -static void rsa_priv_f3_done(struct device *dev, u32 *desc, u32 err, - void *context) +static void rsa_priv_f_done(struct device *dev, u32 *desc, u32 err, + void *context) { struct akcipher_request *req = context; + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); struct rsa_edesc *edesc; int ecode = 0; if (err) ecode = caam_jr_strstatus(dev, err); - edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + edesc = req_ctx->edesc; + + switch (key->priv_form) { + case FORM1: + rsa_priv_f1_unmap(dev, edesc, req); + break; + case FORM2: + rsa_priv_f2_unmap(dev, edesc, req); + break; + case FORM3: + rsa_priv_f3_unmap(dev, edesc, req); + } - rsa_priv_f3_unmap(dev, edesc, req); rsa_io_unmap(dev, edesc, req); kfree(edesc); - akcipher_request_complete(req, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. 
+ */ + if (!edesc->bklog) + akcipher_request_complete(req, ecode); + else + crypto_finalize_akcipher_request(jrp->engine, req, ecode); } /** @@ -334,6 +327,8 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req, edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; + req_ctx->edesc = edesc; + if (!sec4_sg_bytes) return edesc; @@ -364,6 +359,33 @@ src_fail: return ERR_PTR(-ENOMEM); } +static int akcipher_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct akcipher_request *req = container_of(areq, + struct akcipher_request, + base); + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct device *jrdev = ctx->dev; + u32 *desc = req_ctx->edesc->hw_desc; + int ret; + + req_ctx->edesc->bklog = true; + + ret = caam_jr_enqueue(jrdev, desc, req_ctx->akcipher_op_done, req); + + if (ret != -EINPROGRESS) { + rsa_pub_unmap(jrdev, req_ctx->edesc, req); + rsa_io_unmap(jrdev, req_ctx->edesc, req); + kfree(req_ctx->edesc); + } else { + ret = 0; + } + + return ret; +} + static int set_rsa_pub_pdb(struct akcipher_request *req, struct rsa_edesc *edesc) { @@ -627,6 +649,53 @@ unmap_p: return -ENOMEM; } +static int akcipher_enqueue_req(struct device *jrdev, + void (*cbk)(struct device *jrdev, u32 *desc, + u32 err, void *context), + struct akcipher_request *req) +{ + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); + struct rsa_edesc *edesc = req_ctx->edesc; + u32 *desc = edesc->hw_desc; + int ret; + + req_ctx->akcipher_op_done = cbk; + /* + * Only the backlog request are sent to crypto-engine since the others + * can be handled by CAAM, if free, especially since JR has up to 1024 + * entries 
(more than the 10 entries from crypto-engine). + */ + if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + ret = crypto_transfer_akcipher_request_to_engine(jrpriv->engine, + req); + else + ret = caam_jr_enqueue(jrdev, desc, cbk, req); + + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) { + switch (key->priv_form) { + case FORM1: + rsa_priv_f1_unmap(jrdev, edesc, req); + break; + case FORM2: + rsa_priv_f2_unmap(jrdev, edesc, req); + break; + case FORM3: + rsa_priv_f3_unmap(jrdev, edesc, req); + break; + default: + rsa_pub_unmap(jrdev, edesc, req); + } + rsa_io_unmap(jrdev, edesc, req); + kfree(edesc); + } + + return ret; +} + static int caam_rsa_enc(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); @@ -658,11 +727,7 @@ static int caam_rsa_enc(struct akcipher_request *req) /* Initialize Job Descriptor */ init_rsa_pub_desc(edesc->hw_desc, &edesc->pdb.pub); - ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_pub_done, req); - if (!ret) - return -EINPROGRESS; - - rsa_pub_unmap(jrdev, edesc, req); + return akcipher_enqueue_req(jrdev, rsa_pub_done, req); init_fail: rsa_io_unmap(jrdev, edesc, req); @@ -691,11 +756,7 @@ static int caam_rsa_dec_priv_f1(struct akcipher_request *req) /* Initialize Job Descriptor */ init_rsa_priv_f1_desc(edesc->hw_desc, &edesc->pdb.priv_f1); - ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f1_done, req); - if (!ret) - return -EINPROGRESS; - - rsa_priv_f1_unmap(jrdev, edesc, req); + return akcipher_enqueue_req(jrdev, rsa_priv_f_done, req); init_fail: rsa_io_unmap(jrdev, edesc, req); @@ -724,11 +785,7 @@ static int caam_rsa_dec_priv_f2(struct akcipher_request *req) /* Initialize Job Descriptor */ init_rsa_priv_f2_desc(edesc->hw_desc, &edesc->pdb.priv_f2); - ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f2_done, req); - if (!ret) - return -EINPROGRESS; - - rsa_priv_f2_unmap(jrdev, edesc, req); + return akcipher_enqueue_req(jrdev, rsa_priv_f_done, req); init_fail: rsa_io_unmap(jrdev, edesc, req); 
@@ -757,11 +814,7 @@ static int caam_rsa_dec_priv_f3(struct akcipher_request *req) /* Initialize Job Descriptor */ init_rsa_priv_f3_desc(edesc->hw_desc, &edesc->pdb.priv_f3); - ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f3_done, req); - if (!ret) - return -EINPROGRESS; - - rsa_priv_f3_unmap(jrdev, edesc, req); + return akcipher_enqueue_req(jrdev, rsa_priv_f_done, req); init_fail: rsa_io_unmap(jrdev, edesc, req); @@ -1054,6 +1107,8 @@ static int caam_rsa_init_tfm(struct crypto_akcipher *tfm) return -ENOMEM; } + ctx->enginectx.op.do_one_request = akcipher_do_one_req; + return 0; } diff --git a/drivers/crypto/caam/caampkc.h b/drivers/crypto/caam/caampkc.h index c68fb4c03ee6..cc889a525e2f 100644 --- a/drivers/crypto/caam/caampkc.h +++ b/drivers/crypto/caam/caampkc.h @@ -12,6 +12,7 @@ #define _PKC_DESC_H_ #include "compat.h" #include "pdb.h" +#include <crypto/engine.h> /** * caam_priv_key_form - CAAM RSA private key representation @@ -87,11 +88,13 @@ struct caam_rsa_key { /** * caam_rsa_ctx - per session context. 
+ * @enginectx : crypto engine context * @key : RSA key in DMA zone * @dev : device structure * @padding_dma : dma address of padding, for adding it to the input */ struct caam_rsa_ctx { + struct crypto_engine_ctx enginectx; struct caam_rsa_key key; struct device *dev; dma_addr_t padding_dma; @@ -103,11 +106,16 @@ struct caam_rsa_ctx { * @src : input scatterlist (stripped of leading zeros) * @fixup_src : input scatterlist (that might be stripped of leading zeros) * @fixup_src_len : length of the fixup_src input scatterlist + * @edesc : s/w-extended rsa descriptor + * @akcipher_op_done : callback used when operation is done */ struct caam_rsa_req_ctx { struct scatterlist src[2]; struct scatterlist *fixup_src; unsigned int fixup_src_len; + struct rsa_edesc *edesc; + void (*akcipher_op_done)(struct device *jrdev, u32 *desc, u32 err, + void *context); }; /** @@ -117,6 +125,7 @@ struct caam_rsa_req_ctx { * @mapped_src_nents: number of segments in input h/w link table * @mapped_dst_nents: number of segments in output h/w link table * @sec4_sg_bytes : length of h/w link table + * @bklog : stored to determine if the request needs backlog * @sec4_sg_dma : dma address of h/w link table * @sec4_sg : pointer to h/w link table * @pdb : specific RSA Protocol Data Block (PDB) @@ -128,6 +137,7 @@ struct rsa_edesc { int mapped_src_nents; int mapped_dst_nents; int sec4_sg_bytes; + bool bklog; dma_addr_t sec4_sg_dma; struct sec4_sg_entry *sec4_sg; union { diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index e8baacaabe07..77d048dfe5d0 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -7,35 +7,12 @@ * * Based on caamalg.c crypto API driver. 
* - * relationship between job descriptors to shared descriptors: - * - * --------------- -------------- - * | JobDesc #0 |-------------------->| ShareDesc | - * | *(buffer 0) | |------------->| (generate) | - * --------------- | | (move) | - * | | (store) | - * --------------- | -------------- - * | JobDesc #1 |------| - * | *(buffer 1) | - * --------------- - * - * A job desc looks like this: - * - * --------------------- - * | Header | - * | ShareDesc Pointer | - * | SEQ_OUT_PTR | - * | (output buffer) | - * --------------------- - * - * The SharedDesc never changes, and each job descriptor points to one of two - * buffers for each device, from which the data will be copied into the - * requested destination */ #include <linux/hw_random.h> #include <linux/completion.h> #include <linux/atomic.h> +#include <linux/kfifo.h> #include "compat.h" @@ -45,278 +22,205 @@ #include "jr.h" #include "error.h" +#define CAAM_RNG_MAX_FIFO_STORE_SIZE 16 + /* - * Maximum buffer size: maximum number of random, cache-aligned bytes that - * will be generated and moved to seq out ptr (extlen not allowed) + * Length of used descriptors, see caam_init_desc() */ -#define RN_BUF_SIZE (0xffff / L1_CACHE_BYTES * \ - L1_CACHE_BYTES) - -/* length of descriptors */ -#define DESC_JOB_O_LEN (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ_MAX * 2) -#define DESC_RNG_LEN (3 * CAAM_CMD_SZ) - -/* Buffer, its dma address and lock */ -struct buf_data { - u8 buf[RN_BUF_SIZE] ____cacheline_aligned; - dma_addr_t addr; - struct completion filled; - u32 hw_desc[DESC_JOB_O_LEN]; -#define BUF_NOT_EMPTY 0 -#define BUF_EMPTY 1 -#define BUF_PENDING 2 /* Empty, but with job pending --don't submit another */ - atomic_t empty; -}; +#define CAAM_RNG_DESC_LEN (CAAM_CMD_SZ + \ + CAAM_CMD_SZ + \ + CAAM_CMD_SZ + CAAM_PTR_SZ_MAX) /* rng per-device context */ struct caam_rng_ctx { + struct hwrng rng; struct device *jrdev; - dma_addr_t sh_desc_dma; - u32 sh_desc[DESC_RNG_LEN]; - unsigned int cur_buf_idx; - int current_buf; - struct 
buf_data bufs[2]; + struct device *ctrldev; + void *desc_async; + void *desc_sync; + struct work_struct worker; + struct kfifo fifo; }; -static struct caam_rng_ctx *rng_ctx; - -/* - * Variable used to avoid double free of resources in case - * algorithm registration was unsuccessful - */ -static bool init_done; - -static inline void rng_unmap_buf(struct device *jrdev, struct buf_data *bd) -{ - if (bd->addr) - dma_unmap_single(jrdev, bd->addr, RN_BUF_SIZE, - DMA_FROM_DEVICE); -} +struct caam_rng_job_ctx { + struct completion *done; + int *err; +}; -static inline void rng_unmap_ctx(struct caam_rng_ctx *ctx) +static struct caam_rng_ctx *to_caam_rng_ctx(struct hwrng *r) { - struct device *jrdev = ctx->jrdev; - - if (ctx->sh_desc_dma) - dma_unmap_single(jrdev, ctx->sh_desc_dma, - desc_bytes(ctx->sh_desc), DMA_TO_DEVICE); - rng_unmap_buf(jrdev, &ctx->bufs[0]); - rng_unmap_buf(jrdev, &ctx->bufs[1]); + return (struct caam_rng_ctx *)r->priv; } -static void rng_done(struct device *jrdev, u32 *desc, u32 err, void *context) +static void caam_rng_done(struct device *jrdev, u32 *desc, u32 err, + void *context) { - struct buf_data *bd; - - bd = container_of(desc, struct buf_data, hw_desc[0]); + struct caam_rng_job_ctx *jctx = context; if (err) - caam_jr_strstatus(jrdev, err); + *jctx->err = caam_jr_strstatus(jrdev, err); - atomic_set(&bd->empty, BUF_NOT_EMPTY); - complete(&bd->filled); - - /* Buffer refilled, invalidate cache */ - dma_sync_single_for_cpu(jrdev, bd->addr, RN_BUF_SIZE, DMA_FROM_DEVICE); - - print_hex_dump_debug("rng refreshed buf@: ", DUMP_PREFIX_ADDRESS, 16, 4, - bd->buf, RN_BUF_SIZE, 1); + complete(jctx->done); } -static inline int submit_job(struct caam_rng_ctx *ctx, int to_current) +static u32 *caam_init_desc(u32 *desc, dma_addr_t dst_dma) { - struct buf_data *bd = &ctx->bufs[!(to_current ^ ctx->current_buf)]; - struct device *jrdev = ctx->jrdev; - u32 *desc = bd->hw_desc; - int err; - - dev_dbg(jrdev, "submitting job %d\n", !(to_current ^ ctx->current_buf)); - 
init_completion(&bd->filled); - err = caam_jr_enqueue(jrdev, desc, rng_done, ctx); - if (err) - complete(&bd->filled); /* don't wait on failed job*/ - else - atomic_inc(&bd->empty); /* note if pending */ - - return err; + init_job_desc(desc, 0); /* + 1 cmd_sz */ + /* Generate random bytes: + 1 cmd_sz */ + append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG | + OP_ALG_PR_ON); + /* Store bytes: + 1 cmd_sz + caam_ptr_sz */ + append_fifo_store(desc, dst_dma, + CAAM_RNG_MAX_FIFO_STORE_SIZE, FIFOST_TYPE_RNGSTORE); + + print_hex_dump_debug("rng job desc@: ", DUMP_PREFIX_ADDRESS, + 16, 4, desc, desc_bytes(desc), 1); + + return desc; } -static int caam_read(struct hwrng *rng, void *data, size_t max, bool wait) +static int caam_rng_read_one(struct device *jrdev, + void *dst, int len, + void *desc, + struct completion *done) { - struct caam_rng_ctx *ctx = rng_ctx; - struct buf_data *bd = &ctx->bufs[ctx->current_buf]; - int next_buf_idx, copied_idx; - int err; - - if (atomic_read(&bd->empty)) { - /* try to submit job if there wasn't one */ - if (atomic_read(&bd->empty) == BUF_EMPTY) { - err = submit_job(ctx, 1); - /* if can't submit job, can't even wait */ - if (err) - return 0; - } - /* no immediate data, so exit if not waiting */ - if (!wait) - return 0; - - /* waiting for pending job */ - if (atomic_read(&bd->empty)) - wait_for_completion(&bd->filled); + dma_addr_t dst_dma; + int err, ret = 0; + struct caam_rng_job_ctx jctx = { + .done = done, + .err = &ret, + }; + + len = CAAM_RNG_MAX_FIFO_STORE_SIZE; + + dst_dma = dma_map_single(jrdev, dst, len, DMA_FROM_DEVICE); + if (dma_mapping_error(jrdev, dst_dma)) { + dev_err(jrdev, "unable to map destination memory\n"); + return -ENOMEM; } - next_buf_idx = ctx->cur_buf_idx + max; - dev_dbg(ctx->jrdev, "%s: start reading at buffer %d, idx %d\n", - __func__, ctx->current_buf, ctx->cur_buf_idx); - - /* if enough data in current buffer */ - if (next_buf_idx < RN_BUF_SIZE) { - memcpy(data, bd->buf + ctx->cur_buf_idx, max); - 
ctx->cur_buf_idx = next_buf_idx; - return max; + init_completion(done); + err = caam_jr_enqueue(jrdev, + caam_init_desc(desc, dst_dma), + caam_rng_done, &jctx); + if (err == -EINPROGRESS) { + wait_for_completion(done); + err = 0; } - /* else, copy what's left... */ - copied_idx = RN_BUF_SIZE - ctx->cur_buf_idx; - memcpy(data, bd->buf + ctx->cur_buf_idx, copied_idx); - ctx->cur_buf_idx = 0; - atomic_set(&bd->empty, BUF_EMPTY); - - /* ...refill... */ - submit_job(ctx, 1); + dma_unmap_single(jrdev, dst_dma, len, DMA_FROM_DEVICE); - /* and use next buffer */ - ctx->current_buf = !ctx->current_buf; - dev_dbg(ctx->jrdev, "switched to buffer %d\n", ctx->current_buf); - - /* since there already is some data read, don't wait */ - return copied_idx + caam_read(rng, data + copied_idx, - max - copied_idx, false); + return err ?: (ret ?: len); } -static inline int rng_create_sh_desc(struct caam_rng_ctx *ctx) +static void caam_rng_fill_async(struct caam_rng_ctx *ctx) { - struct device *jrdev = ctx->jrdev; - u32 *desc = ctx->sh_desc; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Generate random bytes */ - append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG); - - /* Store bytes */ - append_seq_fifo_store(desc, RN_BUF_SIZE, FIFOST_TYPE_RNGSTORE); + struct scatterlist sg[1]; + struct completion done; + int len, nents; + + sg_init_table(sg, ARRAY_SIZE(sg)); + nents = kfifo_dma_in_prepare(&ctx->fifo, sg, ARRAY_SIZE(sg), + CAAM_RNG_MAX_FIFO_STORE_SIZE); + if (!nents) + return; - ctx->sh_desc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + len = caam_rng_read_one(ctx->jrdev, sg_virt(&sg[0]), + sg[0].length, + ctx->desc_async, + &done); + if (len < 0) + return; - print_hex_dump_debug("rng shdesc@: ", DUMP_PREFIX_ADDRESS, 16, 4, - desc, desc_bytes(desc), 1); + kfifo_dma_in_finish(&ctx->fifo, len); +} - return 0; +static void 
caam_rng_worker(struct work_struct *work) +{ + struct caam_rng_ctx *ctx = container_of(work, struct caam_rng_ctx, + worker); + caam_rng_fill_async(ctx); } -static inline int rng_create_job_desc(struct caam_rng_ctx *ctx, int buf_id) +static int caam_read(struct hwrng *rng, void *dst, size_t max, bool wait) { - struct device *jrdev = ctx->jrdev; - struct buf_data *bd = &ctx->bufs[buf_id]; - u32 *desc = bd->hw_desc; - int sh_len = desc_len(ctx->sh_desc); + struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng); + int out; - init_job_desc_shared(desc, ctx->sh_desc_dma, sh_len, HDR_SHARE_DEFER | - HDR_REVERSE); + if (wait) { + struct completion done; - bd->addr = dma_map_single(jrdev, bd->buf, RN_BUF_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(jrdev, bd->addr)) { - dev_err(jrdev, "unable to map dst\n"); - return -ENOMEM; + return caam_rng_read_one(ctx->jrdev, dst, max, + ctx->desc_sync, &done); } - append_seq_out_ptr_intlen(desc, bd->addr, RN_BUF_SIZE, 0); - - print_hex_dump_debug("rng job desc@: ", DUMP_PREFIX_ADDRESS, 16, 4, - desc, desc_bytes(desc), 1); + out = kfifo_out(&ctx->fifo, dst, max); + if (kfifo_is_empty(&ctx->fifo)) + schedule_work(&ctx->worker); - return 0; + return out; } static void caam_cleanup(struct hwrng *rng) { - int i; - struct buf_data *bd; - - for (i = 0; i < 2; i++) { - bd = &rng_ctx->bufs[i]; - if (atomic_read(&bd->empty) == BUF_PENDING) - wait_for_completion(&bd->filled); - } + struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng); - rng_unmap_ctx(rng_ctx); + flush_work(&ctx->worker); + caam_jr_free(ctx->jrdev); + kfifo_free(&ctx->fifo); } -static int caam_init_buf(struct caam_rng_ctx *ctx, int buf_id) +static int caam_init(struct hwrng *rng) { - struct buf_data *bd = &ctx->bufs[buf_id]; + struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng); int err; - err = rng_create_job_desc(ctx, buf_id); - if (err) - return err; - - atomic_set(&bd->empty, BUF_EMPTY); - submit_job(ctx, buf_id == ctx->current_buf); - wait_for_completion(&bd->filled); + ctx->desc_sync = 
devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN, + GFP_DMA | GFP_KERNEL); + if (!ctx->desc_sync) + return -ENOMEM; - return 0; -} + ctx->desc_async = devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN, + GFP_DMA | GFP_KERNEL); + if (!ctx->desc_async) + return -ENOMEM; -static int caam_init_rng(struct caam_rng_ctx *ctx, struct device *jrdev) -{ - int err; + if (kfifo_alloc(&ctx->fifo, CAAM_RNG_MAX_FIFO_STORE_SIZE, + GFP_DMA | GFP_KERNEL)) + return -ENOMEM; - ctx->jrdev = jrdev; + INIT_WORK(&ctx->worker, caam_rng_worker); - err = rng_create_sh_desc(ctx); - if (err) + ctx->jrdev = caam_jr_alloc(); + err = PTR_ERR_OR_ZERO(ctx->jrdev); + if (err) { + kfifo_free(&ctx->fifo); + pr_err("Job Ring Device allocation for transform failed\n"); return err; + } - ctx->current_buf = 0; - ctx->cur_buf_idx = 0; + /* + * Fill async buffer to have early randomness data for + * hw_random + */ + caam_rng_fill_async(ctx); - err = caam_init_buf(ctx, 0); - if (err) - return err; - - return caam_init_buf(ctx, 1); + return 0; } -static struct hwrng caam_rng = { - .name = "rng-caam", - .cleanup = caam_cleanup, - .read = caam_read, -}; +int caam_rng_init(struct device *ctrldev); -void caam_rng_exit(void) +void caam_rng_exit(struct device *ctrldev) { - if (!init_done) - return; - - caam_jr_free(rng_ctx->jrdev); - hwrng_unregister(&caam_rng); - kfree(rng_ctx); + devres_release_group(ctrldev, caam_rng_init); } int caam_rng_init(struct device *ctrldev) { - struct device *dev; + struct caam_rng_ctx *ctx; u32 rng_inst; struct caam_drv_private *priv = dev_get_drvdata(ctrldev); - int err; - init_done = false; + int ret; /* Check for an instantiated RNG before registration */ if (priv->era < 10) @@ -328,31 +232,30 @@ int caam_rng_init(struct device *ctrldev) if (!rng_inst) return 0; - dev = caam_jr_alloc(); - if (IS_ERR(dev)) { - pr_err("Job Ring Device allocation for transform failed\n"); - return PTR_ERR(dev); - } - rng_ctx = kmalloc(sizeof(*rng_ctx), GFP_DMA | GFP_KERNEL); - if (!rng_ctx) { - err = -ENOMEM; 
- goto free_caam_alloc; - } - err = caam_init_rng(rng_ctx, dev); - if (err) - goto free_rng_ctx; + if (!devres_open_group(ctrldev, caam_rng_init, GFP_KERNEL)) + return -ENOMEM; - dev_info(dev, "registering rng-caam\n"); + ctx = devm_kzalloc(ctrldev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; - err = hwrng_register(&caam_rng); - if (!err) { - init_done = true; - return err; + ctx->ctrldev = ctrldev; + + ctx->rng.name = "rng-caam"; + ctx->rng.init = caam_init; + ctx->rng.cleanup = caam_cleanup; + ctx->rng.read = caam_read; + ctx->rng.priv = (unsigned long)ctx; + ctx->rng.quality = 1024; + + dev_info(ctrldev, "registering rng-caam\n"); + + ret = devm_hwrng_register(ctrldev, &ctx->rng); + if (ret) { + caam_rng_exit(ctrldev); + return ret; } -free_rng_ctx: - kfree(rng_ctx); -free_caam_alloc: - caam_jr_free(dev); - return err; + devres_close_group(ctrldev, caam_rng_init); + return 0; } diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 7139366da016..4fcdd262e581 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -10,6 +10,7 @@ #include <linux/of_address.h> #include <linux/of_irq.h> #include <linux/sys_soc.h> +#include <linux/fsl/mc.h> #include "compat.h" #include "regs.h" @@ -36,7 +37,8 @@ static void build_instantiation_desc(u32 *desc, int handle, int do_sk) init_job_desc(desc, 0); op_flags = OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG | - (handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INIT; + (handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INIT | + OP_ALG_PR_ON; /* INIT RNG in non-test mode */ append_operation(desc, op_flags); @@ -196,7 +198,7 @@ static int deinstantiate_rng(struct device *ctrldev, int state_handle_mask) u32 *desc, status; int sh_idx, ret = 0; - desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL); + desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL | GFP_DMA); if (!desc) return -ENOMEM; @@ -273,17 +275,30 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask, int ret = 0, sh_idx; ctrl = (struct 
caam_ctrl __iomem *)ctrlpriv->ctrl; - desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL); + desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL | GFP_DMA); if (!desc) return -ENOMEM; for (sh_idx = 0; sh_idx < RNG4_MAX_HANDLES; sh_idx++) { + const u32 rdsta_if = RDSTA_IF0 << sh_idx; + const u32 rdsta_pr = RDSTA_PR0 << sh_idx; + const u32 rdsta_mask = rdsta_if | rdsta_pr; /* * If the corresponding bit is set, this state handle * was initialized by somebody else, so it's left alone. */ - if ((1 << sh_idx) & state_handle_mask) - continue; + if (rdsta_if & state_handle_mask) { + if (rdsta_pr & state_handle_mask) + continue; + + dev_info(ctrldev, + "RNG4 SH%d was previously instantiated without prediction resistance. Tearing it down\n", + sh_idx); + + ret = deinstantiate_rng(ctrldev, rdsta_if); + if (ret) + break; + } /* Create the descriptor for instantiating RNG State Handle */ build_instantiation_desc(desc, sh_idx, gen_sk); @@ -303,9 +318,9 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask, if (ret) break; - rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK; + rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_MASK; if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) || - !(rdsta_val & (1 << sh_idx))) { + (rdsta_val & rdsta_mask) != rdsta_mask) { ret = -EAGAIN; break; } @@ -341,8 +356,12 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl; r4tst = &ctrl->r4tst[0]; - /* put RNG4 into program mode */ - clrsetbits_32(&r4tst->rtmctl, 0, RTMCTL_PRGM); + /* + * Setting both RTMCTL:PRGM and RTMCTL:TRNG_ACC causes TRNG to + * properly invalidate the entropy in the entropy register and + * force re-generation. 
+ */ + clrsetbits_32(&r4tst->rtmctl, 0, RTMCTL_PRGM | RTMCTL_ACC); /* * Performance-wise, it does not make sense to @@ -372,7 +391,8 @@ start_rng: * select raw sampling in both entropy shifter * and statistical checker; ; put RNG4 into run mode */ - clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, RTMCTL_SAMP_MODE_RAW_ES_SC); + clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM | RTMCTL_ACC, + RTMCTL_SAMP_MODE_RAW_ES_SC); } static int caam_get_era_from_hw(struct caam_ctrl __iomem *ctrl) @@ -559,6 +579,26 @@ static void caam_remove_debugfs(void *root) } #endif +#ifdef CONFIG_FSL_MC_BUS +static bool check_version(struct fsl_mc_version *mc_version, u32 major, + u32 minor, u32 revision) +{ + if (mc_version->major > major) + return true; + + if (mc_version->major == major) { + if (mc_version->minor > minor) + return true; + + if (mc_version->minor == minor && + mc_version->revision > revision) + return true; + } + + return false; +} +#endif + /* Probe routine for CAAM top (controller) level */ static int caam_probe(struct platform_device *pdev) { @@ -577,6 +617,7 @@ static int caam_probe(struct platform_device *pdev) u8 rng_vid; int pg_size; int BLOCK_OFFSET = 0; + bool pr_support = false; ctrlpriv = devm_kzalloc(&pdev->dev, sizeof(*ctrlpriv), GFP_KERNEL); if (!ctrlpriv) @@ -662,6 +703,21 @@ static int caam_probe(struct platform_device *pdev) /* Get the IRQ of the controller (for security violations only) */ ctrlpriv->secvio_irq = irq_of_parse_and_map(nprop, 0); + np = of_find_compatible_node(NULL, NULL, "fsl,qoriq-mc"); + ctrlpriv->mc_en = !!np; + of_node_put(np); + +#ifdef CONFIG_FSL_MC_BUS + if (ctrlpriv->mc_en) { + struct fsl_mc_version *mc_version; + + mc_version = fsl_mc_get_version(); + if (mc_version) + pr_support = check_version(mc_version, 10, 20, 0); + else + return -EPROBE_DEFER; + } +#endif /* * Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel, @@ -669,10 +725,6 @@ static int caam_probe(struct platform_device *pdev) * In case of SoCs with Management 
Complex, MC f/w performs * the configuration. */ - np = of_find_compatible_node(NULL, NULL, "fsl,qoriq-mc"); - ctrlpriv->mc_en = !!np; - of_node_put(np); - if (!ctrlpriv->mc_en) clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK, MCFGR_AWCACHE_CACH | MCFGR_AWCACHE_BUFF | @@ -779,7 +831,7 @@ static int caam_probe(struct platform_device *pdev) * already instantiated, do RNG instantiation * In case of SoCs with Management Complex, RNG is managed by MC f/w. */ - if (!ctrlpriv->mc_en && rng_vid >= 4) { + if (!(ctrlpriv->mc_en && pr_support) && rng_vid >= 4) { ctrlpriv->rng4_sh_init = rd_reg32(&ctrl->r4tst[0].rdsta); /* @@ -789,11 +841,11 @@ static int caam_probe(struct platform_device *pdev) * to regenerate these keys before the next POR. */ gen_sk = ctrlpriv->rng4_sh_init & RDSTA_SKVN ? 0 : 1; - ctrlpriv->rng4_sh_init &= RDSTA_IFMASK; + ctrlpriv->rng4_sh_init &= RDSTA_MASK; do { int inst_handles = rd_reg32(&ctrl->r4tst[0].rdsta) & - RDSTA_IFMASK; + RDSTA_MASK; /* * If either SH were instantiated by somebody else * (e.g. 
u-boot) then it is assumed that the entropy @@ -833,7 +885,7 @@ static int caam_probe(struct platform_device *pdev) * Set handles init'ed by this module as the complement of the * already initialized ones */ - ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_IFMASK; + ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_MASK; /* Enable RDB bit so that RNG works faster */ clrsetbits_32(&ctrl->scfgr, 0, SCFGR_RDBENABLE); diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index 4b6854bf896a..e796d3cb9be8 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -1254,6 +1254,8 @@ #define OP_ALG_ICV_OFF (0 << OP_ALG_ICV_SHIFT) #define OP_ALG_ICV_ON (1 << OP_ALG_ICV_SHIFT) +#define OP_ALG_PR_ON BIT(1) + #define OP_ALG_DIR_SHIFT 0 #define OP_ALG_DIR_MASK 1 #define OP_ALG_DECRYPT 0 diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index c7c10c90464b..402d6a362e8c 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -11,6 +11,7 @@ #define INTERN_H #include "ctrl.h" +#include <crypto/engine.h> /* Currently comes from Kconfig param as a ^2 (driver-required) */ #define JOBR_DEPTH (1 << CONFIG_CRYPTO_DEV_FSL_CAAM_RINGSIZE) @@ -46,6 +47,7 @@ struct caam_drv_private_jr { struct caam_job_ring __iomem *rregs; /* JobR's register space */ struct tasklet_struct irqtask; int irq; /* One per queue */ + bool hwrng; /* Number of scatterlist crypt transforms active on the JobR */ atomic_t tfm_count ____cacheline_aligned; @@ -60,6 +62,7 @@ struct caam_drv_private_jr { int out_ring_read_index; /* Output index "tail" */ int tail; /* entinfo (s/w ring) tail index */ void *outring; /* Base of output ring, DMA-safe */ + struct crypto_engine *engine; }; /* @@ -161,7 +164,7 @@ static inline void caam_pkc_exit(void) #ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API int caam_rng_init(struct device *dev); -void caam_rng_exit(void); +void caam_rng_exit(struct device *dev); #else @@ -170,9 +173,7 @@ static inline 
int caam_rng_init(struct device *dev) return 0; } -static inline void caam_rng_exit(void) -{ -} +static inline void caam_rng_exit(struct device *dev) {} #endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API */ diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index fc97cde27059..4af22e7ceb4f 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -27,7 +27,8 @@ static struct jr_driver_data driver_data; static DEFINE_MUTEX(algs_lock); static unsigned int active_devs; -static void register_algs(struct device *dev) +static void register_algs(struct caam_drv_private_jr *jrpriv, + struct device *dev) { mutex_lock(&algs_lock); @@ -37,7 +38,7 @@ static void register_algs(struct device *dev) caam_algapi_init(dev); caam_algapi_hash_init(dev); caam_pkc_init(dev); - caam_rng_init(dev); + jrpriv->hwrng = !caam_rng_init(dev); caam_qi_algapi_init(dev); algs_unlock: @@ -53,7 +54,6 @@ static void unregister_algs(void) caam_qi_algapi_exit(); - caam_rng_exit(); caam_pkc_exit(); caam_algapi_hash_exit(); caam_algapi_exit(); @@ -62,6 +62,15 @@ algs_unlock: mutex_unlock(&algs_lock); } +static void caam_jr_crypto_engine_exit(void *data) +{ + struct device *jrdev = data; + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); + + /* Free the resources of crypto-engine */ + crypto_engine_exit(jrpriv->engine); +} + static int caam_reset_hw_jr(struct device *dev) { struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); @@ -126,6 +135,9 @@ static int caam_jr_remove(struct platform_device *pdev) jrdev = &pdev->dev; jrpriv = dev_get_drvdata(jrdev); + if (jrpriv->hwrng) + caam_rng_exit(jrdev->parent); + /* * Return EBUSY if job ring already allocated. */ @@ -324,8 +336,8 @@ void caam_jr_free(struct device *rdev) EXPORT_SYMBOL(caam_jr_free); /** - * caam_jr_enqueue() - Enqueue a job descriptor head. Returns 0 if OK, - * -EBUSY if the queue is full, -EIO if it cannot map the caller's + * caam_jr_enqueue() - Enqueue a job descriptor head. 
Returns -EINPROGRESS + * if OK, -ENOSPC if the queue is full, -EIO if it cannot map the caller's * descriptor. * @dev: device of the job ring to be used. This device should have * been assigned prior by caam_jr_register(). @@ -377,7 +389,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, CIRC_SPACE(head, tail, JOBR_DEPTH) <= 0) { spin_unlock_bh(&jrp->inplock); dma_unmap_single(dev, desc_dma, desc_size, DMA_TO_DEVICE); - return -EBUSY; + return -ENOSPC; } head_entry = &jrp->entinfo[head]; @@ -414,7 +426,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, spin_unlock_bh(&jrp->inplock); - return 0; + return -EINPROGRESS; } EXPORT_SYMBOL(caam_jr_enqueue); @@ -505,7 +517,7 @@ static int caam_jr_probe(struct platform_device *pdev) int error; jrdev = &pdev->dev; - jrpriv = devm_kmalloc(jrdev, sizeof(*jrpriv), GFP_KERNEL); + jrpriv = devm_kzalloc(jrdev, sizeof(*jrpriv), GFP_KERNEL); if (!jrpriv) return -ENOMEM; @@ -538,6 +550,25 @@ static int caam_jr_probe(struct platform_device *pdev) return error; } + /* Initialize crypto engine */ + jrpriv->engine = crypto_engine_alloc_init(jrdev, false); + if (!jrpriv->engine) { + dev_err(jrdev, "Could not init crypto-engine\n"); + return -ENOMEM; + } + + error = devm_add_action_or_reset(jrdev, caam_jr_crypto_engine_exit, + jrdev); + if (error) + return error; + + /* Start crypto engine */ + error = crypto_engine_start(jrpriv->engine); + if (error) { + dev_err(jrdev, "Could not start crypto-engine\n"); + return error; + } + /* Identify the interrupt */ jrpriv->irq = irq_of_parse_and_map(nprop, 0); if (!jrpriv->irq) { @@ -562,7 +593,7 @@ static int caam_jr_probe(struct platform_device *pdev) atomic_set(&jrpriv->tfm_count, 0); - register_algs(jrdev->parent); + register_algs(jrpriv, jrdev->parent); return 0; } diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c index 5a851ddc48fb..b0e8a4939b4f 100644 --- a/drivers/crypto/caam/key_gen.c +++ b/drivers/crypto/caam/key_gen.c @@ -108,7 +108,7 @@ int 
gen_split_key(struct device *jrdev, u8 *key_out, init_completion(&result.completion); ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result); - if (!ret) { + if (ret == -EINPROGRESS) { /* in progress */ wait_for_completion(&result.completion); ret = result.err; diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c index dacf2fa4aa8e..b390b935db6d 100644 --- a/drivers/crypto/caam/qi.c +++ b/drivers/crypto/caam/qi.c @@ -4,7 +4,7 @@ * Queue Interface backend functionality * * Copyright 2013-2016 Freescale Semiconductor, Inc. - * Copyright 2016-2017, 2019 NXP + * Copyright 2016-2017, 2019-2020 NXP */ #include <linux/cpumask.h> @@ -124,8 +124,10 @@ int caam_qi_enqueue(struct device *qidev, struct caam_drv_req *req) do { ret = qman_enqueue(req->drv_ctx->req_fq, &fd); - if (likely(!ret)) + if (likely(!ret)) { + refcount_inc(&req->drv_ctx->refcnt); return 0; + } if (ret != -EBUSY) break; @@ -148,11 +150,6 @@ static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq, fd = &msg->ern.fd; - if (qm_fd_get_format(fd) != qm_fd_compound) { - dev_err(qidev, "Non-compound FD from CAAM\n"); - return; - } - drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd)); if (!drv_req) { dev_err(qidev, @@ -160,6 +157,13 @@ static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq, return; } + refcount_dec(&drv_req->drv_ctx->refcnt); + + if (qm_fd_get_format(fd) != qm_fd_compound) { + dev_err(qidev, "Non-compound FD from CAAM\n"); + return; + } + dma_unmap_single(drv_req->drv_ctx->qidev, qm_fd_addr(fd), sizeof(drv_req->fd_sgt), DMA_BIDIRECTIONAL); @@ -287,9 +291,10 @@ empty_fq: return ret; } -static int empty_caam_fq(struct qman_fq *fq) +static int empty_caam_fq(struct qman_fq *fq, struct caam_drv_ctx *drv_ctx) { int ret; + int retries = 10; struct qm_mcr_queryfq_np np; /* Wait till the older CAAM FQ get empty */ @@ -304,11 +309,18 @@ static int empty_caam_fq(struct qman_fq *fq) msleep(20); } while (1); - /* - * Give extra time for pending jobs 
from this FQ in holding tanks - * to get processed - */ - msleep(20); + /* Wait until pending jobs from this FQ are processed by CAAM */ + do { + if (refcount_read(&drv_ctx->refcnt) == 1) + break; + + msleep(20); + } while (--retries); + + if (!retries) + dev_warn_once(drv_ctx->qidev, "%d frames from FQID %u still pending in CAAM\n", + refcount_read(&drv_ctx->refcnt), fq->fqid); + return 0; } @@ -340,7 +352,7 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc) drv_ctx->req_fq = new_fq; /* Empty and remove the older FQ */ - ret = empty_caam_fq(old_fq); + ret = empty_caam_fq(old_fq, drv_ctx); if (ret) { dev_err(qidev, "Old CAAM FQ empty failed: %d\n", ret); @@ -453,6 +465,9 @@ struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev, return ERR_PTR(-ENOMEM); } + /* init reference counter used to track references to request FQ */ + refcount_set(&drv_ctx->refcnt, 1); + drv_ctx->qidev = qidev; return drv_ctx; } @@ -571,6 +586,16 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p, return qman_cb_dqrr_stop; fd = &dqrr->fd; + + drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd)); + if (unlikely(!drv_req)) { + dev_err(qidev, + "Can't find original request for caam response\n"); + return qman_cb_dqrr_consume; + } + + refcount_dec(&drv_req->drv_ctx->refcnt); + status = be32_to_cpu(fd->status); if (unlikely(status)) { u32 ssrc = status & JRSTA_SSRC_MASK; @@ -588,13 +613,6 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p, return qman_cb_dqrr_consume; } - drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd)); - if (unlikely(!drv_req)) { - dev_err(qidev, - "Can't find original request for caam response\n"); - return qman_cb_dqrr_consume; - } - dma_unmap_single(drv_req->drv_ctx->qidev, qm_fd_addr(fd), sizeof(drv_req->fd_sgt), DMA_BIDIRECTIONAL); diff --git a/drivers/crypto/caam/qi.h b/drivers/crypto/caam/qi.h index 848958951f68..5894f16f8fe3 100644 --- a/drivers/crypto/caam/qi.h +++ 
b/drivers/crypto/caam/qi.h @@ -3,7 +3,7 @@ * Public definitions for the CAAM/QI (Queue Interface) backend. * * Copyright 2013-2016 Freescale Semiconductor, Inc. - * Copyright 2016-2017 NXP + * Copyright 2016-2017, 2020 NXP */ #ifndef __QI_H__ @@ -52,6 +52,7 @@ enum optype { * @context_a: shared descriptor dma address * @req_fq: to-CAAM request frame queue * @rsp_fq: from-CAAM response frame queue + * @refcnt: reference counter incremented for each frame enqueued in to-CAAM FQ * @cpu: cpu on which to receive CAAM response * @op_type: operation type * @qidev: device pointer for CAAM/QI backend @@ -62,6 +63,7 @@ struct caam_drv_ctx { dma_addr_t context_a; struct qman_fq *req_fq; struct qman_fq *rsp_fq; + refcount_t refcnt; int cpu; enum optype op_type; struct device *qidev; diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 05127b70527d..0f810bc13b2b 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -487,7 +487,8 @@ struct rngtst { /* RNG4 TRNG test registers */ struct rng4tst { -#define RTMCTL_PRGM 0x00010000 /* 1 -> program mode, 0 -> run mode */ +#define RTMCTL_ACC BIT(5) /* TRNG access mode */ +#define RTMCTL_PRGM BIT(16) /* 1 -> program mode, 0 -> run mode */ #define RTMCTL_SAMP_MODE_VON_NEUMANN_ES_SC 0 /* use von Neumann data in both entropy shifter and statistical checker */ @@ -523,9 +524,11 @@ struct rng4tst { u32 rsvd1[40]; #define RDSTA_SKVT 0x80000000 #define RDSTA_SKVN 0x40000000 +#define RDSTA_PR0 BIT(4) +#define RDSTA_PR1 BIT(5) #define RDSTA_IF0 0x00000001 #define RDSTA_IF1 0x00000002 -#define RDSTA_IFMASK (RDSTA_IF1 | RDSTA_IF0) +#define RDSTA_MASK (RDSTA_PR1 | RDSTA_PR0 | RDSTA_IF1 | RDSTA_IF0) u32 rdsta; u32 rsvd2[15]; }; diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c index c4632d84c9a1..e91be9b8b083 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_main.c +++ b/drivers/crypto/cavium/nitrox/nitrox_main.c @@ -71,7 +71,7 @@ struct ucode { char 
version[VERSION_LEN - 1]; __be32 code_size; u8 raz[12]; - u64 code[0]; + u64 code[]; }; /** diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c index e95e7aa5dbf1..ae7b44599914 100644 --- a/drivers/crypto/ccp/psp-dev.c +++ b/drivers/crypto/ccp/psp-dev.c @@ -215,6 +215,9 @@ void psp_dev_destroy(struct sp_device *sp) tee_dev_destroy(psp); sp_free_psp_irq(sp, psp); + + if (sp->clear_psp_master_device) + sp->clear_psp_master_device(sp); } void psp_set_sev_irq_handler(struct psp_device *psp, psp_irq_handler_t handler, diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index e467860f797d..896f190b9a50 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -283,11 +283,11 @@ static int sev_get_platform_state(int *state, int *error) return rc; } -static int sev_ioctl_do_reset(struct sev_issue_cmd *argp) +static int sev_ioctl_do_reset(struct sev_issue_cmd *argp, bool writable) { int state, rc; - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; /* @@ -331,12 +331,12 @@ static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp) return ret; } -static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp) +static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp, bool writable) { struct sev_device *sev = psp_master->sev_data; int rc; - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; if (sev->state == SEV_STATE_UNINIT) { @@ -348,7 +348,7 @@ static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp) return __sev_do_cmd_locked(cmd, NULL, &argp->error); } -static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp) +static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable) { struct sev_device *sev = psp_master->sev_data; struct sev_user_data_pek_csr input; @@ -356,7 +356,7 @@ static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp) void *blob = NULL; int ret; - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; 
if (copy_from_user(&input, (void __user *)argp->data, sizeof(input))) @@ -539,7 +539,7 @@ fw_err: return ret; } -static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp) +static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp, bool writable) { struct sev_device *sev = psp_master->sev_data; struct sev_user_data_pek_cert_import input; @@ -547,7 +547,7 @@ static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp) void *pek_blob, *oca_blob; int ret; - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; if (copy_from_user(&input, (void __user *)argp->data, sizeof(input))) @@ -698,7 +698,7 @@ static int sev_ioctl_do_get_id(struct sev_issue_cmd *argp) return ret; } -static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp) +static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable) { struct sev_device *sev = psp_master->sev_data; struct sev_user_data_pdh_cert_export input; @@ -708,7 +708,7 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp) /* If platform is not in INIT state then transition it to INIT. 
*/ if (sev->state != SEV_STATE_INIT) { - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; ret = __sev_platform_init_locked(&argp->error); @@ -801,6 +801,7 @@ static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) void __user *argp = (void __user *)arg; struct sev_issue_cmd input; int ret = -EFAULT; + bool writable = file->f_mode & FMODE_WRITE; if (!psp_master || !psp_master->sev_data) return -ENODEV; @@ -819,25 +820,25 @@ static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) switch (input.cmd) { case SEV_FACTORY_RESET: - ret = sev_ioctl_do_reset(&input); + ret = sev_ioctl_do_reset(&input, writable); break; case SEV_PLATFORM_STATUS: ret = sev_ioctl_do_platform_status(&input); break; case SEV_PEK_GEN: - ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PEK_GEN, &input); + ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PEK_GEN, &input, writable); break; case SEV_PDH_GEN: - ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PDH_GEN, &input); + ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PDH_GEN, &input, writable); break; case SEV_PEK_CSR: - ret = sev_ioctl_do_pek_csr(&input); + ret = sev_ioctl_do_pek_csr(&input, writable); break; case SEV_PEK_CERT_IMPORT: - ret = sev_ioctl_do_pek_import(&input); + ret = sev_ioctl_do_pek_import(&input, writable); break; case SEV_PDH_CERT_EXPORT: - ret = sev_ioctl_do_pdh_export(&input); + ret = sev_ioctl_do_pdh_export(&input, writable); break; case SEV_GET_ID: pr_warn_once("SEV_GET_ID command is deprecated, use SEV_GET_ID2\n"); @@ -896,9 +897,9 @@ EXPORT_SYMBOL_GPL(sev_guest_df_flush); static void sev_exit(struct kref *ref) { - struct sev_misc_dev *misc_dev = container_of(ref, struct sev_misc_dev, refcount); - misc_deregister(&misc_dev->misc); + kfree(misc_dev); + misc_dev = NULL; } static int sev_misc_init(struct sev_device *sev) @@ -916,7 +917,7 @@ static int sev_misc_init(struct sev_device *sev) if (!misc_dev) { struct miscdevice *misc; - misc_dev = devm_kzalloc(dev, sizeof(*misc_dev), GFP_KERNEL); + 
misc_dev = kzalloc(sizeof(*misc_dev), GFP_KERNEL); if (!misc_dev) return -ENOMEM; diff --git a/drivers/crypto/ccp/sp-dev.h b/drivers/crypto/ccp/sp-dev.h index 423594608ad1..f913f1494af9 100644 --- a/drivers/crypto/ccp/sp-dev.h +++ b/drivers/crypto/ccp/sp-dev.h @@ -90,6 +90,7 @@ struct sp_device { /* get and set master device */ struct sp_device*(*get_psp_master_device)(void); void (*set_psp_master_device)(struct sp_device *); + void (*clear_psp_master_device)(struct sp_device *); bool irq_registered; bool use_tasklet; diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c index 56c1f61c0f84..cb6cb47053f4 100644 --- a/drivers/crypto/ccp/sp-pci.c +++ b/drivers/crypto/ccp/sp-pci.c @@ -146,6 +146,14 @@ static struct sp_device *psp_get_master(void) return sp_dev_master; } +static void psp_clear_master(struct sp_device *sp) +{ + if (sp == sp_dev_master) { + sp_dev_master = NULL; + dev_dbg(sp->dev, "Cleared sp_dev_master\n"); + } +} + static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct sp_device *sp; @@ -206,6 +214,7 @@ static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_master(pdev); sp->set_psp_master_device = psp_set_master; sp->get_psp_master_device = psp_get_master; + sp->clear_psp_master_device = psp_clear_master; ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); if (ret) { diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index 2fc0e0da790b..1cf51edbc4b9 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -6,8 +6,9 @@ #include <crypto/algapi.h> #include <crypto/internal/aead.h> #include <crypto/authenc.h> -#include <crypto/internal/des.h> +#include <crypto/gcm.h> #include <linux/rtnetlink.h> +#include <crypto/internal/des.h> #include "cc_driver.h" #include "cc_buffer_mgr.h" #include "cc_aead.h" @@ -26,7 +27,7 @@ #define MAX_NONCE_SIZE CTR_RFC3686_NONCE_SIZE struct cc_aead_handle { - cc_sram_addr_t sram_workspace_addr; 
+ u32 sram_workspace_addr; struct list_head aead_list; }; @@ -60,11 +61,6 @@ struct cc_aead_ctx { enum drv_hash_mode auth_mode; }; -static inline bool valid_assoclen(struct aead_request *req) -{ - return ((req->assoclen == 16) || (req->assoclen == 20)); -} - static void cc_aead_exit(struct crypto_aead *tfm) { struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); @@ -417,7 +413,7 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *authkey, dma_addr_t key_dma_addr = 0; struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); - u32 larval_addr = cc_larval_digest_addr(ctx->drvdata, ctx->auth_mode); + u32 larval_addr; struct cc_crypto_req cc_req = {}; unsigned int blocksize; unsigned int digestsize; @@ -448,8 +444,7 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *authkey, if (!key) return -ENOMEM; - key_dma_addr = dma_map_single(dev, (void *)key, keylen, - DMA_TO_DEVICE); + key_dma_addr = dma_map_single(dev, key, keylen, DMA_TO_DEVICE); if (dma_mapping_error(dev, key_dma_addr)) { dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n", key, keylen); @@ -460,6 +455,8 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *authkey, /* Load hash initial state */ hw_desc_init(&desc[idx]); set_cipher_mode(&desc[idx], hashmode); + larval_addr = cc_larval_digest_addr(ctx->drvdata, + ctx->auth_mode); set_din_sram(&desc[idx], larval_addr, digestsize); set_flow_mode(&desc[idx], S_DIN_to_HASH); set_setup_mode(&desc[idx], SETUP_LOAD_STATE0); @@ -796,7 +793,7 @@ static void cc_proc_authen_desc(struct aead_request *areq, * assoc. 
+ iv + data -compact in one table * if assoclen is ZERO only IV perform */ - cc_sram_addr_t mlli_addr = areq_ctx->assoc.sram_addr; + u32 mlli_addr = areq_ctx->assoc.sram_addr; u32 mlli_nents = areq_ctx->assoc.mlli_nents; if (areq_ctx->is_single_pass) { @@ -1170,7 +1167,7 @@ static void cc_mlli_to_sram(struct aead_request *req, req_ctx->data_buff_type == CC_DMA_BUF_MLLI || !req_ctx->is_single_pass) && req_ctx->mlli_params.mlli_len) { dev_dbg(dev, "Copy-to-sram: mlli_dma=%08x, mlli_size=%u\n", - (unsigned int)ctx->drvdata->mlli_sram_addr, + ctx->drvdata->mlli_sram_addr, req_ctx->mlli_params.mlli_len); /* Copy MLLI table host-to-sram */ hw_desc_init(&desc[*seq_size]); @@ -1222,7 +1219,7 @@ static void cc_hmac_authenc(struct aead_request *req, struct cc_hw_desc desc[], req_ctx->is_single_pass); if (req_ctx->is_single_pass) { - /** + /* * Single-pass flow */ cc_set_hmac_desc(req, desc, seq_size); @@ -1234,7 +1231,7 @@ static void cc_hmac_authenc(struct aead_request *req, struct cc_hw_desc desc[], return; } - /** + /* * Double-pass flow * Fallback for unsupported single-pass modes, * i.e. using assoc. data of non-word-multiple @@ -1275,7 +1272,7 @@ cc_xcbc_authenc(struct aead_request *req, struct cc_hw_desc desc[], req_ctx->is_single_pass); if (req_ctx->is_single_pass) { - /** + /* * Single-pass flow */ cc_set_xcbc_desc(req, desc, seq_size); @@ -1286,7 +1283,7 @@ cc_xcbc_authenc(struct aead_request *req, struct cc_hw_desc desc[], return; } - /** + /* * Double-pass flow * Fallback for unsupported single-pass modes, * i.e. using assoc. 
data of non-word-multiple @@ -1611,7 +1608,6 @@ static void cc_proc_rfc4309_ccm(struct aead_request *req) memcpy(areq_ctx->ctr_iv + CCM_BLOCK_IV_OFFSET, req->iv, CCM_BLOCK_IV_SIZE); req->iv = areq_ctx->ctr_iv; - areq_ctx->assoclen -= CCM_BLOCK_IV_SIZE; } static void cc_set_ghash_desc(struct aead_request *req, @@ -1799,12 +1795,6 @@ static int cc_gcm(struct aead_request *req, struct cc_hw_desc desc[], struct aead_req_ctx *req_ctx = aead_request_ctx(req); unsigned int cipher_flow_mode; - if (req_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT) { - cipher_flow_mode = AES_and_HASH; - } else { /* Encrypt */ - cipher_flow_mode = AES_to_HASH_and_DOUT; - } - //in RFC4543 no data to encrypt. just copy data from src to dest. if (req_ctx->plaintext_authenticate_only) { cc_proc_cipher_desc(req, BYPASS, desc, seq_size); @@ -1816,6 +1806,12 @@ static int cc_gcm(struct aead_request *req, struct cc_hw_desc desc[], return 0; } + if (req_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT) { + cipher_flow_mode = AES_and_HASH; + } else { /* Encrypt */ + cipher_flow_mode = AES_to_HASH_and_DOUT; + } + // for gcm and rfc4106. 
cc_set_ghash_desc(req, desc, seq_size); /* process(ghash) assoc data */ @@ -1870,8 +1866,7 @@ static int config_gcm_context(struct aead_request *req) */ __be64 temp64; - temp64 = cpu_to_be64((req_ctx->assoclen + - GCM_BLOCK_RFC4_IV_SIZE + cryptlen) * 8); + temp64 = cpu_to_be64((req_ctx->assoclen + cryptlen) * 8); memcpy(&req_ctx->gcm_len_block.len_a, &temp64, sizeof(temp64)); temp64 = 0; memcpy(&req_ctx->gcm_len_block.len_c, &temp64, 8); @@ -1891,7 +1886,6 @@ static void cc_proc_rfc4_gcm(struct aead_request *req) memcpy(areq_ctx->ctr_iv + GCM_BLOCK_RFC4_IV_OFFSET, req->iv, GCM_BLOCK_RFC4_IV_SIZE); req->iv = areq_ctx->ctr_iv; - areq_ctx->assoclen -= GCM_BLOCK_RFC4_IV_SIZE; } static int cc_proc_aead(struct aead_request *req, @@ -1921,8 +1915,8 @@ static int cc_proc_aead(struct aead_request *req, } /* Setup request structure */ - cc_req.user_cb = (void *)cc_aead_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_aead_complete; + cc_req.user_arg = req; /* Setup request context */ areq_ctx->gen_ctx.op_type = direct; @@ -1989,7 +1983,6 @@ static int cc_proc_aead(struct aead_request *req, /* Load MLLI tables to SRAM if necessary */ cc_mlli_to_sram(req, desc, &seq_len); - /*TODO: move seq len by reference */ switch (ctx->auth_mode) { case DRV_HASH_SHA1: case DRV_HASH_SHA256: @@ -2034,9 +2027,6 @@ static int cc_aead_encrypt(struct aead_request *req) /* No generated IV required */ areq_ctx->backup_iv = req->iv; areq_ctx->assoclen = req->assoclen; - areq_ctx->is_gcm4543 = false; - - areq_ctx->plaintext_authenticate_only = false; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2050,22 +2040,17 @@ static int cc_rfc4309_ccm_encrypt(struct aead_request *req) /* Very similar to cc_aead_encrypt() above. 
*/ struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); /* No generated IV required */ areq_ctx->backup_iv = req->iv; - areq_ctx->assoclen = req->assoclen; - areq_ctx->is_gcm4543 = true; + areq_ctx->assoclen = req->assoclen - CCM_BLOCK_IV_SIZE; cc_proc_rfc4309_ccm(req); @@ -2086,9 +2071,6 @@ static int cc_aead_decrypt(struct aead_request *req) /* No generated IV required */ areq_ctx->backup_iv = req->iv; areq_ctx->assoclen = req->assoclen; - areq_ctx->is_gcm4543 = false; - - areq_ctx->plaintext_authenticate_only = false; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2099,24 +2081,19 @@ static int cc_aead_decrypt(struct aead_request *req) static int cc_rfc4309_ccm_decrypt(struct aead_request *req) { - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); /* No generated IV required */ areq_ctx->backup_iv = req->iv; - areq_ctx->assoclen = req->assoclen; + areq_ctx->assoclen = req->assoclen - CCM_BLOCK_IV_SIZE; - areq_ctx->is_gcm4543 = true; cc_proc_rfc4309_ccm(req); rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT); @@ -2216,28 +2193,20 @@ static int cc_rfc4543_gcm_setauthsize(struct crypto_aead *authenc, static int cc_rfc4106_gcm_encrypt(struct 
aead_request *req) { - /* Very similar to cc_aead_encrypt() above. */ - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); /* No generated IV required */ areq_ctx->backup_iv = req->iv; - areq_ctx->assoclen = req->assoclen; - areq_ctx->plaintext_authenticate_only = false; + areq_ctx->assoclen = req->assoclen - GCM_BLOCK_RFC4_IV_SIZE; cc_proc_rfc4_gcm(req); - areq_ctx->is_gcm4543 = true; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2248,17 +2217,12 @@ out: static int cc_rfc4543_gcm_encrypt(struct aead_request *req) { - /* Very similar to cc_aead_encrypt() above. */ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); @@ -2270,7 +2234,6 @@ static int cc_rfc4543_gcm_encrypt(struct aead_request *req) areq_ctx->assoclen = req->assoclen; cc_proc_rfc4_gcm(req); - areq_ctx->is_gcm4543 = true; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2281,28 +2244,20 @@ out: static int cc_rfc4106_gcm_decrypt(struct aead_request *req) { - /* Very similar to cc_aead_decrypt() above. 
*/ - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); /* No generated IV required */ areq_ctx->backup_iv = req->iv; - areq_ctx->assoclen = req->assoclen; - areq_ctx->plaintext_authenticate_only = false; + areq_ctx->assoclen = req->assoclen - GCM_BLOCK_RFC4_IV_SIZE; cc_proc_rfc4_gcm(req); - areq_ctx->is_gcm4543 = true; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2313,17 +2268,12 @@ out: static int cc_rfc4543_gcm_decrypt(struct aead_request *req) { - /* Very similar to cc_aead_decrypt() above. */ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); @@ -2335,7 +2285,6 @@ static int cc_rfc4543_gcm_decrypt(struct aead_request *req) areq_ctx->assoclen = req->assoclen; cc_proc_rfc4_gcm(req); - areq_ctx->is_gcm4543 = true; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2614,7 +2563,7 @@ static struct cc_crypto_alg *cc_create_aead_alg(struct cc_alg_template *tmpl, struct cc_crypto_alg *t_alg; struct aead_alg *alg; - t_alg = kzalloc(sizeof(*t_alg), GFP_KERNEL); + t_alg = devm_kzalloc(dev, sizeof(*t_alg), GFP_KERNEL); if (!t_alg) return ERR_PTR(-ENOMEM); @@ -2628,6 +2577,7 @@ static struct cc_crypto_alg 
*cc_create_aead_alg(struct cc_alg_template *tmpl, alg->base.cra_ctxsize = sizeof(struct cc_aead_ctx); alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY; + alg->base.cra_blocksize = tmpl->blocksize; alg->init = cc_aead_init; alg->exit = cc_aead_exit; @@ -2643,19 +2593,12 @@ static struct cc_crypto_alg *cc_create_aead_alg(struct cc_alg_template *tmpl, int cc_aead_free(struct cc_drvdata *drvdata) { struct cc_crypto_alg *t_alg, *n; - struct cc_aead_handle *aead_handle = - (struct cc_aead_handle *)drvdata->aead_handle; - - if (aead_handle) { - /* Remove registered algs */ - list_for_each_entry_safe(t_alg, n, &aead_handle->aead_list, - entry) { - crypto_unregister_aead(&t_alg->aead_alg); - list_del(&t_alg->entry); - kfree(t_alg); - } - kfree(aead_handle); - drvdata->aead_handle = NULL; + struct cc_aead_handle *aead_handle = drvdata->aead_handle; + + /* Remove registered algs */ + list_for_each_entry_safe(t_alg, n, &aead_handle->aead_list, entry) { + crypto_unregister_aead(&t_alg->aead_alg); + list_del(&t_alg->entry); } return 0; @@ -2669,7 +2612,7 @@ int cc_aead_alloc(struct cc_drvdata *drvdata) int alg; struct device *dev = drvdata_to_dev(drvdata); - aead_handle = kmalloc(sizeof(*aead_handle), GFP_KERNEL); + aead_handle = devm_kmalloc(dev, sizeof(*aead_handle), GFP_KERNEL); if (!aead_handle) { rc = -ENOMEM; goto fail0; @@ -2682,7 +2625,6 @@ int cc_aead_alloc(struct cc_drvdata *drvdata) MAX_HMAC_DIGEST_SIZE); if (aead_handle->sram_workspace_addr == NULL_SRAM_ADDR) { - dev_err(dev, "SRAM pool exhausted\n"); rc = -ENOMEM; goto fail1; } @@ -2705,18 +2647,16 @@ int cc_aead_alloc(struct cc_drvdata *drvdata) if (rc) { dev_err(dev, "%s alg registration failed\n", t_alg->aead_alg.base.cra_driver_name); - goto fail2; - } else { - list_add_tail(&t_alg->entry, &aead_handle->aead_list); - dev_dbg(dev, "Registered %s\n", - t_alg->aead_alg.base.cra_driver_name); + goto fail1; } + + list_add_tail(&t_alg->entry, &aead_handle->aead_list); + dev_dbg(dev, "Registered 
%s\n", + t_alg->aead_alg.base.cra_driver_name); } return 0; -fail2: - kfree(t_alg); fail1: cc_aead_free(drvdata); fail0: diff --git a/drivers/crypto/ccree/cc_aead.h b/drivers/crypto/ccree/cc_aead.h index f12169b57f9d..b69591550730 100644 --- a/drivers/crypto/ccree/cc_aead.h +++ b/drivers/crypto/ccree/cc_aead.h @@ -66,7 +66,7 @@ struct aead_req_ctx { /* used to prevent cache coherence problem */ u8 backup_mac[MAX_MAC_SIZE]; u8 *backup_iv; /* store orig iv */ - u32 assoclen; /* internal assoclen */ + u32 assoclen; /* size of AAD buffer to authenticate */ dma_addr_t mac_buf_dma_addr; /* internal ICV DMA buffer */ /* buffer for internal ccm configurations */ dma_addr_t ccm_iv0_dma_addr; @@ -79,7 +79,6 @@ struct aead_req_ctx { dma_addr_t gcm_iv_inc2_dma_addr; dma_addr_t hkey_dma_addr; /* Phys. address of hkey */ dma_addr_t gcm_block_len_dma_addr; /* Phys. address of gcm block len */ - bool is_gcm4543; u8 *icv_virt_addr; /* Virt. address of ICV */ struct async_gen_req_ctx gen_ctx; diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index a72586eccd81..b2bd093e7013 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -13,16 +13,6 @@ #include "cc_hash.h" #include "cc_aead.h" -enum dma_buffer_type { - DMA_NULL_TYPE = -1, - DMA_SGL_TYPE = 1, - DMA_BUFF_TYPE = 2, -}; - -struct buff_mgr_handle { - struct dma_pool *mlli_buffs_pool; -}; - union buffer_array_entry { struct scatterlist *sgl; dma_addr_t buffer_dma; @@ -34,7 +24,6 @@ struct buffer_array { unsigned int offset[MAX_NUM_OF_BUFFERS_IN_MLLI]; int nents[MAX_NUM_OF_BUFFERS_IN_MLLI]; int total_data_len[MAX_NUM_OF_BUFFERS_IN_MLLI]; - enum dma_buffer_type type[MAX_NUM_OF_BUFFERS_IN_MLLI]; bool is_last[MAX_NUM_OF_BUFFERS_IN_MLLI]; u32 *mlli_nents[MAX_NUM_OF_BUFFERS_IN_MLLI]; }; @@ -64,11 +53,7 @@ static void cc_copy_mac(struct device *dev, struct aead_request *req, enum cc_sg_cpy_direct dir) { struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - 
struct crypto_aead *tfm = crypto_aead_reqtfm(req); - u32 skip = areq_ctx->assoclen + req->cryptlen; - - if (areq_ctx->is_gcm4543) - skip += crypto_aead_ivsize(tfm); + u32 skip = req->assoclen + req->cryptlen; cc_copy_sg_portion(dev, areq_ctx->backup_mac, req->src, (skip - areq_ctx->req_authsize), skip, dir); @@ -77,9 +62,13 @@ static void cc_copy_mac(struct device *dev, struct aead_request *req, /** * cc_get_sgl_nents() - Get scatterlist number of entries. * + * @dev: Device object * @sg_list: SG list * @nbytes: [IN] Total SGL data bytes. * @lbytes: [OUT] Returns the amount of bytes at the last entry + * + * Return: + * Number of entries in the scatterlist */ static unsigned int cc_get_sgl_nents(struct device *dev, struct scatterlist *sg_list, @@ -87,6 +76,8 @@ static unsigned int cc_get_sgl_nents(struct device *dev, { unsigned int nents = 0; + *lbytes = 0; + while (nbytes && sg_list) { nents++; /* get the number of bytes in the last entry */ @@ -95,6 +86,7 @@ static unsigned int cc_get_sgl_nents(struct device *dev, nbytes : sg_list->length; sg_list = sg_next(sg_list); } + dev_dbg(dev, "nents %d last bytes %d\n", nents, *lbytes); return nents; } @@ -103,11 +95,13 @@ static unsigned int cc_get_sgl_nents(struct device *dev, * cc_copy_sg_portion() - Copy scatter list data, * from to_skip to end, to dest and vice versa * - * @dest: - * @sg: - * @to_skip: - * @end: - * @direct: + * @dev: Device object + * @dest: Buffer to copy to/from + * @sg: SG list + * @to_skip: Number of bytes to skip before copying + * @end: Offset of last byte to copy + * @direct: Transfer direction (true == from SG list to buffer, false == from + * buffer to SG list) */ void cc_copy_sg_portion(struct device *dev, u8 *dest, struct scatterlist *sg, u32 to_skip, u32 end, enum cc_sg_cpy_direct direct) @@ -115,7 +109,7 @@ void cc_copy_sg_portion(struct device *dev, u8 *dest, struct scatterlist *sg, u32 nents; nents = sg_nents_for_len(sg, end); - sg_copy_buffer(sg, nents, (void *)dest, (end - to_skip + 
1), to_skip, + sg_copy_buffer(sg, nents, dest, (end - to_skip + 1), to_skip, (direct == CC_SG_TO_BUF)); } @@ -204,21 +198,15 @@ static int cc_generate_mlli(struct device *dev, struct buffer_array *sg_data, goto build_mlli_exit; } /* Point to start of MLLI */ - mlli_p = (u32 *)mlli_params->mlli_virt_addr; + mlli_p = mlli_params->mlli_virt_addr; /* go over all SG's and link it to one MLLI table */ for (i = 0; i < sg_data->num_of_buffers; i++) { union buffer_array_entry *entry = &sg_data->entry[i]; u32 tot_len = sg_data->total_data_len[i]; u32 offset = sg_data->offset[i]; - if (sg_data->type[i] == DMA_SGL_TYPE) - rc = cc_render_sg_to_mlli(dev, entry->sgl, tot_len, - offset, &total_nents, - &mlli_p); - else /*DMA_BUFF_TYPE*/ - rc = cc_render_buff_to_mlli(dev, entry->buffer_dma, - tot_len, &total_nents, - &mlli_p); + rc = cc_render_sg_to_mlli(dev, entry->sgl, tot_len, offset, + &total_nents, &mlli_p); if (rc) return rc; @@ -244,27 +232,6 @@ build_mlli_exit: return rc; } -static void cc_add_buffer_entry(struct device *dev, - struct buffer_array *sgl_data, - dma_addr_t buffer_dma, unsigned int buffer_len, - bool is_last_entry, u32 *mlli_nents) -{ - unsigned int index = sgl_data->num_of_buffers; - - dev_dbg(dev, "index=%u single_buff=%pad buffer_len=0x%08X is_last=%d\n", - index, &buffer_dma, buffer_len, is_last_entry); - sgl_data->nents[index] = 1; - sgl_data->entry[index].buffer_dma = buffer_dma; - sgl_data->offset[index] = 0; - sgl_data->total_data_len[index] = buffer_len; - sgl_data->type[index] = DMA_BUFF_TYPE; - sgl_data->is_last[index] = is_last_entry; - sgl_data->mlli_nents[index] = mlli_nents; - if (sgl_data->mlli_nents[index]) - *sgl_data->mlli_nents[index] = 0; - sgl_data->num_of_buffers++; -} - static void cc_add_sg_entry(struct device *dev, struct buffer_array *sgl_data, unsigned int nents, struct scatterlist *sgl, unsigned int data_len, unsigned int data_offset, @@ -278,7 +245,6 @@ static void cc_add_sg_entry(struct device *dev, struct buffer_array *sgl_data, 
sgl_data->entry[index].sgl = sgl; sgl_data->offset[index] = data_offset; sgl_data->total_data_len[index] = data_len; - sgl_data->type[index] = DMA_SGL_TYPE; sgl_data->is_last[index] = is_last_table; sgl_data->mlli_nents[index] = mlli_nents; if (sgl_data->mlli_nents[index]) @@ -290,37 +256,25 @@ static int cc_map_sg(struct device *dev, struct scatterlist *sg, unsigned int nbytes, int direction, u32 *nents, u32 max_sg_nents, u32 *lbytes, u32 *mapped_nents) { - if (sg_is_last(sg)) { - /* One entry only case -set to DLLI */ - if (dma_map_sg(dev, sg, 1, direction) != 1) { - dev_err(dev, "dma_map_sg() single buffer failed\n"); - return -ENOMEM; - } - dev_dbg(dev, "Mapped sg: dma_address=%pad page=%p addr=%pK offset=%u length=%u\n", - &sg_dma_address(sg), sg_page(sg), sg_virt(sg), - sg->offset, sg->length); - *lbytes = nbytes; - *nents = 1; - *mapped_nents = 1; - } else { /*sg_is_last*/ - *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes); - if (*nents > max_sg_nents) { - *nents = 0; - dev_err(dev, "Too many fragments. current %d max %d\n", - *nents, max_sg_nents); - return -ENOMEM; - } - /* In case of mmu the number of mapped nents might - * be changed from the original sgl nents - */ - *mapped_nents = dma_map_sg(dev, sg, *nents, direction); - if (*mapped_nents == 0) { - *nents = 0; - dev_err(dev, "dma_map_sg() sg buffer failed\n"); - return -ENOMEM; - } + int ret = 0; + + *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes); + if (*nents > max_sg_nents) { + *nents = 0; + dev_err(dev, "Too many fragments. 
current %d max %d\n", + *nents, max_sg_nents); + return -ENOMEM; + } + + ret = dma_map_sg(dev, sg, *nents, direction); + if (dma_mapping_error(dev, ret)) { + *nents = 0; + dev_err(dev, "dma_map_sg() sg buffer failed %d\n", ret); + return -ENOMEM; } + *mapped_nents = ret; + return 0; } @@ -411,7 +365,6 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, { struct cipher_req_ctx *req_ctx = (struct cipher_req_ctx *)ctx; struct mlli_params *mlli_params = &req_ctx->mlli_params; - struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle; struct device *dev = drvdata_to_dev(drvdata); struct buffer_array sg_data; u32 dummy = 0; @@ -424,10 +377,9 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, /* Map IV buffer */ if (ivsize) { - dump_byte_array("iv", (u8 *)info, ivsize); + dump_byte_array("iv", info, ivsize); req_ctx->gen_ctx.iv_dma_addr = - dma_map_single(dev, (void *)info, - ivsize, DMA_BIDIRECTIONAL); + dma_map_single(dev, info, ivsize, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, req_ctx->gen_ctx.iv_dma_addr)) { dev_err(dev, "Mapping iv %u B at va=%pK for DMA failed\n", ivsize, info); @@ -476,7 +428,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, } if (req_ctx->dma_buf_type == CC_DMA_BUF_MLLI) { - mlli_params->curr_pool = buff_mgr->mlli_buffs_pool; + mlli_params->curr_pool = drvdata->mlli_buffs_pool; rc = cc_generate_mlli(dev, &sg_data, mlli_params, flags); if (rc) goto cipher_exit; @@ -555,11 +507,12 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req) sg_virt(req->src), areq_ctx->src.nents, areq_ctx->assoc.nents, areq_ctx->assoclen, req->cryptlen); - dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_BIDIRECTIONAL); + dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents, + DMA_BIDIRECTIONAL); if (req->src != req->dst) { dev_dbg(dev, "Unmapping dst sgl: req->dst=%pK\n", sg_virt(req->dst)); - dma_unmap_sg(dev, req->dst, sg_nents(req->dst), + dma_unmap_sg(dev, req->dst, 
areq_ctx->dst.mapped_nents, DMA_BIDIRECTIONAL); } if (drvdata->coherent && @@ -614,18 +567,6 @@ static int cc_aead_chain_iv(struct cc_drvdata *drvdata, dev_dbg(dev, "Mapped iv %u B at va=%pK to dma=%pad\n", hw_iv_size, req->iv, &areq_ctx->gen_ctx.iv_dma_addr); - // TODO: what about CTR?? ask Ron - if (do_chain && areq_ctx->plaintext_authenticate_only) { - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - unsigned int iv_size_to_authenc = crypto_aead_ivsize(tfm); - unsigned int iv_ofs = GCM_BLOCK_RFC4_IV_OFFSET; - /* Chain to given list */ - cc_add_buffer_entry(dev, sg_data, - (areq_ctx->gen_ctx.iv_dma_addr + iv_ofs), - iv_size_to_authenc, is_last, - &areq_ctx->assoc.mlli_nents); - areq_ctx->assoc_buff_type = CC_DMA_BUF_MLLI; - } chain_iv_exit: return rc; @@ -639,13 +580,8 @@ static int cc_aead_chain_assoc(struct cc_drvdata *drvdata, struct aead_req_ctx *areq_ctx = aead_request_ctx(req); int rc = 0; int mapped_nents = 0; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - unsigned int size_of_assoc = areq_ctx->assoclen; struct device *dev = drvdata_to_dev(drvdata); - if (areq_ctx->is_gcm4543) - size_of_assoc += crypto_aead_ivsize(tfm); - if (!sg_data) { rc = -EINVAL; goto chain_assoc_exit; @@ -661,7 +597,7 @@ static int cc_aead_chain_assoc(struct cc_drvdata *drvdata, goto chain_assoc_exit; } - mapped_nents = sg_nents_for_len(req->src, size_of_assoc); + mapped_nents = sg_nents_for_len(req->src, areq_ctx->assoclen); if (mapped_nents < 0) return mapped_nents; @@ -854,16 +790,11 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, u32 src_mapped_nents = 0, dst_mapped_nents = 0; u32 offset = 0; /* non-inplace mode */ - unsigned int size_for_map = areq_ctx->assoclen + req->cryptlen; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); + unsigned int size_for_map = req->assoclen + req->cryptlen; u32 sg_index = 0; - bool is_gcm4543 = areq_ctx->is_gcm4543; - u32 size_to_skip = areq_ctx->assoclen; + u32 size_to_skip = req->assoclen; struct scatterlist *sgl; - if 
(is_gcm4543) - size_to_skip += crypto_aead_ivsize(tfm); - offset = size_to_skip; if (!sg_data) @@ -872,16 +803,13 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, areq_ctx->src_sgl = req->src; areq_ctx->dst_sgl = req->dst; - if (is_gcm4543) - size_for_map += crypto_aead_ivsize(tfm); - size_for_map += (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) ? authsize : 0; src_mapped_nents = cc_get_sgl_nents(dev, req->src, size_for_map, &src_last_bytes); sg_index = areq_ctx->src_sgl->length; //check where the data starts - while (sg_index <= size_to_skip) { + while (src_mapped_nents && (sg_index <= size_to_skip)) { src_mapped_nents--; offset -= areq_ctx->src_sgl->length; sgl = sg_next(areq_ctx->src_sgl); @@ -901,14 +829,15 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, areq_ctx->src_offset = offset; if (req->src != req->dst) { - size_for_map = areq_ctx->assoclen + req->cryptlen; - size_for_map += (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) ? - authsize : 0; - if (is_gcm4543) - size_for_map += crypto_aead_ivsize(tfm); + size_for_map = req->assoclen + req->cryptlen; + + if (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) + size_for_map += authsize; + else + size_for_map -= authsize; rc = cc_map_sg(dev, req->dst, size_for_map, DMA_BIDIRECTIONAL, - &areq_ctx->dst.nents, + &areq_ctx->dst.mapped_nents, LLI_MAX_NUM_OF_DATA_ENTRIES, &dst_last_bytes, &dst_mapped_nents); if (rc) @@ -921,7 +850,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, offset = size_to_skip; //check where the data starts - while (sg_index <= size_to_skip) { + while (dst_mapped_nents && sg_index <= size_to_skip) { dst_mapped_nents--; offset -= areq_ctx->dst_sgl->length; sgl = sg_next(areq_ctx->dst_sgl); @@ -1012,14 +941,11 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) struct device *dev = drvdata_to_dev(drvdata); struct buffer_array sg_data; unsigned int authsize = areq_ctx->req_authsize; - struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle; int rc 
= 0; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - bool is_gcm4543 = areq_ctx->is_gcm4543; dma_addr_t dma_addr; u32 mapped_nents = 0; u32 dummy = 0; /*used for the assoc data fragments */ - u32 size_to_map = 0; + u32 size_to_map; gfp_t flags = cc_gfp_flags(&req->base); mlli_params->curr_pool = NULL; @@ -1116,14 +1042,15 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) areq_ctx->gcm_iv_inc2_dma_addr = dma_addr; } - size_to_map = req->cryptlen + areq_ctx->assoclen; - if (areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_ENCRYPT) + size_to_map = req->cryptlen + req->assoclen; + /* If we do in-place encryption, we also need the auth tag */ + if ((areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_ENCRYPT) && + (req->src == req->dst)) { size_to_map += authsize; + } - if (is_gcm4543) - size_to_map += crypto_aead_ivsize(tfm); rc = cc_map_sg(dev, req->src, size_to_map, DMA_BIDIRECTIONAL, - &areq_ctx->src.nents, + &areq_ctx->src.mapped_nents, (LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES + LLI_MAX_NUM_OF_DATA_ENTRIES), &dummy, &mapped_nents); @@ -1183,7 +1110,7 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) */ if (areq_ctx->assoc_buff_type == CC_DMA_BUF_MLLI || areq_ctx->data_buff_type == CC_DMA_BUF_MLLI) { - mlli_params->curr_pool = buff_mgr->mlli_buffs_pool; + mlli_params->curr_pool = drvdata->mlli_buffs_pool; rc = cc_generate_mlli(dev, &sg_data, mlli_params, flags); if (rc) goto aead_map_failure; @@ -1211,7 +1138,6 @@ int cc_map_hash_request_final(struct cc_drvdata *drvdata, void *ctx, u32 *curr_buff_cnt = cc_hash_buf_cnt(areq_ctx); struct mlli_params *mlli_params = &areq_ctx->mlli_params; struct buffer_array sg_data; - struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle; int rc = 0; u32 dummy = 0; u32 mapped_nents = 0; @@ -1229,7 +1155,6 @@ int cc_map_hash_request_final(struct cc_drvdata *drvdata, void *ctx, return 0; } - /*TODO: copy data in case that buffer is enough for operation */ /* map the 
previous buffer */ if (*curr_buff_cnt) { rc = cc_set_hash_buf(dev, areq_ctx, curr_buff, *curr_buff_cnt, @@ -1258,7 +1183,7 @@ int cc_map_hash_request_final(struct cc_drvdata *drvdata, void *ctx, /*build mlli */ if (areq_ctx->data_dma_buf_type == CC_DMA_BUF_MLLI) { - mlli_params->curr_pool = buff_mgr->mlli_buffs_pool; + mlli_params->curr_pool = drvdata->mlli_buffs_pool; /* add the src data to the sg_data */ cc_add_sg_entry(dev, &sg_data, areq_ctx->in_nents, src, nbytes, 0, true, &areq_ctx->mlli_nents); @@ -1296,7 +1221,6 @@ int cc_map_hash_request_update(struct cc_drvdata *drvdata, void *ctx, unsigned int update_data_len; u32 total_in_len = nbytes + *curr_buff_cnt; struct buffer_array sg_data; - struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle; unsigned int swap_index = 0; int rc = 0; u32 dummy = 0; @@ -1371,7 +1295,7 @@ int cc_map_hash_request_update(struct cc_drvdata *drvdata, void *ctx, } if (areq_ctx->data_dma_buf_type == CC_DMA_BUF_MLLI) { - mlli_params->curr_pool = buff_mgr->mlli_buffs_pool; + mlli_params->curr_pool = drvdata->mlli_buffs_pool; /* add the src data to the sg_data */ cc_add_sg_entry(dev, &sg_data, areq_ctx->in_nents, src, (update_data_len - *curr_buff_cnt), 0, true, @@ -1438,39 +1362,22 @@ void cc_unmap_hash_request(struct device *dev, void *ctx, int cc_buffer_mgr_init(struct cc_drvdata *drvdata) { - struct buff_mgr_handle *buff_mgr_handle; struct device *dev = drvdata_to_dev(drvdata); - buff_mgr_handle = kmalloc(sizeof(*buff_mgr_handle), GFP_KERNEL); - if (!buff_mgr_handle) - return -ENOMEM; - - drvdata->buff_mgr_handle = buff_mgr_handle; - - buff_mgr_handle->mlli_buffs_pool = + drvdata->mlli_buffs_pool = dma_pool_create("dx_single_mlli_tables", dev, MAX_NUM_OF_TOTAL_MLLI_ENTRIES * LLI_ENTRY_BYTE_SIZE, MLLI_TABLE_MIN_ALIGNMENT, 0); - if (!buff_mgr_handle->mlli_buffs_pool) - goto error; + if (!drvdata->mlli_buffs_pool) + return -ENOMEM; return 0; - -error: - cc_buffer_mgr_fini(drvdata); - return -ENOMEM; } int cc_buffer_mgr_fini(struct 
cc_drvdata *drvdata) { - struct buff_mgr_handle *buff_mgr_handle = drvdata->buff_mgr_handle; - - if (buff_mgr_handle) { - dma_pool_destroy(buff_mgr_handle->mlli_buffs_pool); - kfree(drvdata->buff_mgr_handle); - drvdata->buff_mgr_handle = NULL; - } + dma_pool_destroy(drvdata->mlli_buffs_pool); return 0; } diff --git a/drivers/crypto/ccree/cc_buffer_mgr.h b/drivers/crypto/ccree/cc_buffer_mgr.h index af434872c6ff..653441b6542e 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.h +++ b/drivers/crypto/ccree/cc_buffer_mgr.h @@ -24,14 +24,15 @@ enum cc_sg_cpy_direct { }; struct cc_mlli { - cc_sram_addr_t sram_addr; + u32 sram_addr; + unsigned int mapped_nents; unsigned int nents; //sg nents unsigned int mlli_nents; //mlli nents might be different than the above }; struct mlli_params { struct dma_pool *curr_pool; - u8 *mlli_virt_addr; + void *mlli_virt_addr; dma_addr_t mlli_dma_addr; u32 mlli_len; }; diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c index 7d6252d892d7..a84335328f37 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -20,10 +20,6 @@ #define template_skcipher template_u.skcipher -struct cc_cipher_handle { - struct list_head alg_list; -}; - struct cc_user_key_info { u8 *key; dma_addr_t key_dma_addr; @@ -184,7 +180,7 @@ static int cc_cipher_init(struct crypto_tfm *tfm) ctx_p->user.key); /* Map key buffer */ - ctx_p->user.key_dma_addr = dma_map_single(dev, (void *)ctx_p->user.key, + ctx_p->user.key_dma_addr = dma_map_single(dev, ctx_p->user.key, max_key_buf_size, DMA_TO_DEVICE); if (dma_mapping_error(dev, ctx_p->user.key_dma_addr)) { @@ -284,7 +280,7 @@ static int cc_cipher_sethkey(struct crypto_skcipher *sktfm, const u8 *key, dev_dbg(dev, "Setting HW key in context @%p for %s. 
keylen=%u\n", ctx_p, crypto_tfm_alg_name(tfm), keylen); - dump_byte_array("key", (u8 *)key, keylen); + dump_byte_array("key", key, keylen); /* STAT_PHASE_0: Init and sanity checks */ @@ -387,7 +383,7 @@ static int cc_cipher_setkey(struct crypto_skcipher *sktfm, const u8 *key, dev_dbg(dev, "Setting key in context @%p for %s. keylen=%u\n", ctx_p, crypto_tfm_alg_name(tfm), keylen); - dump_byte_array("key", (u8 *)key, keylen); + dump_byte_array("key", key, keylen); /* STAT_PHASE_0: Init and sanity checks */ @@ -533,14 +529,6 @@ static void cc_setup_state_desc(struct crypto_tfm *tfm, int flow_mode = ctx_p->flow_mode; int direction = req_ctx->gen_ctx.op_type; dma_addr_t iv_dma_addr = req_ctx->gen_ctx.iv_dma_addr; - unsigned int du_size = nbytes; - - struct cc_crypto_alg *cc_alg = - container_of(tfm->__crt_alg, struct cc_crypto_alg, - skcipher_alg.base); - - if (cc_alg->data_unit) - du_size = cc_alg->data_unit; switch (cipher_mode) { case DRV_CIPHER_ECB: @@ -753,7 +741,7 @@ static void cc_setup_mlli_desc(struct crypto_tfm *tfm, dev_dbg(dev, " bypass params addr %pad length 0x%X addr 0x%08X\n", &req_ctx->mlli_params.mlli_dma_addr, req_ctx->mlli_params.mlli_len, - (unsigned int)ctx_p->drvdata->mlli_sram_addr); + ctx_p->drvdata->mlli_sram_addr); hw_desc_init(&desc[*seq_size]); set_din_type(&desc[*seq_size], DMA_DLLI, req_ctx->mlli_params.mlli_dma_addr, @@ -801,16 +789,16 @@ static void cc_setup_flow_desc(struct crypto_tfm *tfm, req_ctx->in_mlli_nents, NS_BIT); if (req_ctx->out_nents == 0) { dev_dbg(dev, " din/dout params addr 0x%08X addr 0x%08X\n", - (unsigned int)ctx_p->drvdata->mlli_sram_addr, - (unsigned int)ctx_p->drvdata->mlli_sram_addr); + ctx_p->drvdata->mlli_sram_addr, + ctx_p->drvdata->mlli_sram_addr); set_dout_mlli(&desc[*seq_size], ctx_p->drvdata->mlli_sram_addr, req_ctx->in_mlli_nents, NS_BIT, (!last_desc ? 
0 : 1)); } else { dev_dbg(dev, " din/dout params addr 0x%08X addr 0x%08X\n", - (unsigned int)ctx_p->drvdata->mlli_sram_addr, - (unsigned int)ctx_p->drvdata->mlli_sram_addr + + ctx_p->drvdata->mlli_sram_addr, + ctx_p->drvdata->mlli_sram_addr + (u32)LLI_ENTRY_BYTE_SIZE * req_ctx->in_nents); set_dout_mlli(&desc[*seq_size], (ctx_p->drvdata->mlli_sram_addr + @@ -871,7 +859,6 @@ static int cc_cipher_process(struct skcipher_request *req, /* STAT_PHASE_0: Init and sanity checks */ - /* TODO: check data length according to mode */ if (validate_data_size(ctx_p, nbytes)) { dev_dbg(dev, "Unsupported data size %d.\n", nbytes); rc = -EINVAL; @@ -893,8 +880,8 @@ static int cc_cipher_process(struct skcipher_request *req, } /* Setup request structure */ - cc_req.user_cb = (void *)cc_cipher_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_cipher_complete; + cc_req.user_arg = req; /* Setup CPP operation details */ if (ctx_p->key_type == CC_POLICY_PROTECTED_KEY) { @@ -1228,6 +1215,10 @@ static const struct cc_alg_template skcipher_algs[] = { .sec_func = true, }, { + /* See https://www.mail-archive.com/linux-crypto@vger.kernel.org/msg40576.html + * for the reason why this differs from the generic + * implementation. 
+ */ .name = "xts(aes)", .driver_name = "xts-aes-ccree", .blocksize = 1, @@ -1423,7 +1414,7 @@ static const struct cc_alg_template skcipher_algs[] = { { .name = "ofb(aes)", .driver_name = "ofb-aes-ccree", - .blocksize = AES_BLOCK_SIZE, + .blocksize = 1, .template_skcipher = { .setkey = cc_cipher_setkey, .encrypt = cc_cipher_encrypt, @@ -1576,7 +1567,7 @@ static const struct cc_alg_template skcipher_algs[] = { { .name = "ctr(sm4)", .driver_name = "ctr-sm4-ccree", - .blocksize = SM4_BLOCK_SIZE, + .blocksize = 1, .template_skcipher = { .setkey = cc_cipher_setkey, .encrypt = cc_cipher_encrypt, @@ -1634,7 +1625,7 @@ static struct cc_crypto_alg *cc_create_alg(const struct cc_alg_template *tmpl, struct cc_crypto_alg *t_alg; struct skcipher_alg *alg; - t_alg = kzalloc(sizeof(*t_alg), GFP_KERNEL); + t_alg = devm_kzalloc(dev, sizeof(*t_alg), GFP_KERNEL); if (!t_alg) return ERR_PTR(-ENOMEM); @@ -1665,36 +1656,23 @@ static struct cc_crypto_alg *cc_create_alg(const struct cc_alg_template *tmpl, int cc_cipher_free(struct cc_drvdata *drvdata) { struct cc_crypto_alg *t_alg, *n; - struct cc_cipher_handle *cipher_handle = drvdata->cipher_handle; - - if (cipher_handle) { - /* Remove registered algs */ - list_for_each_entry_safe(t_alg, n, &cipher_handle->alg_list, - entry) { - crypto_unregister_skcipher(&t_alg->skcipher_alg); - list_del(&t_alg->entry); - kfree(t_alg); - } - kfree(cipher_handle); - drvdata->cipher_handle = NULL; + + /* Remove registered algs */ + list_for_each_entry_safe(t_alg, n, &drvdata->alg_list, entry) { + crypto_unregister_skcipher(&t_alg->skcipher_alg); + list_del(&t_alg->entry); } return 0; } int cc_cipher_alloc(struct cc_drvdata *drvdata) { - struct cc_cipher_handle *cipher_handle; struct cc_crypto_alg *t_alg; struct device *dev = drvdata_to_dev(drvdata); int rc = -ENOMEM; int alg; - cipher_handle = kmalloc(sizeof(*cipher_handle), GFP_KERNEL); - if (!cipher_handle) - return -ENOMEM; - - INIT_LIST_HEAD(&cipher_handle->alg_list); - drvdata->cipher_handle = 
cipher_handle; + INIT_LIST_HEAD(&drvdata->alg_list); /* Linux crypto */ dev_dbg(dev, "Number of algorithms = %zu\n", @@ -1723,14 +1701,12 @@ int cc_cipher_alloc(struct cc_drvdata *drvdata) if (rc) { dev_err(dev, "%s alg registration failed\n", t_alg->skcipher_alg.base.cra_driver_name); - kfree(t_alg); goto fail0; - } else { - list_add_tail(&t_alg->entry, - &cipher_handle->alg_list); - dev_dbg(dev, "Registered %s\n", - t_alg->skcipher_alg.base.cra_driver_name); } + + list_add_tail(&t_alg->entry, &drvdata->alg_list); + dev_dbg(dev, "Registered %s\n", + t_alg->skcipher_alg.base.cra_driver_name); } return 0; diff --git a/drivers/crypto/ccree/cc_debugfs.c b/drivers/crypto/ccree/cc_debugfs.c index 566999738698..c454afce7781 100644 --- a/drivers/crypto/ccree/cc_debugfs.c +++ b/drivers/crypto/ccree/cc_debugfs.c @@ -8,10 +8,6 @@ #include "cc_crypto_ctx.h" #include "cc_debugfs.h" -struct cc_debugfs_ctx { - struct dentry *dir; -}; - #define CC_DEBUG_REG(_X) { \ .name = __stringify(_X),\ .offset = CC_REG(_X) \ @@ -67,13 +63,8 @@ void __exit cc_debugfs_global_fini(void) int cc_debugfs_init(struct cc_drvdata *drvdata) { struct device *dev = drvdata_to_dev(drvdata); - struct cc_debugfs_ctx *ctx; struct debugfs_regset32 *regset, *verset; - ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL); if (!regset) return -ENOMEM; @@ -81,16 +72,18 @@ int cc_debugfs_init(struct cc_drvdata *drvdata) regset->regs = debug_regs; regset->nregs = ARRAY_SIZE(debug_regs); regset->base = drvdata->cc_base; + regset->dev = dev; - ctx->dir = debugfs_create_dir(drvdata->plat_dev->name, cc_debugfs_dir); + drvdata->dir = debugfs_create_dir(drvdata->plat_dev->name, + cc_debugfs_dir); - debugfs_create_regset32("regs", 0400, ctx->dir, regset); - debugfs_create_bool("coherent", 0400, ctx->dir, &drvdata->coherent); + debugfs_create_regset32("regs", 0400, drvdata->dir, regset); + debugfs_create_bool("coherent", 0400, 
drvdata->dir, &drvdata->coherent); verset = devm_kzalloc(dev, sizeof(*verset), GFP_KERNEL); /* Failing here is not important enough to fail the module load */ if (!verset) - goto out; + return 0; if (drvdata->hw_rev <= CC_HW_REV_712) { ver_sig_regs[0].offset = drvdata->sig_offset; @@ -102,17 +95,13 @@ int cc_debugfs_init(struct cc_drvdata *drvdata) verset->nregs = ARRAY_SIZE(pid_cid_regs); } verset->base = drvdata->cc_base; + verset->dev = dev; - debugfs_create_regset32("version", 0400, ctx->dir, verset); - -out: - drvdata->debugfs = ctx; + debugfs_create_regset32("version", 0400, drvdata->dir, verset); return 0; } void cc_debugfs_fini(struct cc_drvdata *drvdata) { - struct cc_debugfs_ctx *ctx = (struct cc_debugfs_ctx *)drvdata->debugfs; - - debugfs_remove_recursive(ctx->dir); + debugfs_remove_recursive(drvdata->dir); } diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c index 532bc95a8373..2d50991b9a17 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -14,6 +14,8 @@ #include <linux/of.h> #include <linux/clk.h> #include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/pm_runtime.h> #include "cc_driver.h" #include "cc_request_mgr.h" @@ -134,7 +136,7 @@ static irqreturn_t cc_isr(int irq, void *dev_id) /* STAT_OP_TYPE_GENERIC STAT_PHASE_0: Interrupt */ /* if driver suspended return, probably shared interrupt */ - if (cc_pm_is_dev_suspended(dev)) + if (pm_runtime_suspended(dev)) return IRQ_NONE; /* read the interrupt status */ @@ -269,7 +271,6 @@ static int init_cc_resources(struct platform_device *plat_dev) u32 val, hw_rev_pidr, sig_cidr; u64 dma_mask; const struct cc_hw_data *hw_rev; - const struct of_device_id *dev_id; struct clk *clk; int irq; int rc = 0; @@ -278,11 +279,7 @@ static int init_cc_resources(struct platform_device *plat_dev) if (!new_drvdata) return -ENOMEM; - dev_id = of_match_node(arm_ccree_dev_of_match, np); - if (!dev_id) - return -ENODEV; - - hw_rev = (struct 
cc_hw_data *)dev_id->data; + hw_rev = of_device_get_match_data(dev); new_drvdata->hw_rev_name = hw_rev->name; new_drvdata->hw_rev = hw_rev->rev; new_drvdata->std_bodies = hw_rev->std_bodies; @@ -302,22 +299,12 @@ static int init_cc_resources(struct platform_device *plat_dev) platform_set_drvdata(plat_dev, new_drvdata); new_drvdata->plat_dev = plat_dev; - clk = devm_clk_get(dev, NULL); - if (IS_ERR(clk)) - switch (PTR_ERR(clk)) { - /* Clock is optional so this might be fine */ - case -ENOENT: - break; - - /* Clock not available, let's try again soon */ - case -EPROBE_DEFER: - return -EPROBE_DEFER; - - default: - dev_err(dev, "Error getting clock: %ld\n", - PTR_ERR(clk)); - return PTR_ERR(clk); - } + clk = devm_clk_get_optional(dev, NULL); + if (IS_ERR(clk)) { + if (PTR_ERR(clk) != -EPROBE_DEFER) + dev_err(dev, "Error getting clock: %pe\n", clk); + return PTR_ERR(clk); + } new_drvdata->clk = clk; new_drvdata->coherent = of_dma_is_coherent(np); @@ -344,13 +331,13 @@ static int init_cc_resources(struct platform_device *plat_dev) init_completion(&new_drvdata->hw_queue_avail); - if (!plat_dev->dev.dma_mask) - plat_dev->dev.dma_mask = &plat_dev->dev.coherent_dma_mask; + if (!dev->dma_mask) + dev->dma_mask = &dev->coherent_dma_mask; dma_mask = DMA_BIT_MASK(DMA_BIT_MASK_LEN); while (dma_mask > 0x7fffffffUL) { - if (dma_supported(&plat_dev->dev, dma_mask)) { - rc = dma_set_coherent_mask(&plat_dev->dev, dma_mask); + if (dma_supported(dev, dma_mask)) { + rc = dma_set_coherent_mask(dev, dma_mask); if (!rc) break; } @@ -362,7 +349,7 @@ static int init_cc_resources(struct platform_device *plat_dev) return rc; } - rc = cc_clk_on(new_drvdata); + rc = clk_prepare_enable(new_drvdata->clk); if (rc) { dev_err(dev, "Failed to enable clock"); return rc; @@ -370,7 +357,17 @@ static int init_cc_resources(struct platform_device *plat_dev) new_drvdata->sec_disabled = cc_sec_disable; - /* wait for Crytpcell reset completion */ + pm_runtime_set_autosuspend_delay(dev, CC_SUSPEND_TIMEOUT); + 
pm_runtime_use_autosuspend(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + rc = pm_runtime_get_sync(dev); + if (rc < 0) { + dev_err(dev, "pm_runtime_get_sync() failed: %d\n", rc); + goto post_pm_err; + } + + /* Wait for Cryptocell reset completion */ if (!cc_wait_for_reset_completion(new_drvdata)) { dev_err(dev, "Cryptocell reset not completed"); } @@ -382,7 +379,7 @@ static int init_cc_resources(struct platform_device *plat_dev) dev_err(dev, "Invalid CC signature: SIGNATURE=0x%08X != expected=0x%08X\n", val, hw_rev->sig); rc = -EINVAL; - goto post_clk_err; + goto post_pm_err; } sig_cidr = val; hw_rev_pidr = cc_ioread(new_drvdata, new_drvdata->ver_offset); @@ -393,7 +390,7 @@ static int init_cc_resources(struct platform_device *plat_dev) dev_err(dev, "Invalid CC PIDR: PIDR0124=0x%08X != expected=0x%08X\n", val, hw_rev->pidr_0124); rc = -EINVAL; - goto post_clk_err; + goto post_pm_err; } hw_rev_pidr = val; @@ -402,7 +399,7 @@ static int init_cc_resources(struct platform_device *plat_dev) dev_err(dev, "Invalid CC CIDR: CIDR0123=0x%08X != expected=0x%08X\n", val, hw_rev->cidr_0123); rc = -EINVAL; - goto post_clk_err; + goto post_pm_err; } sig_cidr = val; @@ -421,7 +418,7 @@ static int init_cc_resources(struct platform_device *plat_dev) default: dev_err(dev, "Unsupported engines configuration.\n"); rc = -EINVAL; - goto post_clk_err; + goto post_pm_err; } /* Check security disable state */ @@ -447,14 +444,14 @@ static int init_cc_resources(struct platform_device *plat_dev) new_drvdata); if (rc) { dev_err(dev, "Could not register to interrupt %d\n", irq); - goto post_clk_err; + goto post_pm_err; } dev_dbg(dev, "Registered to IRQ: %d\n", irq); rc = init_cc_regs(new_drvdata, true); if (rc) { dev_err(dev, "init_cc_regs failed\n"); - goto post_clk_err; + goto post_pm_err; } rc = cc_debugfs_init(new_drvdata); @@ -477,15 +474,14 @@ static int init_cc_resources(struct platform_device *plat_dev) new_drvdata->mlli_sram_addr = cc_sram_alloc(new_drvdata, 
MAX_MLLI_BUFF_SIZE); if (new_drvdata->mlli_sram_addr == NULL_SRAM_ADDR) { - dev_err(dev, "Failed to alloc MLLI Sram buffer\n"); rc = -ENOMEM; - goto post_sram_mgr_err; + goto post_fips_init_err; } rc = cc_req_mgr_init(new_drvdata); if (rc) { dev_err(dev, "cc_req_mgr_init failed\n"); - goto post_sram_mgr_err; + goto post_fips_init_err; } rc = cc_buffer_mgr_init(new_drvdata); @@ -494,12 +490,6 @@ static int init_cc_resources(struct platform_device *plat_dev) goto post_req_mgr_err; } - rc = cc_pm_init(new_drvdata); - if (rc) { - dev_err(dev, "cc_pm_init failed\n"); - goto post_buf_mgr_err; - } - /* Allocate crypto algs */ rc = cc_cipher_alloc(new_drvdata); if (rc) { @@ -520,15 +510,13 @@ static int init_cc_resources(struct platform_device *plat_dev) goto post_hash_err; } - /* All set, we can allow autosuspend */ - cc_pm_go(new_drvdata); - /* If we got here and FIPS mode is enabled * it means all FIPS test passed, so let TEE * know we're good. */ cc_set_ree_fips_status(new_drvdata, true); + pm_runtime_put(dev); return 0; post_hash_err: @@ -539,16 +527,17 @@ post_buf_mgr_err: cc_buffer_mgr_fini(new_drvdata); post_req_mgr_err: cc_req_mgr_fini(new_drvdata); -post_sram_mgr_err: - cc_sram_mgr_fini(new_drvdata); post_fips_init_err: cc_fips_fini(new_drvdata); post_debugfs_err: cc_debugfs_fini(new_drvdata); post_regs_err: fini_cc_regs(new_drvdata); -post_clk_err: - cc_clk_off(new_drvdata); +post_pm_err: + pm_runtime_put_noidle(dev); + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + clk_disable_unprepare(new_drvdata->clk); return rc; } @@ -560,36 +549,22 @@ void fini_cc_regs(struct cc_drvdata *drvdata) static void cleanup_cc_resources(struct platform_device *plat_dev) { + struct device *dev = &plat_dev->dev; struct cc_drvdata *drvdata = (struct cc_drvdata *)platform_get_drvdata(plat_dev); cc_aead_free(drvdata); cc_hash_free(drvdata); cc_cipher_free(drvdata); - cc_pm_fini(drvdata); cc_buffer_mgr_fini(drvdata); cc_req_mgr_fini(drvdata); - cc_sram_mgr_fini(drvdata); 
cc_fips_fini(drvdata); cc_debugfs_fini(drvdata); fini_cc_regs(drvdata); - cc_clk_off(drvdata); -} - -int cc_clk_on(struct cc_drvdata *drvdata) -{ - struct clk *clk = drvdata->clk; - int rc; - - if (IS_ERR(clk)) - /* Not all devices have a clock associated with CCREE */ - return 0; - - rc = clk_prepare_enable(clk); - if (rc) - return rc; - - return 0; + pm_runtime_put_noidle(dev); + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + clk_disable_unprepare(drvdata->clk); } unsigned int cc_get_default_hash_len(struct cc_drvdata *drvdata) @@ -600,17 +575,6 @@ unsigned int cc_get_default_hash_len(struct cc_drvdata *drvdata) return HASH_LEN_SIZE_630; } -void cc_clk_off(struct cc_drvdata *drvdata) -{ - struct clk *clk = drvdata->clk; - - if (IS_ERR(clk)) - /* Not all devices have a clock associated with CCREE */ - return; - - clk_disable_unprepare(clk); -} - static int ccree_probe(struct platform_device *plat_dev) { int rc; @@ -653,7 +617,6 @@ static struct platform_driver ccree_driver = { static int __init ccree_init(void) { - cc_hash_global_init(); cc_debugfs_global_init(); return platform_driver_register(&ccree_driver); diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h index c227718ba992..d938886390d2 100644 --- a/drivers/crypto/ccree/cc_driver.h +++ b/drivers/crypto/ccree/cc_driver.h @@ -26,7 +26,6 @@ #include <linux/clk.h> #include <linux/platform_device.h> -/* Registers definitions from shared/hw/ree_include */ #include "cc_host_regs.h" #include "cc_crypto_ctx.h" #include "cc_hw_queue_defs.h" @@ -71,9 +70,7 @@ enum cc_std_body { #define CC_NVM_IS_IDLE_MASK BIT(CC_NVM_IS_IDLE_VALUE_BIT_SHIFT) -#define AXIM_MON_COMP_VALUE GENMASK(CC_AXIM_MON_COMP_VALUE_BIT_SIZE + \ - CC_AXIM_MON_COMP_VALUE_BIT_SHIFT, \ - CC_AXIM_MON_COMP_VALUE_BIT_SHIFT) +#define AXIM_MON_COMP_VALUE CC_GENMASK(CC_AXIM_MON_COMP_VALUE) #define CC_CPP_AES_ABORT_MASK ( \ BIT(CC_HOST_IMR_REE_OP_ABORTED_AES_0_MASK_BIT_SHIFT) | \ @@ -139,15 +136,15 @@ struct cc_drvdata 
{ int irq; struct completion hw_queue_avail; /* wait for HW queue availability */ struct platform_device *plat_dev; - cc_sram_addr_t mlli_sram_addr; - void *buff_mgr_handle; - void *cipher_handle; + u32 mlli_sram_addr; + struct dma_pool *mlli_buffs_pool; + struct list_head alg_list; void *hash_handle; void *aead_handle; void *request_mgr_handle; void *fips_handle; - void *sram_mgr_handle; - void *debugfs; + u32 sram_free_offset; /* offset to non-allocated area in SRAM */ + struct dentry *dir; /* for debugfs */ struct clk *clk; bool coherent; char *hw_rev_name; @@ -158,7 +155,6 @@ struct cc_drvdata { int std_bodies; bool sec_disabled; u32 comp_mask; - bool pm_on; }; struct cc_crypto_alg { @@ -212,8 +208,6 @@ static inline void dump_byte_array(const char *name, const u8 *the_array, bool cc_wait_for_reset_completion(struct cc_drvdata *drvdata); int init_cc_regs(struct cc_drvdata *drvdata, bool is_probe); void fini_cc_regs(struct cc_drvdata *drvdata); -int cc_clk_on(struct cc_drvdata *drvdata); -void cc_clk_off(struct cc_drvdata *drvdata); unsigned int cc_get_default_hash_len(struct cc_drvdata *drvdata); static inline void cc_iowrite(struct cc_drvdata *drvdata, u32 reg, u32 val) diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index 912e5ce5079d..d5310783af15 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -20,8 +20,8 @@ #define CC_SM3_HASH_LEN_SIZE 8 struct cc_hash_handle { - cc_sram_addr_t digest_len_sram_addr; /* const value in SRAM*/ - cc_sram_addr_t larval_digest_sram_addr; /* const value in SRAM */ + u32 digest_len_sram_addr; /* const value in SRAM*/ + u32 larval_digest_sram_addr; /* const value in SRAM */ struct list_head hash_list; }; @@ -39,12 +39,19 @@ static const u32 cc_sha256_init[] = { SHA256_H3, SHA256_H2, SHA256_H1, SHA256_H0 }; static const u32 cc_digest_len_sha512_init[] = { 0x00000080, 0x00000000, 0x00000000, 0x00000000 }; -static u64 cc_sha384_init[] = { - SHA384_H7, SHA384_H6, 
SHA384_H5, SHA384_H4, - SHA384_H3, SHA384_H2, SHA384_H1, SHA384_H0 }; -static u64 cc_sha512_init[] = { - SHA512_H7, SHA512_H6, SHA512_H5, SHA512_H4, - SHA512_H3, SHA512_H2, SHA512_H1, SHA512_H0 }; + +/* + * Due to the way the HW works, every double word in the SHA384 and SHA512 + * larval hashes must be stored in hi/lo order + */ +#define hilo(x) upper_32_bits(x), lower_32_bits(x) +static const u32 cc_sha384_init[] = { + hilo(SHA384_H7), hilo(SHA384_H6), hilo(SHA384_H5), hilo(SHA384_H4), + hilo(SHA384_H3), hilo(SHA384_H2), hilo(SHA384_H1), hilo(SHA384_H0) }; +static const u32 cc_sha512_init[] = { + hilo(SHA512_H7), hilo(SHA512_H6), hilo(SHA512_H5), hilo(SHA512_H4), + hilo(SHA512_H3), hilo(SHA512_H2), hilo(SHA512_H1), hilo(SHA512_H0) }; + static const u32 cc_sm3_init[] = { SM3_IVH, SM3_IVG, SM3_IVF, SM3_IVE, SM3_IVD, SM3_IVC, SM3_IVB, SM3_IVA }; @@ -342,7 +349,6 @@ static int cc_fin_result(struct cc_hw_desc *desc, struct ahash_request *req, /* Get final MAC result */ hw_desc_init(&desc[idx]); set_hash_cipher_mode(&desc[idx], ctx->hw_mode, ctx->hash_mode); - /* TODO */ set_dout_dlli(&desc[idx], state->digest_result_dma_addr, digestsize, NS_BIT, 1); set_queue_last_ind(ctx->drvdata, &desc[idx]); @@ -422,8 +428,7 @@ static int cc_hash_digest(struct ahash_request *req) bool is_hmac = ctx->is_hmac; struct cc_crypto_req cc_req = {}; struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN]; - cc_sram_addr_t larval_digest_addr = - cc_larval_digest_addr(ctx->drvdata, ctx->hash_mode); + u32 larval_digest_addr; int idx = 0; int rc = 0; gfp_t flags = cc_gfp_flags(&req->base); @@ -465,6 +470,8 @@ static int cc_hash_digest(struct ahash_request *req) set_din_type(&desc[idx], DMA_DLLI, state->digest_buff_dma_addr, ctx->inter_digestsize, NS_BIT); } else { + larval_digest_addr = cc_larval_digest_addr(ctx->drvdata, + ctx->hash_mode); set_din_sram(&desc[idx], larval_digest_addr, ctx->inter_digestsize); } @@ -726,7 +733,7 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key, int 
digestsize = 0; int i, idx = 0, rc = 0; struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN]; - cc_sram_addr_t larval_addr; + u32 larval_addr; struct device *dev; ctx = crypto_ahash_ctx(ahash); @@ -752,7 +759,7 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key, return -ENOMEM; ctx->key_params.key_dma_addr = - dma_map_single(dev, (void *)ctx->key_params.key, keylen, + dma_map_single(dev, ctx->key_params.key, keylen, DMA_TO_DEVICE); if (dma_mapping_error(dev, ctx->key_params.key_dma_addr)) { dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n", @@ -1067,8 +1074,8 @@ static int cc_alloc_ctx(struct cc_hash_ctx *ctx) ctx->key_params.keylen = 0; ctx->digest_buff_dma_addr = - dma_map_single(dev, (void *)ctx->digest_buff, - sizeof(ctx->digest_buff), DMA_BIDIRECTIONAL); + dma_map_single(dev, ctx->digest_buff, sizeof(ctx->digest_buff), + DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, ctx->digest_buff_dma_addr)) { dev_err(dev, "Mapping digest len %zu B at va=%pK for DMA failed\n", sizeof(ctx->digest_buff), ctx->digest_buff); @@ -1079,7 +1086,7 @@ static int cc_alloc_ctx(struct cc_hash_ctx *ctx) &ctx->digest_buff_dma_addr); ctx->opad_tmp_keys_dma_addr = - dma_map_single(dev, (void *)ctx->opad_tmp_keys_buff, + dma_map_single(dev, ctx->opad_tmp_keys_buff, sizeof(ctx->opad_tmp_keys_buff), DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, ctx->opad_tmp_keys_dma_addr)) { @@ -1196,8 +1203,8 @@ static int cc_mac_update(struct ahash_request *req) idx++; /* Setup request structure */ - cc_req.user_cb = (void *)cc_update_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_update_complete; + cc_req.user_arg = req; rc = cc_send_request(ctx->drvdata, &cc_req, desc, idx, &req->base); if (rc != -EINPROGRESS && rc != -EBUSY) { @@ -1254,8 +1261,8 @@ static int cc_mac_final(struct ahash_request *req) } /* Setup request structure */ - cc_req.user_cb = (void *)cc_hash_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_hash_complete; + cc_req.user_arg = 
req; if (state->xcbc_count && rem_cnt == 0) { /* Load key for ECB decryption */ @@ -1311,7 +1318,6 @@ static int cc_mac_final(struct ahash_request *req) /* Get final MAC result */ hw_desc_init(&desc[idx]); - /* TODO */ set_dout_dlli(&desc[idx], state->digest_result_dma_addr, digestsize, NS_BIT, 1); set_queue_last_ind(ctx->drvdata, &desc[idx]); @@ -1369,8 +1375,8 @@ static int cc_mac_finup(struct ahash_request *req) } /* Setup request structure */ - cc_req.user_cb = (void *)cc_hash_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_hash_complete; + cc_req.user_arg = req; if (ctx->hw_mode == DRV_CIPHER_XCBC_MAC) { key_len = CC_AES_128_BIT_KEY_SIZE; @@ -1393,7 +1399,6 @@ static int cc_mac_finup(struct ahash_request *req) /* Get final MAC result */ hw_desc_init(&desc[idx]); - /* TODO */ set_dout_dlli(&desc[idx], state->digest_result_dma_addr, digestsize, NS_BIT, 1); set_queue_last_ind(ctx->drvdata, &desc[idx]); @@ -1448,8 +1453,8 @@ static int cc_mac_digest(struct ahash_request *req) } /* Setup request structure */ - cc_req.user_cb = (void *)cc_digest_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_digest_complete; + cc_req.user_arg = req; if (ctx->hw_mode == DRV_CIPHER_XCBC_MAC) { key_len = CC_AES_128_BIT_KEY_SIZE; @@ -1820,7 +1825,7 @@ static struct cc_hash_alg *cc_alloc_hash_alg(struct cc_hash_template *template, struct crypto_alg *alg; struct ahash_alg *halg; - t_crypto_alg = kzalloc(sizeof(*t_crypto_alg), GFP_KERNEL); + t_crypto_alg = devm_kzalloc(dev, sizeof(*t_crypto_alg), GFP_KERNEL); if (!t_crypto_alg) return ERR_PTR(-ENOMEM); @@ -1857,104 +1862,85 @@ static struct cc_hash_alg *cc_alloc_hash_alg(struct cc_hash_template *template, return t_crypto_alg; } +static int cc_init_copy_sram(struct cc_drvdata *drvdata, const u32 *data, + unsigned int size, u32 *sram_buff_ofs) +{ + struct cc_hw_desc larval_seq[CC_DIGEST_SIZE_MAX / sizeof(u32)]; + unsigned int larval_seq_len = 0; + int rc; + + cc_set_sram_desc(data, *sram_buff_ofs, size / 
sizeof(*data), + larval_seq, &larval_seq_len); + rc = send_request_init(drvdata, larval_seq, larval_seq_len); + if (rc) + return rc; + + *sram_buff_ofs += size; + return 0; +} + int cc_init_hash_sram(struct cc_drvdata *drvdata) { struct cc_hash_handle *hash_handle = drvdata->hash_handle; - cc_sram_addr_t sram_buff_ofs = hash_handle->digest_len_sram_addr; - unsigned int larval_seq_len = 0; - struct cc_hw_desc larval_seq[CC_DIGEST_SIZE_MAX / sizeof(u32)]; + u32 sram_buff_ofs = hash_handle->digest_len_sram_addr; bool large_sha_supported = (drvdata->hw_rev >= CC_HW_REV_712); bool sm3_supported = (drvdata->hw_rev >= CC_HW_REV_713); int rc = 0; /* Copy-to-sram digest-len */ - cc_set_sram_desc(cc_digest_len_init, sram_buff_ofs, - ARRAY_SIZE(cc_digest_len_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_digest_len_init, + sizeof(cc_digest_len_init), &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_digest_len_init); - larval_seq_len = 0; - if (large_sha_supported) { /* Copy-to-sram digest-len for sha384/512 */ - cc_set_sram_desc(cc_digest_len_sha512_init, sram_buff_ofs, - ARRAY_SIZE(cc_digest_len_sha512_init), - larval_seq, &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_digest_len_sha512_init, + sizeof(cc_digest_len_sha512_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - - sram_buff_ofs += sizeof(cc_digest_len_sha512_init); - larval_seq_len = 0; } /* The initial digests offset */ hash_handle->larval_digest_sram_addr = sram_buff_ofs; /* Copy-to-sram initial SHA* digests */ - cc_set_sram_desc(cc_md5_init, sram_buff_ofs, ARRAY_SIZE(cc_md5_init), - larval_seq, &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_md5_init, sizeof(cc_md5_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - 
sram_buff_ofs += sizeof(cc_md5_init); - larval_seq_len = 0; - cc_set_sram_desc(cc_sha1_init, sram_buff_ofs, - ARRAY_SIZE(cc_sha1_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha1_init, sizeof(cc_sha1_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sha1_init); - larval_seq_len = 0; - cc_set_sram_desc(cc_sha224_init, sram_buff_ofs, - ARRAY_SIZE(cc_sha224_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha224_init, sizeof(cc_sha224_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sha224_init); - larval_seq_len = 0; - cc_set_sram_desc(cc_sha256_init, sram_buff_ofs, - ARRAY_SIZE(cc_sha256_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha256_init, sizeof(cc_sha256_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sha256_init); - larval_seq_len = 0; if (sm3_supported) { - cc_set_sram_desc(cc_sm3_init, sram_buff_ofs, - ARRAY_SIZE(cc_sm3_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sm3_init, + sizeof(cc_sm3_init), &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sm3_init); - larval_seq_len = 0; } if (large_sha_supported) { - cc_set_sram_desc((u32 *)cc_sha384_init, sram_buff_ofs, - (ARRAY_SIZE(cc_sha384_init) * 2), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha384_init, + sizeof(cc_sha384_init), &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sha384_init); - larval_seq_len = 0; - cc_set_sram_desc((u32 *)cc_sha512_init, sram_buff_ofs, 
- (ARRAY_SIZE(cc_sha512_init) * 2), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha512_init, + sizeof(cc_sha512_init), &sram_buff_ofs); if (rc) goto init_digest_const_err; } @@ -1963,38 +1949,16 @@ init_digest_const_err: return rc; } -static void __init cc_swap_dwords(u32 *buf, unsigned long size) -{ - int i; - u32 tmp; - - for (i = 0; i < size; i += 2) { - tmp = buf[i]; - buf[i] = buf[i + 1]; - buf[i + 1] = tmp; - } -} - -/* - * Due to the way the HW works we need to swap every - * double word in the SHA384 and SHA512 larval hashes - */ -void __init cc_hash_global_init(void) -{ - cc_swap_dwords((u32 *)&cc_sha384_init, (ARRAY_SIZE(cc_sha384_init) * 2)); - cc_swap_dwords((u32 *)&cc_sha512_init, (ARRAY_SIZE(cc_sha512_init) * 2)); -} - int cc_hash_alloc(struct cc_drvdata *drvdata) { struct cc_hash_handle *hash_handle; - cc_sram_addr_t sram_buff; + u32 sram_buff; u32 sram_size_to_alloc; struct device *dev = drvdata_to_dev(drvdata); int rc = 0; int alg; - hash_handle = kzalloc(sizeof(*hash_handle), GFP_KERNEL); + hash_handle = devm_kzalloc(dev, sizeof(*hash_handle), GFP_KERNEL); if (!hash_handle) return -ENOMEM; @@ -2016,7 +1980,6 @@ int cc_hash_alloc(struct cc_drvdata *drvdata) sram_buff = cc_sram_alloc(drvdata, sram_size_to_alloc); if (sram_buff == NULL_SRAM_ADDR) { - dev_err(dev, "SRAM pool exhausted\n"); rc = -ENOMEM; goto fail; } @@ -2056,12 +2019,10 @@ int cc_hash_alloc(struct cc_drvdata *drvdata) if (rc) { dev_err(dev, "%s alg registration failed\n", driver_hash[alg].driver_name); - kfree(t_alg); goto fail; - } else { - list_add_tail(&t_alg->entry, - &hash_handle->hash_list); } + + list_add_tail(&t_alg->entry, &hash_handle->hash_list); } if (hw_mode == DRV_CIPHER_XCBC_MAC || hw_mode == DRV_CIPHER_CMAC) @@ -2081,18 +2042,16 @@ int cc_hash_alloc(struct cc_drvdata *drvdata) if (rc) { dev_err(dev, "%s alg registration failed\n", driver_hash[alg].driver_name); - kfree(t_alg); goto 
fail; - } else { - list_add_tail(&t_alg->entry, &hash_handle->hash_list); } + + list_add_tail(&t_alg->entry, &hash_handle->hash_list); } return 0; fail: - kfree(drvdata->hash_handle); - drvdata->hash_handle = NULL; + cc_hash_free(drvdata); return rc; } @@ -2101,17 +2060,12 @@ int cc_hash_free(struct cc_drvdata *drvdata) struct cc_hash_alg *t_hash_alg, *hash_n; struct cc_hash_handle *hash_handle = drvdata->hash_handle; - if (hash_handle) { - list_for_each_entry_safe(t_hash_alg, hash_n, - &hash_handle->hash_list, entry) { - crypto_unregister_ahash(&t_hash_alg->ahash_alg); - list_del(&t_hash_alg->entry); - kfree(t_hash_alg); - } - - kfree(hash_handle); - drvdata->hash_handle = NULL; + list_for_each_entry_safe(t_hash_alg, hash_n, &hash_handle->hash_list, + entry) { + crypto_unregister_ahash(&t_hash_alg->ahash_alg); + list_del(&t_hash_alg->entry); } + return 0; } @@ -2272,22 +2226,23 @@ static const void *cc_larval_digest(struct device *dev, u32 mode) } } -/*! - * Gets the address of the initial digest in SRAM +/** + * cc_larval_digest_addr() - Get the address of the initial digest in SRAM * according to the given hash mode * - * \param drvdata - * \param mode The Hash mode. Supported modes: MD5/SHA1/SHA224/SHA256 + * @drvdata: Associated device driver context + * @mode: The Hash mode. 
Supported modes: MD5/SHA1/SHA224/SHA256 * - * \return u32 The address of the initial digest in SRAM + * Return: + * The address of the initial digest in SRAM */ -cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode) +u32 cc_larval_digest_addr(void *drvdata, u32 mode) { struct cc_drvdata *_drvdata = (struct cc_drvdata *)drvdata; struct cc_hash_handle *hash_handle = _drvdata->hash_handle; struct device *dev = drvdata_to_dev(_drvdata); bool sm3_supported = (_drvdata->hw_rev >= CC_HW_REV_713); - cc_sram_addr_t addr; + u32 addr; switch (mode) { case DRV_HASH_NULL: @@ -2339,12 +2294,11 @@ cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode) return hash_handle->larval_digest_sram_addr; } -cc_sram_addr_t -cc_digest_len_addr(void *drvdata, u32 mode) +u32 cc_digest_len_addr(void *drvdata, u32 mode) { struct cc_drvdata *_drvdata = (struct cc_drvdata *)drvdata; struct cc_hash_handle *hash_handle = _drvdata->hash_handle; - cc_sram_addr_t digest_len_addr = hash_handle->digest_len_sram_addr; + u32 digest_len_addr = hash_handle->digest_len_sram_addr; switch (mode) { case DRV_HASH_SHA1: diff --git a/drivers/crypto/ccree/cc_hash.h b/drivers/crypto/ccree/cc_hash.h index 0d6dc61484d7..3d0f2179e07e 100644 --- a/drivers/crypto/ccree/cc_hash.h +++ b/drivers/crypto/ccree/cc_hash.h @@ -80,30 +80,27 @@ int cc_hash_alloc(struct cc_drvdata *drvdata); int cc_init_hash_sram(struct cc_drvdata *drvdata); int cc_hash_free(struct cc_drvdata *drvdata); -/*! - * Gets the initial digest length +/** + * cc_digest_len_addr() - Gets the initial digest length * - * \param drvdata - * \param mode The Hash mode. Supported modes: - * MD5/SHA1/SHA224/SHA256/SHA384/SHA512 + * @drvdata: Associated device driver context + * @mode: The Hash mode. 
Supported modes: MD5/SHA1/SHA224/SHA256/SHA384/SHA512 * - * \return u32 returns the address of the initial digest length in SRAM + * Return: + * Returns the address of the initial digest length in SRAM */ -cc_sram_addr_t -cc_digest_len_addr(void *drvdata, u32 mode); +u32 cc_digest_len_addr(void *drvdata, u32 mode); -/*! - * Gets the address of the initial digest in SRAM +/** + * cc_larval_digest_addr() - Gets the address of the initial digest in SRAM * according to the given hash mode * - * \param drvdata - * \param mode The Hash mode. Supported modes: - * MD5/SHA1/SHA224/SHA256/SHA384/SHA512 + * @drvdata: Associated device driver context + * @mode: The Hash mode. Supported modes: MD5/SHA1/SHA224/SHA256/SHA384/SHA512 * - * \return u32 The address of the initial digest in SRAM + * Return: + * The address of the initial digest in SRAM */ -cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode); - -void cc_hash_global_init(void); +u32 cc_larval_digest_addr(void *drvdata, u32 mode); #endif /*__CC_HASH_H__*/ diff --git a/drivers/crypto/ccree/cc_hw_queue_defs.h b/drivers/crypto/ccree/cc_hw_queue_defs.h index 9f4db9956e91..15df58c66911 100644 --- a/drivers/crypto/ccree/cc_hw_queue_defs.h +++ b/drivers/crypto/ccree/cc_hw_queue_defs.h @@ -17,46 +17,43 @@ /* Define max. 
available slots in HW queue */ #define HW_QUEUE_SLOTS_MAX 15 -#define CC_REG_LOW(word, name) \ - (CC_DSCRPTR_QUEUE_WORD ## word ## _ ## name ## _BIT_SHIFT) - -#define CC_REG_HIGH(word, name) \ - (CC_REG_LOW(word, name) + \ - CC_DSCRPTR_QUEUE_WORD ## word ## _ ## name ## _BIT_SIZE - 1) - -#define CC_GENMASK(word, name) \ - GENMASK(CC_REG_HIGH(word, name), CC_REG_LOW(word, name)) - -#define WORD0_VALUE CC_GENMASK(0, VALUE) -#define WORD0_CPP_CIPHER_MODE CC_GENMASK(0, CPP_CIPHER_MODE) -#define WORD1_DIN_CONST_VALUE CC_GENMASK(1, DIN_CONST_VALUE) -#define WORD1_DIN_DMA_MODE CC_GENMASK(1, DIN_DMA_MODE) -#define WORD1_DIN_SIZE CC_GENMASK(1, DIN_SIZE) -#define WORD1_NOT_LAST CC_GENMASK(1, NOT_LAST) -#define WORD1_NS_BIT CC_GENMASK(1, NS_BIT) -#define WORD1_LOCK_QUEUE CC_GENMASK(1, LOCK_QUEUE) -#define WORD2_VALUE CC_GENMASK(2, VALUE) -#define WORD3_DOUT_DMA_MODE CC_GENMASK(3, DOUT_DMA_MODE) -#define WORD3_DOUT_LAST_IND CC_GENMASK(3, DOUT_LAST_IND) -#define WORD3_DOUT_SIZE CC_GENMASK(3, DOUT_SIZE) -#define WORD3_HASH_XOR_BIT CC_GENMASK(3, HASH_XOR_BIT) -#define WORD3_NS_BIT CC_GENMASK(3, NS_BIT) -#define WORD3_QUEUE_LAST_IND CC_GENMASK(3, QUEUE_LAST_IND) -#define WORD4_ACK_NEEDED CC_GENMASK(4, ACK_NEEDED) -#define WORD4_AES_SEL_N_HASH CC_GENMASK(4, AES_SEL_N_HASH) -#define WORD4_AES_XOR_CRYPTO_KEY CC_GENMASK(4, AES_XOR_CRYPTO_KEY) -#define WORD4_BYTES_SWAP CC_GENMASK(4, BYTES_SWAP) -#define WORD4_CIPHER_CONF0 CC_GENMASK(4, CIPHER_CONF0) -#define WORD4_CIPHER_CONF1 CC_GENMASK(4, CIPHER_CONF1) -#define WORD4_CIPHER_CONF2 CC_GENMASK(4, CIPHER_CONF2) -#define WORD4_CIPHER_DO CC_GENMASK(4, CIPHER_DO) -#define WORD4_CIPHER_MODE CC_GENMASK(4, CIPHER_MODE) -#define WORD4_CMAC_SIZE0 CC_GENMASK(4, CMAC_SIZE0) -#define WORD4_DATA_FLOW_MODE CC_GENMASK(4, DATA_FLOW_MODE) -#define WORD4_KEY_SIZE CC_GENMASK(4, KEY_SIZE) -#define WORD4_SETUP_OPERATION CC_GENMASK(4, SETUP_OPERATION) -#define WORD5_DIN_ADDR_HIGH CC_GENMASK(5, DIN_ADDR_HIGH) -#define WORD5_DOUT_ADDR_HIGH CC_GENMASK(5, 
DOUT_ADDR_HIGH) +#define CC_REG_LOW(name) (name ## _BIT_SHIFT) +#define CC_REG_HIGH(name) (CC_REG_LOW(name) + name ## _BIT_SIZE - 1) +#define CC_GENMASK(name) GENMASK(CC_REG_HIGH(name), CC_REG_LOW(name)) + +#define CC_HWQ_GENMASK(word, field) \ + CC_GENMASK(CC_DSCRPTR_QUEUE_WORD ## word ## _ ## field) + +#define WORD0_VALUE CC_HWQ_GENMASK(0, VALUE) +#define WORD0_CPP_CIPHER_MODE CC_HWQ_GENMASK(0, CPP_CIPHER_MODE) +#define WORD1_DIN_CONST_VALUE CC_HWQ_GENMASK(1, DIN_CONST_VALUE) +#define WORD1_DIN_DMA_MODE CC_HWQ_GENMASK(1, DIN_DMA_MODE) +#define WORD1_DIN_SIZE CC_HWQ_GENMASK(1, DIN_SIZE) +#define WORD1_NOT_LAST CC_HWQ_GENMASK(1, NOT_LAST) +#define WORD1_NS_BIT CC_HWQ_GENMASK(1, NS_BIT) +#define WORD1_LOCK_QUEUE CC_HWQ_GENMASK(1, LOCK_QUEUE) +#define WORD2_VALUE CC_HWQ_GENMASK(2, VALUE) +#define WORD3_DOUT_DMA_MODE CC_HWQ_GENMASK(3, DOUT_DMA_MODE) +#define WORD3_DOUT_LAST_IND CC_HWQ_GENMASK(3, DOUT_LAST_IND) +#define WORD3_DOUT_SIZE CC_HWQ_GENMASK(3, DOUT_SIZE) +#define WORD3_HASH_XOR_BIT CC_HWQ_GENMASK(3, HASH_XOR_BIT) +#define WORD3_NS_BIT CC_HWQ_GENMASK(3, NS_BIT) +#define WORD3_QUEUE_LAST_IND CC_HWQ_GENMASK(3, QUEUE_LAST_IND) +#define WORD4_ACK_NEEDED CC_HWQ_GENMASK(4, ACK_NEEDED) +#define WORD4_AES_SEL_N_HASH CC_HWQ_GENMASK(4, AES_SEL_N_HASH) +#define WORD4_AES_XOR_CRYPTO_KEY CC_HWQ_GENMASK(4, AES_XOR_CRYPTO_KEY) +#define WORD4_BYTES_SWAP CC_HWQ_GENMASK(4, BYTES_SWAP) +#define WORD4_CIPHER_CONF0 CC_HWQ_GENMASK(4, CIPHER_CONF0) +#define WORD4_CIPHER_CONF1 CC_HWQ_GENMASK(4, CIPHER_CONF1) +#define WORD4_CIPHER_CONF2 CC_HWQ_GENMASK(4, CIPHER_CONF2) +#define WORD4_CIPHER_DO CC_HWQ_GENMASK(4, CIPHER_DO) +#define WORD4_CIPHER_MODE CC_HWQ_GENMASK(4, CIPHER_MODE) +#define WORD4_CMAC_SIZE0 CC_HWQ_GENMASK(4, CMAC_SIZE0) +#define WORD4_DATA_FLOW_MODE CC_HWQ_GENMASK(4, DATA_FLOW_MODE) +#define WORD4_KEY_SIZE CC_HWQ_GENMASK(4, KEY_SIZE) +#define WORD4_SETUP_OPERATION CC_HWQ_GENMASK(4, SETUP_OPERATION) +#define WORD5_DIN_ADDR_HIGH CC_HWQ_GENMASK(5, DIN_ADDR_HIGH) +#define 
WORD5_DOUT_ADDR_HIGH CC_HWQ_GENMASK(5, DOUT_ADDR_HIGH) /****************************************************************************** * TYPE DEFINITIONS @@ -207,31 +204,32 @@ enum cc_hash_cipher_pad { /* Descriptor packing macros */ /*****************************/ -/* - * Init a HW descriptor struct - * @pdesc: pointer HW descriptor struct +/** + * hw_desc_init() - Init a HW descriptor struct + * @pdesc: pointer to HW descriptor struct */ static inline void hw_desc_init(struct cc_hw_desc *pdesc) { memset(pdesc, 0, sizeof(struct cc_hw_desc)); } -/* - * Indicates the end of current HW descriptors flow and release the HW engines. +/** + * set_queue_last_ind_bit() - Indicate the end of current HW descriptors flow + * and release the HW engines. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_queue_last_ind_bit(struct cc_hw_desc *pdesc) { pdesc->word[3] |= FIELD_PREP(WORD3_QUEUE_LAST_IND, 1); } -/* - * Set the DIN field of a HW descriptors +/** + * set_din_type() - Set the DIN field of a HW descriptor * - * @pdesc: pointer HW descriptor struct - * @dma_mode: dmaMode The DMA mode: NO_DMA, SRAM, DLLI, MLLI, CONSTANT - * @addr: dinAdr DIN address + * @pdesc: Pointer to HW descriptor struct + * @dma_mode: The DMA mode: NO_DMA, SRAM, DLLI, MLLI, CONSTANT + * @addr: DIN address * @size: Data size in bytes * @axi_sec: AXI secure bit */ @@ -239,20 +237,20 @@ static inline void set_din_type(struct cc_hw_desc *pdesc, enum cc_dma_mode dma_mode, dma_addr_t addr, u32 size, enum cc_axi_sec axi_sec) { - pdesc->word[0] = (u32)addr; + pdesc->word[0] = lower_32_bits(addr); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - pdesc->word[5] |= FIELD_PREP(WORD5_DIN_ADDR_HIGH, ((u16)(addr >> 32))); + pdesc->word[5] |= FIELD_PREP(WORD5_DIN_ADDR_HIGH, upper_32_bits(addr)); #endif pdesc->word[1] |= FIELD_PREP(WORD1_DIN_DMA_MODE, dma_mode) | FIELD_PREP(WORD1_DIN_SIZE, size) | FIELD_PREP(WORD1_NS_BIT, axi_sec); } -/* - * Set the DIN field of a 
HW descriptors to NO DMA mode. +/** + * set_din_no_dma() - Set the DIN field of a HW descriptor to NO DMA mode. * Used for NOP descriptor, register patches and other special modes. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @addr: DIN address * @size: Data size in bytes */ @@ -262,14 +260,11 @@ static inline void set_din_no_dma(struct cc_hw_desc *pdesc, u32 addr, u32 size) pdesc->word[1] |= FIELD_PREP(WORD1_DIN_SIZE, size); } -/* - * Setup the special CPP descriptor +/** + * set_cpp_crypto_key() - Setup the special CPP descriptor * - * @pdesc: pointer HW descriptor struct - * @alg: cipher used (AES / SM4) - * @mode: mode used (CTR or CBC) - * @slot: slot number - * @ksize: key size + * @pdesc: Pointer to HW descriptor struct + * @slot: Slot number */ static inline void set_cpp_crypto_key(struct cc_hw_desc *pdesc, u8 slot) { @@ -281,27 +276,26 @@ static inline void set_cpp_crypto_key(struct cc_hw_desc *pdesc, u8 slot) pdesc->word[4] |= FIELD_PREP(WORD4_SETUP_OPERATION, slot); } -/* - * Set the DIN field of a HW descriptors to SRAM mode. +/** + * set_din_sram() - Set the DIN field of a HW descriptor to SRAM mode. * Note: No need to check SRAM alignment since host requests do not use SRAM and - * adaptor will enforce alignment check. + * the adaptor will enforce alignment checks. 
* - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @addr: DIN address - * @size Data size in bytes + * @size: Data size in bytes */ -static inline void set_din_sram(struct cc_hw_desc *pdesc, dma_addr_t addr, - u32 size) +static inline void set_din_sram(struct cc_hw_desc *pdesc, u32 addr, u32 size) { - pdesc->word[0] = (u32)addr; + pdesc->word[0] = addr; pdesc->word[1] |= FIELD_PREP(WORD1_DIN_SIZE, size) | FIELD_PREP(WORD1_DIN_DMA_MODE, DMA_SRAM); } -/* - * Set the DIN field of a HW descriptors to CONST mode +/** + * set_din_const() - Set the DIN field of a HW descriptor to CONST mode * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @val: DIN const value * @size: Data size in bytes */ @@ -313,20 +307,20 @@ static inline void set_din_const(struct cc_hw_desc *pdesc, u32 val, u32 size) FIELD_PREP(WORD1_DIN_SIZE, size); } -/* - * Set the DIN not last input data indicator +/** + * set_din_not_last_indication() - Set the DIN not last input data indicator * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_din_not_last_indication(struct cc_hw_desc *pdesc) { pdesc->word[1] |= FIELD_PREP(WORD1_NOT_LAST, 1); } -/* - * Set the DOUT field of a HW descriptors +/** + * set_dout_type() - Set the DOUT field of a HW descriptor * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @dma_mode: The DMA mode: NO_DMA, SRAM, DLLI, MLLI, CONSTANT * @addr: DOUT address * @size: Data size in bytes @@ -336,24 +330,24 @@ static inline void set_dout_type(struct cc_hw_desc *pdesc, enum cc_dma_mode dma_mode, dma_addr_t addr, u32 size, enum cc_axi_sec axi_sec) { - pdesc->word[2] = (u32)addr; + pdesc->word[2] = lower_32_bits(addr); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - pdesc->word[5] |= FIELD_PREP(WORD5_DOUT_ADDR_HIGH, ((u16)(addr >> 32))); + pdesc->word[5] |= FIELD_PREP(WORD5_DOUT_ADDR_HIGH, upper_32_bits(addr)); #endif 
pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_DMA_MODE, dma_mode) | FIELD_PREP(WORD3_DOUT_SIZE, size) | FIELD_PREP(WORD3_NS_BIT, axi_sec); } -/* - * Set the DOUT field of a HW descriptors to DLLI type +/** + * set_dout_dlli() - Set the DOUT field of a HW descriptor to DLLI type * The LAST INDICATION is provided by the user * - * @pdesc pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @addr: DOUT address * @size: Data size in bytes - * @last_ind: The last indication bit * @axi_sec: AXI secure bit + * @last_ind: The last indication bit */ static inline void set_dout_dlli(struct cc_hw_desc *pdesc, dma_addr_t addr, u32 size, enum cc_axi_sec axi_sec, @@ -363,29 +357,28 @@ static inline void set_dout_dlli(struct cc_hw_desc *pdesc, dma_addr_t addr, pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_LAST_IND, last_ind); } -/* - * Set the DOUT field of a HW descriptors to DLLI type +/** + * set_dout_mlli() - Set the DOUT field of a HW descriptor to MLLI type * The LAST INDICATION is provided by the user * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @addr: DOUT address * @size: Data size in bytes - * @last_ind: The last indication bit * @axi_sec: AXI secure bit + * @last_ind: The last indication bit */ -static inline void set_dout_mlli(struct cc_hw_desc *pdesc, dma_addr_t addr, - u32 size, enum cc_axi_sec axi_sec, - bool last_ind) +static inline void set_dout_mlli(struct cc_hw_desc *pdesc, u32 addr, u32 size, + enum cc_axi_sec axi_sec, bool last_ind) { set_dout_type(pdesc, DMA_MLLI, addr, size, axi_sec); pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_LAST_IND, last_ind); } -/* - * Set the DOUT field of a HW descriptors to NO DMA mode. +/** + * set_dout_no_dma() - Set the DOUT field of a HW descriptor to NO DMA mode. * Used for NOP descriptor, register patches and other special modes. 
* - * @pdesc: pointer HW descriptor struct + * @pdesc: pointer to HW descriptor struct * @addr: DOUT address * @size: Data size in bytes * @write_enable: Enables a write operation to a register @@ -398,54 +391,55 @@ static inline void set_dout_no_dma(struct cc_hw_desc *pdesc, u32 addr, FIELD_PREP(WORD3_DOUT_LAST_IND, write_enable); } -/* - * Set the word for the XOR operation. +/** + * set_xor_val() - Set the word for the XOR operation. * - * @pdesc: pointer HW descriptor struct - * @val: xor data value + * @pdesc: Pointer to HW descriptor struct + * @val: XOR data value */ static inline void set_xor_val(struct cc_hw_desc *pdesc, u32 val) { pdesc->word[2] = val; } -/* - * Sets the XOR indicator bit in the descriptor +/** + * set_xor_active() - Set the XOR indicator bit in the descriptor * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_xor_active(struct cc_hw_desc *pdesc) { pdesc->word[3] |= FIELD_PREP(WORD3_HASH_XOR_BIT, 1); } -/* - * Select the AES engine instead of HASH engine when setting up combined mode - * with AES XCBC MAC +/** + * set_aes_not_hash_mode() - Select the AES engine instead of HASH engine when + * setting up combined mode with AES XCBC MAC * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_aes_not_hash_mode(struct cc_hw_desc *pdesc) { pdesc->word[4] |= FIELD_PREP(WORD4_AES_SEL_N_HASH, 1); } -/* - * Set aes xor crypto key, this in some secenrios select SM3 engine +/** + * set_aes_xor_crypto_key() - Set aes xor crypto key, which in some scenarios + * selects the SM3 engine * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_aes_xor_crypto_key(struct cc_hw_desc *pdesc) { pdesc->word[4] |= FIELD_PREP(WORD4_AES_XOR_CRYPTO_KEY, 1); } -/* - * Set the DOUT field of a HW descriptors to SRAM mode +/** + * set_dout_sram() - Set the DOUT field of a HW descriptor to SRAM 
mode * Note: No need to check SRAM alignment since host requests do not use SRAM and - * adaptor will enforce alignment check. + * the adaptor will enforce alignment checks. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @addr: DOUT address * @size: Data size in bytes */ @@ -456,32 +450,34 @@ static inline void set_dout_sram(struct cc_hw_desc *pdesc, u32 addr, u32 size) FIELD_PREP(WORD3_DOUT_SIZE, size); } -/* - * Sets the data unit size for XEX mode in data_out_addr[15:0] +/** + * set_xex_data_unit_size() - Set the data unit size for XEX mode in + * data_out_addr[15:0] * - * @pdesc: pDesc pointer HW descriptor struct - * @size: data unit size for XEX mode + * @pdesc: Pointer to HW descriptor struct + * @size: Data unit size for XEX mode */ static inline void set_xex_data_unit_size(struct cc_hw_desc *pdesc, u32 size) { pdesc->word[2] = size; } -/* - * Set the number of rounds for Multi2 in data_out_addr[15:0] +/** + * set_multi2_num_rounds() - Set the number of rounds for Multi2 in + * data_out_addr[15:0] * - * @pdesc: pointer HW descriptor struct - * @num: number of rounds for Multi2 + * @pdesc: Pointer to HW descriptor struct + * @num: Number of rounds for Multi2 */ static inline void set_multi2_num_rounds(struct cc_hw_desc *pdesc, u32 num) { pdesc->word[2] = num; } -/* - * Set the flow mode. +/** + * set_flow_mode() - Set the flow mode. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @mode: Any one of the modes defined in [CC7x-DESC] */ static inline void set_flow_mode(struct cc_hw_desc *pdesc, @@ -490,22 +486,22 @@ static inline void set_flow_mode(struct cc_hw_desc *pdesc, pdesc->word[4] |= FIELD_PREP(WORD4_DATA_FLOW_MODE, mode); } -/* - * Set the cipher mode. +/** + * set_cipher_mode() - Set the cipher mode. 
* - * @pdesc: pointer HW descriptor struct - * @mode: Any one of the modes defined in [CC7x-DESC] + * @pdesc: Pointer to HW descriptor struct + * @mode: Any one of the modes defined in [CC7x-DESC] */ static inline void set_cipher_mode(struct cc_hw_desc *pdesc, int mode) { pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_MODE, mode); } -/* - * Set the cipher mode for hash algorithms. +/** + * set_hash_cipher_mode() - Set the cipher mode for hash algorithms. * - * @pdesc: pointer HW descriptor struct - * @cipher_mode: Any one of the modes defined in [CC7x-DESC] + * @pdesc: Pointer to HW descriptor struct + * @cipher_mode: Any one of the modes defined in [CC7x-DESC] * @hash_mode: specifies which hash is being handled */ static inline void set_hash_cipher_mode(struct cc_hw_desc *pdesc, @@ -517,10 +513,10 @@ static inline void set_hash_cipher_mode(struct cc_hw_desc *pdesc, set_aes_xor_crypto_key(pdesc); } -/* - * Set the cipher configuration fields. +/** + * set_cipher_config0() - Set the cipher configuration fields. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @mode: Any one of the modes defined in [CC7x-DESC] */ static inline void set_cipher_config0(struct cc_hw_desc *pdesc, int mode) @@ -528,11 +524,11 @@ static inline void set_cipher_config0(struct cc_hw_desc *pdesc, int mode) pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_CONF0, mode); } -/* - * Set the cipher configuration fields. +/** + * set_cipher_config1() - Set the cipher configuration fields. * - * @pdesc: pointer HW descriptor struct - * @config: Any one of the modes defined in [CC7x-DESC] + * @pdesc: Pointer to HW descriptor struct + * @config: Padding mode */ static inline void set_cipher_config1(struct cc_hw_desc *pdesc, enum cc_hash_conf_pad config) @@ -540,10 +536,10 @@ static inline void set_cipher_config1(struct cc_hw_desc *pdesc, pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_CONF1, config); } -/* - * Set HW key configuration fields. 
+/** + * set_hw_crypto_key() - Set HW key configuration fields. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @hw_key: The HW key slot asdefined in enum cc_hw_crypto_key */ static inline void set_hw_crypto_key(struct cc_hw_desc *pdesc, @@ -555,64 +551,64 @@ static inline void set_hw_crypto_key(struct cc_hw_desc *pdesc, (hw_key >> HW_KEY_SHIFT_CIPHER_CFG2)); } -/* - * Set byte order of all setup-finalize descriptors. +/** + * set_bytes_swap() - Set byte order of all setup-finalize descriptors. * - * @pdesc: pointer HW descriptor struct - * @config: Any one of the modes defined in [CC7x-DESC] + * @pdesc: Pointer to HW descriptor struct + * @config: True to enable byte swapping */ static inline void set_bytes_swap(struct cc_hw_desc *pdesc, bool config) { pdesc->word[4] |= FIELD_PREP(WORD4_BYTES_SWAP, config); } -/* - * Set CMAC_SIZE0 mode. +/** + * set_cmac_size0_mode() - Set CMAC_SIZE0 mode. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_cmac_size0_mode(struct cc_hw_desc *pdesc) { pdesc->word[4] |= FIELD_PREP(WORD4_CMAC_SIZE0, 1); } -/* - * Set key size descriptor field. +/** + * set_key_size() - Set key size descriptor field. * - * @pdesc: pointer HW descriptor struct - * @size: key size in bytes (NOT size code) + * @pdesc: Pointer to HW descriptor struct + * @size: Key size in bytes (NOT size code) */ static inline void set_key_size(struct cc_hw_desc *pdesc, u32 size) { pdesc->word[4] |= FIELD_PREP(WORD4_KEY_SIZE, size); } -/* - * Set AES key size. +/** + * set_key_size_aes() - Set AES key size. * - * @pdesc: pointer HW descriptor struct - * @size: key size in bytes (NOT size code) + * @pdesc: Pointer to HW descriptor struct + * @size: Key size in bytes (NOT size code) */ static inline void set_key_size_aes(struct cc_hw_desc *pdesc, u32 size) { set_key_size(pdesc, ((size >> 3) - 2)); } -/* - * Set DES key size. +/** + * set_key_size_des() - Set DES key size. 
* - * @pdesc: pointer HW descriptor struct - * @size: key size in bytes (NOT size code) + * @pdesc: Pointer to HW descriptor struct + * @size: Key size in bytes (NOT size code) */ static inline void set_key_size_des(struct cc_hw_desc *pdesc, u32 size) { set_key_size(pdesc, ((size >> 3) - 1)); } -/* - * Set the descriptor setup mode +/** + * set_setup_mode() - Set the descriptor setup mode * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @mode: Any one of the setup modes defined in [CC7x-DESC] */ static inline void set_setup_mode(struct cc_hw_desc *pdesc, @@ -621,10 +617,10 @@ static inline void set_setup_mode(struct cc_hw_desc *pdesc, pdesc->word[4] |= FIELD_PREP(WORD4_SETUP_OPERATION, mode); } -/* - * Set the descriptor cipher DO +/** + * set_cipher_do() - Set the descriptor cipher DO * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @config: Any one of the cipher do defined in [CC7x-DESC] */ static inline void set_cipher_do(struct cc_hw_desc *pdesc, diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c index 24c368b866f6..d39e1664fc7e 100644 --- a/drivers/crypto/ccree/cc_pm.c +++ b/drivers/crypto/ccree/cc_pm.c @@ -15,29 +15,25 @@ #define POWER_DOWN_ENABLE 0x01 #define POWER_DOWN_DISABLE 0x00 -const struct dev_pm_ops ccree_pm = { - SET_RUNTIME_PM_OPS(cc_pm_suspend, cc_pm_resume, NULL) -}; - -int cc_pm_suspend(struct device *dev) +static int cc_pm_suspend(struct device *dev) { struct cc_drvdata *drvdata = dev_get_drvdata(dev); dev_dbg(dev, "set HOST_POWER_DOWN_EN\n"); fini_cc_regs(drvdata); cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_ENABLE); - cc_clk_off(drvdata); + clk_disable_unprepare(drvdata->clk); return 0; } -int cc_pm_resume(struct device *dev) +static int cc_pm_resume(struct device *dev) { int rc; struct cc_drvdata *drvdata = dev_get_drvdata(dev); dev_dbg(dev, "unset HOST_POWER_DOWN_EN\n"); /* Enables the device source clk */ - rc = 
cc_clk_on(drvdata); + rc = clk_prepare_enable(drvdata->clk); if (rc) { dev_err(dev, "failed getting clock back on. We're toast.\n"); return rc; @@ -62,53 +58,19 @@ int cc_pm_resume(struct device *dev) return 0; } +const struct dev_pm_ops ccree_pm = { + SET_RUNTIME_PM_OPS(cc_pm_suspend, cc_pm_resume, NULL) +}; + int cc_pm_get(struct device *dev) { - int rc = 0; - struct cc_drvdata *drvdata = dev_get_drvdata(dev); - - if (drvdata->pm_on) - rc = pm_runtime_get_sync(dev); + int rc = pm_runtime_get_sync(dev); return (rc == 1 ? 0 : rc); } void cc_pm_put_suspend(struct device *dev) { - struct cc_drvdata *drvdata = dev_get_drvdata(dev); - - if (drvdata->pm_on) { - pm_runtime_mark_last_busy(dev); - pm_runtime_put_autosuspend(dev); - } -} - -bool cc_pm_is_dev_suspended(struct device *dev) -{ - /* check device state using runtime api */ - return pm_runtime_suspended(dev); -} - -int cc_pm_init(struct cc_drvdata *drvdata) -{ - struct device *dev = drvdata_to_dev(drvdata); - - /* must be before the enabling to avoid redundant suspending */ - pm_runtime_set_autosuspend_delay(dev, CC_SUSPEND_TIMEOUT); - pm_runtime_use_autosuspend(dev); - /* set us as active - note we won't do PM ops until cc_pm_go()! 
*/ - return pm_runtime_set_active(dev); -} - -/* enable the PM module*/ -void cc_pm_go(struct cc_drvdata *drvdata) -{ - pm_runtime_enable(drvdata_to_dev(drvdata)); - drvdata->pm_on = true; -} - -void cc_pm_fini(struct cc_drvdata *drvdata) -{ - pm_runtime_disable(drvdata_to_dev(drvdata)); - drvdata->pm_on = false; + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); } diff --git a/drivers/crypto/ccree/cc_pm.h b/drivers/crypto/ccree/cc_pm.h index 80a18e11cae4..50cac33de118 100644 --- a/drivers/crypto/ccree/cc_pm.h +++ b/drivers/crypto/ccree/cc_pm.h @@ -15,26 +15,11 @@ extern const struct dev_pm_ops ccree_pm; -int cc_pm_init(struct cc_drvdata *drvdata); -void cc_pm_go(struct cc_drvdata *drvdata); -void cc_pm_fini(struct cc_drvdata *drvdata); -int cc_pm_suspend(struct device *dev); -int cc_pm_resume(struct device *dev); int cc_pm_get(struct device *dev); void cc_pm_put_suspend(struct device *dev); -bool cc_pm_is_dev_suspended(struct device *dev); #else -static inline int cc_pm_init(struct cc_drvdata *drvdata) -{ - return 0; -} - -static inline void cc_pm_go(struct cc_drvdata *drvdata) {} - -static inline void cc_pm_fini(struct cc_drvdata *drvdata) {} - static inline int cc_pm_get(struct device *dev) { return 0; @@ -42,12 +27,6 @@ static inline int cc_pm_get(struct device *dev) static inline void cc_pm_put_suspend(struct device *dev) {} -static inline bool cc_pm_is_dev_suspended(struct device *dev) -{ - /* if PM not supported device is never suspend */ - return false; -} - #endif #endif /*__POWER_MGR_H__*/ diff --git a/drivers/crypto/ccree/cc_request_mgr.c b/drivers/crypto/ccree/cc_request_mgr.c index 9d61e6f12478..1d7649ecf44e 100644 --- a/drivers/crypto/ccree/cc_request_mgr.c +++ b/drivers/crypto/ccree/cc_request_mgr.c @@ -206,12 +206,13 @@ static void enqueue_seq(struct cc_drvdata *drvdata, struct cc_hw_desc seq[], } } -/*! - * Completion will take place if and only if user requested completion - * by cc_send_sync_request(). 
+/** + * request_mgr_complete() - Completion will take place if and only if user + * requested completion by cc_send_sync_request(). * - * \param dev - * \param dx_compl_h The completion event to signal + * @dev: Device pointer + * @dx_compl_h: The completion event to signal + * @dummy: unused error code */ static void request_mgr_complete(struct device *dev, void *dx_compl_h, int dummy) @@ -264,15 +265,15 @@ static int cc_queues_status(struct cc_drvdata *drvdata, return -ENOSPC; } -/*! - * Enqueue caller request to crypto hardware. +/** + * cc_do_send_request() - Enqueue caller request to crypto hardware. * Need to be called with HW lock held and PM running * - * \param drvdata - * \param cc_req The request to enqueue - * \param desc The crypto sequence - * \param len The crypto sequence length - * \param add_comp If "true": add an artificial dout DMA to mark completion + * @drvdata: Associated device driver context + * @cc_req: The request to enqueue + * @desc: The crypto sequence + * @len: The crypto sequence length + * @add_comp: If "true": add an artificial dout DMA to mark completion * */ static void cc_do_send_request(struct cc_drvdata *drvdata, @@ -295,7 +296,6 @@ static void cc_do_send_request(struct cc_drvdata *drvdata, req_mgr_h->req_queue[req_mgr_h->req_queue_head] = *cc_req; req_mgr_h->req_queue_head = (req_mgr_h->req_queue_head + 1) & (MAX_REQUEST_QUEUE_SIZE - 1); - /* TODO: Use circ_buf.h ? */ dev_dbg(dev, "Enqueue request head=%u\n", req_mgr_h->req_queue_head); @@ -377,7 +377,7 @@ static void cc_proc_backlog(struct cc_drvdata *drvdata) rc = cc_queues_status(drvdata, mgr, bli->len); if (rc) { /* - * There is still not room in the FIFO for + * There is still no room in the FIFO for * this request. Bail out. We'll return here * on the next completion irq. 
*/ @@ -476,10 +476,6 @@ int cc_send_sync_request(struct cc_drvdata *drvdata, break; spin_unlock_bh(&mgr->hw_lock); - if (rc != -EAGAIN) { - cc_pm_put_suspend(dev); - return rc; - } wait_for_completion_interruptible(&drvdata->hw_queue_avail); reinit_completion(&drvdata->hw_queue_avail); } @@ -490,16 +486,18 @@ int cc_send_sync_request(struct cc_drvdata *drvdata, return 0; } -/*! - * Enqueue caller request to crypto hardware during init process. - * assume this function is not called in middle of a flow, +/** + * send_request_init() - Enqueue caller request to crypto hardware during init + * process. + * Assume this function is not called in the middle of a flow, * since we set QUEUE_LAST_IND flag in the last descriptor. * - * \param drvdata - * \param desc The crypto sequence - * \param len The crypto sequence length + * @drvdata: Associated device driver context + * @desc: The crypto sequence + * @len: The crypto sequence length * - * \return int Returns "0" upon success + * Return: + * Returns "0" upon success */ int send_request_init(struct cc_drvdata *drvdata, struct cc_hw_desc *desc, unsigned int len) diff --git a/drivers/crypto/ccree/cc_request_mgr.h b/drivers/crypto/ccree/cc_request_mgr.h index ff7746aaaf35..ae25ca843dce 100644 --- a/drivers/crypto/ccree/cc_request_mgr.h +++ b/drivers/crypto/ccree/cc_request_mgr.h @@ -12,18 +12,17 @@ int cc_req_mgr_init(struct cc_drvdata *drvdata); -/*! - * Enqueue caller request to crypto hardware. +/** + * cc_send_request() - Enqueue caller request to crypto hardware. * - * \param drvdata - * \param cc_req The request to enqueue - * \param desc The crypto sequence - * \param len The crypto sequence length - * \param is_dout If "true": completion is handled by the caller - * If "false": this function adds a dummy descriptor completion - * and waits upon completion signal. 
+ * @drvdata: Associated device driver context + * @cc_req: The request to enqueue + * @desc: The crypto sequence + * @len: The crypto sequence length + * @req: Asynchronous crypto request * - * \return int Returns -EINPROGRESS or error + * Return: + * Returns -EINPROGRESS or error */ int cc_send_request(struct cc_drvdata *drvdata, struct cc_crypto_req *cc_req, struct cc_hw_desc *desc, unsigned int len, diff --git a/drivers/crypto/ccree/cc_sram_mgr.c b/drivers/crypto/ccree/cc_sram_mgr.c index 62c885e6e791..37a95856361f 100644 --- a/drivers/crypto/ccree/cc_sram_mgr.c +++ b/drivers/crypto/ccree/cc_sram_mgr.c @@ -5,88 +5,61 @@ #include "cc_sram_mgr.h" /** - * struct cc_sram_ctx -Internal RAM context manager - * @sram_free_offset: the offset to the non-allocated area - */ -struct cc_sram_ctx { - cc_sram_addr_t sram_free_offset; -}; - -/** - * cc_sram_mgr_fini() - Cleanup SRAM pool. - * - * @drvdata: Associated device driver context - */ -void cc_sram_mgr_fini(struct cc_drvdata *drvdata) -{ - /* Nothing needed */ -} - -/** * cc_sram_mgr_init() - Initializes SRAM pool. * The pool starts right at the beginning of SRAM. * Returns zero for success, negative value otherwise. * * @drvdata: Associated device driver context + * + * Return: + * 0 for success, negative error code for failure. 
*/ int cc_sram_mgr_init(struct cc_drvdata *drvdata) { - struct cc_sram_ctx *ctx; - dma_addr_t start = 0; + u32 start = 0; struct device *dev = drvdata_to_dev(drvdata); if (drvdata->hw_rev < CC_HW_REV_712) { /* Pool starts after ROM bytes */ - start = (dma_addr_t)cc_ioread(drvdata, - CC_REG(HOST_SEP_SRAM_THRESHOLD)); - + start = cc_ioread(drvdata, CC_REG(HOST_SEP_SRAM_THRESHOLD)); if ((start & 0x3) != 0) { - dev_err(dev, "Invalid SRAM offset %pad\n", &start); + dev_err(dev, "Invalid SRAM offset 0x%x\n", start); return -EINVAL; } } - /* Allocate "this" context */ - ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); - - if (!ctx) - return -ENOMEM; - - ctx->sram_free_offset = start; - drvdata->sram_mgr_handle = ctx; - + drvdata->sram_free_offset = start; return 0; } -/*! - * Allocated buffer from SRAM pool. - * Note: Caller is responsible to free the LAST allocated buffer. - * This function does not taking care of any fragmentation may occur - * by the order of calls to alloc/free. +/** + * cc_sram_alloc() - Allocate buffer from SRAM pool. + * + * @drvdata: Associated device driver context + * @size: The requested numer of bytes to allocate * - * \param drvdata - * \param size The requested bytes to allocate + * Return: + * Address offset in SRAM or NULL_SRAM_ADDR for failure. 
*/ -cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size) +u32 cc_sram_alloc(struct cc_drvdata *drvdata, u32 size) { - struct cc_sram_ctx *smgr_ctx = drvdata->sram_mgr_handle; struct device *dev = drvdata_to_dev(drvdata); - cc_sram_addr_t p; + u32 p; if ((size & 0x3)) { dev_err(dev, "Requested buffer size (%u) is not multiple of 4", size); return NULL_SRAM_ADDR; } - if (size > (CC_CC_SRAM_SIZE - smgr_ctx->sram_free_offset)) { - dev_err(dev, "Not enough space to allocate %u B (at offset %llu)\n", - size, smgr_ctx->sram_free_offset); + if (size > (CC_CC_SRAM_SIZE - drvdata->sram_free_offset)) { + dev_err(dev, "Not enough space to allocate %u B (at offset %u)\n", + size, drvdata->sram_free_offset); return NULL_SRAM_ADDR; } - p = smgr_ctx->sram_free_offset; - smgr_ctx->sram_free_offset += size; - dev_dbg(dev, "Allocated %u B @ %u\n", size, (unsigned int)p); + p = drvdata->sram_free_offset; + drvdata->sram_free_offset += size; + dev_dbg(dev, "Allocated %u B @ %u\n", size, p); return p; } @@ -97,13 +70,12 @@ cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size) * * @src: A pointer to array of words to set as consts. 
* @dst: The target SRAM buffer to set into - * @nelements: The number of words in "src" array + * @nelement: The number of words in "src" array * @seq: A pointer to the given IN/OUT descriptor sequence * @seq_len: A pointer to the given IN/OUT sequence length */ -void cc_set_sram_desc(const u32 *src, cc_sram_addr_t dst, - unsigned int nelement, struct cc_hw_desc *seq, - unsigned int *seq_len) +void cc_set_sram_desc(const u32 *src, u32 dst, unsigned int nelement, + struct cc_hw_desc *seq, unsigned int *seq_len) { u32 i; unsigned int idx = *seq_len; diff --git a/drivers/crypto/ccree/cc_sram_mgr.h b/drivers/crypto/ccree/cc_sram_mgr.h index 1d14de9ee8c3..1c965ef83002 100644 --- a/drivers/crypto/ccree/cc_sram_mgr.h +++ b/drivers/crypto/ccree/cc_sram_mgr.h @@ -10,42 +10,30 @@ struct cc_drvdata; -/** - * Address (offset) within CC internal SRAM - */ - -typedef u64 cc_sram_addr_t; - -#define NULL_SRAM_ADDR ((cc_sram_addr_t)-1) +#define NULL_SRAM_ADDR ((u32)-1) -/*! - * Initializes SRAM pool. +/** + * cc_sram_mgr_init() - Initializes SRAM pool. * The first X bytes of SRAM are reserved for ROM usage, hence, pool * starts right after X bytes. * - * \param drvdata + * @drvdata: Associated device driver context * - * \return int Zero for success, negative value otherwise. + * Return: + * Zero for success, negative value otherwise. */ int cc_sram_mgr_init(struct cc_drvdata *drvdata); -/*! - * Uninits SRAM pool. +/** + * cc_sram_alloc() - Allocate buffer from SRAM pool. * - * \param drvdata - */ -void cc_sram_mgr_fini(struct cc_drvdata *drvdata); - -/*! - * Allocated buffer from SRAM pool. - * Note: Caller is responsible to free the LAST allocated buffer. - * This function does not taking care of any fragmentation may occur - * by the order of calls to alloc/free. 
+ * @drvdata: Associated device driver context + * @size: The requested bytes to allocate * - * \param drvdata - * \param size The requested bytes to allocate + * Return: + * Address offset in SRAM or NULL_SRAM_ADDR for failure. */ -cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size); +u32 cc_sram_alloc(struct cc_drvdata *drvdata, u32 size); /** * cc_set_sram_desc() - Create const descriptors sequence to @@ -54,12 +42,11 @@ cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size); * * @src: A pointer to array of words to set as consts. * @dst: The target SRAM buffer to set into - * @nelements: The number of words in "src" array + * @nelement: The number of words in "src" array * @seq: A pointer to the given IN/OUT descriptor sequence * @seq_len: A pointer to the given IN/OUT sequence length */ -void cc_set_sram_desc(const u32 *src, cc_sram_addr_t dst, - unsigned int nelement, struct cc_hw_desc *seq, - unsigned int *seq_len); +void cc_set_sram_desc(const u32 *src, u32 dst, unsigned int nelement, + struct cc_hw_desc *seq, unsigned int *seq_len); #endif /*__CC_SRAM_MGR_H__*/ diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index b4b9b22125d1..c29b80dd30d8 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -715,6 +715,52 @@ static int chcr_cipher_fallback(struct crypto_sync_skcipher *cipher, return err; } + +static inline int get_qidxs(struct crypto_async_request *req, + unsigned int *txqidx, unsigned int *rxqidx) +{ + struct crypto_tfm *tfm = req->tfm; + int ret = 0; + + switch (tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { + case CRYPTO_ALG_TYPE_AEAD: + { + struct aead_request *aead_req = + container_of(req, struct aead_request, base); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(aead_req); + *txqidx = reqctx->txqidx; + *rxqidx = reqctx->rxqidx; + break; + } + case CRYPTO_ALG_TYPE_SKCIPHER: + { + struct skcipher_request *sk_req = + container_of(req, 
struct skcipher_request, base); + struct chcr_skcipher_req_ctx *reqctx = + skcipher_request_ctx(sk_req); + *txqidx = reqctx->txqidx; + *rxqidx = reqctx->rxqidx; + break; + } + case CRYPTO_ALG_TYPE_AHASH: + { + struct ahash_request *ahash_req = + container_of(req, struct ahash_request, base); + struct chcr_ahash_req_ctx *reqctx = + ahash_request_ctx(ahash_req); + *txqidx = reqctx->txqidx; + *rxqidx = reqctx->rxqidx; + break; + } + default: + ret = -EINVAL; + /* should never get here */ + BUG(); + break; + } + return ret; +} + static inline void create_wreq(struct chcr_context *ctx, struct chcr_wr *chcr_req, struct crypto_async_request *req, @@ -725,7 +771,15 @@ static inline void create_wreq(struct chcr_context *ctx, unsigned int lcb) { struct uld_ctx *u_ctx = ULD_CTX(ctx); - int qid = u_ctx->lldi.rxq_ids[ctx->rx_qidx]; + unsigned int tx_channel_id, rx_channel_id; + unsigned int txqidx = 0, rxqidx = 0; + unsigned int qid, fid; + + get_qidxs(req, &txqidx, &rxqidx); + qid = u_ctx->lldi.rxq_ids[rxqidx]; + fid = u_ctx->lldi.rxq_ids[0]; + tx_channel_id = txqidx / ctx->txq_perchan; + rx_channel_id = rxqidx / ctx->rxq_perchan; chcr_req->wreq.op_to_cctx_size = FILL_WR_OP_CCTX_SIZE; @@ -734,15 +788,12 @@ static inline void create_wreq(struct chcr_context *ctx, chcr_req->wreq.len16_pkd = htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP(len16, 16))); chcr_req->wreq.cookie = cpu_to_be64((uintptr_t)req); - chcr_req->wreq.rx_chid_to_rx_q_id = - FILL_WR_RX_Q_ID(ctx->tx_chan_id, qid, - !!lcb, ctx->tx_qidx); + chcr_req->wreq.rx_chid_to_rx_q_id = FILL_WR_RX_Q_ID(rx_channel_id, qid, + !!lcb, txqidx); - chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->tx_chan_id, - qid); + chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(tx_channel_id, fid); chcr_req->ulptx.len = htonl((DIV_ROUND_UP(len16, 16) - - ((sizeof(chcr_req->wreq)) >> 4))); - + ((sizeof(chcr_req->wreq)) >> 4))); chcr_req->sc_imm.cmd_more = FILL_CMD_MORE(!imm); chcr_req->sc_imm.len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) + 
sizeof(chcr_req->key_ctx) + sc_len); @@ -758,7 +809,8 @@ static inline void create_wreq(struct chcr_context *ctx, static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(wrparam->req); - struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(tfm)); + struct chcr_context *ctx = c_ctx(tfm); + struct ablk_ctx *ablkctx = ABLK_CTX(ctx); struct sk_buff *skb = NULL; struct chcr_wr *chcr_req; struct cpl_rx_phys_dsgl *phys_cpl; @@ -771,7 +823,8 @@ static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam) unsigned int kctx_len; gfp_t flags = wrparam->req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; - struct adapter *adap = padap(c_ctx(tfm)->dev); + struct adapter *adap = padap(ctx->dev); + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; nents = sg_nents_xlen(reqctx->dstsg, wrparam->bytes, CHCR_DST_SG_SIZE, reqctx->dst_ofst); @@ -791,7 +844,7 @@ static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam) } chcr_req = __skb_put_zero(skb, transhdr_len); chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(c_ctx(tfm)->tx_chan_id, 2, 1); + FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 1); chcr_req->sec_cpl.pldlen = htonl(IV + wrparam->bytes); chcr_req->sec_cpl.aadstart_cipherstop_hi = @@ -1086,8 +1139,12 @@ static int chcr_final_cipher_iv(struct skcipher_request *req, if (subtype == CRYPTO_ALG_SUB_TYPE_CTR) ctr_add_iv(iv, req->iv, DIV_ROUND_UP(reqctx->processed, AES_BLOCK_SIZE)); - else if (subtype == CRYPTO_ALG_SUB_TYPE_XTS) - ret = chcr_update_tweak(req, iv, 1); + else if (subtype == CRYPTO_ALG_SUB_TYPE_XTS) { + if (!reqctx->partial_req) + memcpy(iv, reqctx->iv, AES_BLOCK_SIZE); + else + ret = chcr_update_tweak(req, iv, 1); + } else if (subtype == CRYPTO_ALG_SUB_TYPE_CBC) { /*Already updated for Decrypt*/ if (!reqctx->op) @@ -1102,12 +1159,13 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req, unsigned char *input, int err) { struct 
crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chcr_context *ctx = c_ctx(tfm); struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm)); struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(tfm)); struct sk_buff *skb; struct cpl_fw6_pld *fw6_pld = (struct cpl_fw6_pld *)input; struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req); - struct cipher_wr_param wrparam; + struct cipher_wr_param wrparam; struct chcr_dev *dev = c_ctx(tfm)->dev; int bytes; @@ -1152,7 +1210,7 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req, if (get_cryptoalg_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_CTR) bytes = adjust_ctr_overflow(reqctx->iv, bytes); - wrparam.qid = u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx]; + wrparam.qid = u_ctx->lldi.rxq_ids[reqctx->rxqidx]; wrparam.req = req; wrparam.bytes = bytes; skb = create_cipher_wr(&wrparam); @@ -1162,14 +1220,24 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req, goto unmap; } skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, c_ctx(tfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); reqctx->last_req_len = bytes; reqctx->processed += bytes; + if (get_cryptoalg_subtype(tfm) == + CRYPTO_ALG_SUB_TYPE_CBC && req->base.flags == + CRYPTO_TFM_REQ_MAY_SLEEP ) { + complete(&ctx->cbc_aes_aio_done); + } return 0; unmap: chcr_cipher_dma_unmap(&ULD_CTX(c_ctx(tfm))->lldi.pdev->dev, req); complete: + if (get_cryptoalg_subtype(tfm) == + CRYPTO_ALG_SUB_TYPE_CBC && req->base.flags == + CRYPTO_TFM_REQ_MAY_SLEEP ) { + complete(&ctx->cbc_aes_aio_done); + } chcr_dec_wrcount(dev); req->base.complete(&req->base, err); return err; @@ -1188,6 +1256,7 @@ static int process_cipher(struct skcipher_request *req, int bytes, err = -EINVAL; reqctx->processed = 0; + reqctx->partial_req = 0; if (!req->iv) goto error; if ((ablkctx->enckey_len == 0) || (ivsize > AES_BLOCK_SIZE) || @@ -1278,6 +1347,7 @@ static int process_cipher(struct skcipher_request *req, } reqctx->processed = bytes; 
reqctx->last_req_len = bytes; + reqctx->partial_req = !!(req->cryptlen - reqctx->processed); return 0; unmap: @@ -1289,31 +1359,43 @@ error: static int chcr_aes_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req); struct chcr_dev *dev = c_ctx(tfm)->dev; struct sk_buff *skb = NULL; - int err, isfull = 0; + int err; struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm)); + struct chcr_context *ctx = c_ctx(tfm); + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); err = chcr_inc_wrcount(dev); if (err) return -ENXIO; if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - c_ctx(tfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + reqctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) { err = -ENOSPC; goto error; - } } - err = process_cipher(req, u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx], + err = process_cipher(req, u_ctx->lldi.rxq_ids[reqctx->rxqidx], &skb, CHCR_ENCRYPT_OP); if (err || !skb) return err; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, c_ctx(tfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); - return isfull ? 
-EBUSY : -EINPROGRESS; + if (get_cryptoalg_subtype(tfm) == + CRYPTO_ALG_SUB_TYPE_CBC && req->base.flags == + CRYPTO_TFM_REQ_MAY_SLEEP ) { + reqctx->partial_req = 1; + wait_for_completion(&ctx->cbc_aes_aio_done); + } + return -EINPROGRESS; error: chcr_dec_wrcount(dev); return err; @@ -1322,44 +1404,45 @@ error: static int chcr_aes_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req); struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm)); struct chcr_dev *dev = c_ctx(tfm)->dev; struct sk_buff *skb = NULL; - int err, isfull = 0; + int err; + struct chcr_context *ctx = c_ctx(tfm); + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); err = chcr_inc_wrcount(dev); if (err) return -ENXIO; if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - c_ctx(tfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + reqctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) return -ENOSPC; - } - - err = process_cipher(req, u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx], + err = process_cipher(req, u_ctx->lldi.rxq_ids[reqctx->rxqidx], &skb, CHCR_DECRYPT_OP); if (err || !skb) return err; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, c_ctx(tfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); - return isfull ? 
-EBUSY : -EINPROGRESS; + return -EINPROGRESS; } - static int chcr_device_init(struct chcr_context *ctx) { struct uld_ctx *u_ctx = NULL; - unsigned int id; - int txq_perchan, txq_idx, ntxq; - int err = 0, rxq_perchan, rxq_idx; + int txq_perchan, ntxq; + int err = 0, rxq_perchan; - id = smp_processor_id(); if (!ctx->dev) { u_ctx = assign_chcr_device(); if (!u_ctx) { - err = -ENXIO; pr_err("chcr device assignment fails\n"); goto out; } @@ -1367,23 +1450,10 @@ static int chcr_device_init(struct chcr_context *ctx) ntxq = u_ctx->lldi.ntxq; rxq_perchan = u_ctx->lldi.nrxq / u_ctx->lldi.nchan; txq_perchan = ntxq / u_ctx->lldi.nchan; - spin_lock(&ctx->dev->lock_chcr_dev); - ctx->tx_chan_id = ctx->dev->tx_channel_id; - ctx->dev->tx_channel_id = - (ctx->dev->tx_channel_id + 1) % u_ctx->lldi.nchan; - spin_unlock(&ctx->dev->lock_chcr_dev); - rxq_idx = ctx->tx_chan_id * rxq_perchan; - rxq_idx += id % rxq_perchan; - txq_idx = ctx->tx_chan_id * txq_perchan; - txq_idx += id % txq_perchan; - ctx->rx_qidx = rxq_idx; - ctx->tx_qidx = txq_idx; - /* Channel Id used by SGE to forward packet to Host. - * Same value should be used in cpl_fw6_pld RSS_CH field - * by FW. 
Driver programs PCI channel ID to be used in fw - * at the time of queue allocation with value "pi->tx_chan" - */ - ctx->pci_chan_id = txq_idx / txq_perchan; + ctx->ntxq = ntxq; + ctx->nrxq = u_ctx->lldi.nrxq; + ctx->rxq_perchan = rxq_perchan; + ctx->txq_perchan = txq_perchan; } out: return err; @@ -1401,7 +1471,7 @@ static int chcr_init_tfm(struct crypto_skcipher *tfm) pr_err("failed to allocate fallback for %s\n", alg->base.cra_name); return PTR_ERR(ablkctx->sw_cipher); } - + init_completion(&ctx->cbc_aes_aio_done); crypto_skcipher_set_reqsize(tfm, sizeof(struct chcr_skcipher_req_ctx)); return chcr_device_init(ctx); @@ -1485,9 +1555,10 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req, { struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct hmac_ctx *hmacctx = HMAC_CTX(h_ctx(tfm)); + struct chcr_context *ctx = h_ctx(tfm); + struct hmac_ctx *hmacctx = HMAC_CTX(ctx); struct sk_buff *skb = NULL; - struct uld_ctx *u_ctx = ULD_CTX(h_ctx(tfm)); + struct uld_ctx *u_ctx = ULD_CTX(ctx); struct chcr_wr *chcr_req; struct ulptx_sgl *ulptx; unsigned int nents = 0, transhdr_len; @@ -1496,6 +1567,7 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req, GFP_ATOMIC; struct adapter *adap = padap(h_ctx(tfm)->dev); int error = 0; + unsigned int rx_channel_id = req_ctx->rxqidx / ctx->rxq_perchan; transhdr_len = HASH_TRANSHDR_SIZE(param->kctx_len); req_ctx->hctx_wr.imm = (transhdr_len + param->bfr_len + @@ -1513,7 +1585,8 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req, chcr_req = __skb_put_zero(skb, transhdr_len); chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(h_ctx(tfm)->tx_chan_id, 2, 0); + FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 0); + chcr_req->sec_cpl.pldlen = htonl(param->bfr_len + param->sg_len); chcr_req->sec_cpl.aadstart_cipherstop_hi = @@ -1576,16 +1649,22 @@ static int chcr_ahash_update(struct ahash_request *req) { struct chcr_ahash_req_ctx 
*req_ctx = ahash_request_ctx(req); struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); - struct uld_ctx *u_ctx = NULL; + struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm)); + struct chcr_context *ctx = h_ctx(rtfm); struct chcr_dev *dev = h_ctx(rtfm)->dev; struct sk_buff *skb; u8 remainder = 0, bs; unsigned int nbytes = req->nbytes; struct hash_wr_param params; - int error, isfull = 0; + int error; + unsigned int cpu; + + cpu = get_cpu(); + req_ctx->txqidx = cpu % ctx->ntxq; + req_ctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); - u_ctx = ULD_CTX(h_ctx(rtfm)); if (nbytes + req_ctx->reqlen >= bs) { remainder = (nbytes + req_ctx->reqlen) % bs; @@ -1603,12 +1682,10 @@ static int chcr_ahash_update(struct ahash_request *req) * inflight count for dev guarantees that lldi and padap is valid */ if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - h_ctx(rtfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + req_ctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) { error = -ENOSPC; goto err; - } } chcr_init_hctx_per_wr(req_ctx); @@ -1650,10 +1727,9 @@ static int chcr_ahash_update(struct ahash_request *req) } req_ctx->reqlen = remainder; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx); chcr_send_wr(skb); - - return isfull ? 
-EBUSY : -EINPROGRESS; + return -EINPROGRESS; unmap: chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req); err: @@ -1678,16 +1754,22 @@ static int chcr_ahash_final(struct ahash_request *req) struct chcr_dev *dev = h_ctx(rtfm)->dev; struct hash_wr_param params; struct sk_buff *skb; - struct uld_ctx *u_ctx = NULL; + struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm)); + struct chcr_context *ctx = h_ctx(rtfm); u8 bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); int error = -EINVAL; + unsigned int cpu; + + cpu = get_cpu(); + req_ctx->txqidx = cpu % ctx->ntxq; + req_ctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); error = chcr_inc_wrcount(dev); if (error) return -ENXIO; chcr_init_hctx_per_wr(req_ctx); - u_ctx = ULD_CTX(h_ctx(rtfm)); if (is_hmac(crypto_ahash_tfm(rtfm))) params.opad_needed = 1; else @@ -1727,7 +1809,7 @@ static int chcr_ahash_final(struct ahash_request *req) } req_ctx->reqlen = 0; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx); chcr_send_wr(skb); return -EINPROGRESS; err: @@ -1740,25 +1822,29 @@ static int chcr_ahash_finup(struct ahash_request *req) struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); struct chcr_dev *dev = h_ctx(rtfm)->dev; - struct uld_ctx *u_ctx = NULL; + struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm)); + struct chcr_context *ctx = h_ctx(rtfm); struct sk_buff *skb; struct hash_wr_param params; u8 bs; - int error, isfull = 0; + int error; + unsigned int cpu; + + cpu = get_cpu(); + req_ctx->txqidx = cpu % ctx->ntxq; + req_ctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); - u_ctx = ULD_CTX(h_ctx(rtfm)); error = chcr_inc_wrcount(dev); if (error) return -ENXIO; if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - h_ctx(rtfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + req_ctx->txqidx) && + 
(!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) { error = -ENOSPC; goto err; - } } chcr_init_hctx_per_wr(req_ctx); error = chcr_hash_dma_map(&u_ctx->lldi.pdev->dev, req); @@ -1816,10 +1902,9 @@ static int chcr_ahash_finup(struct ahash_request *req) req_ctx->reqlen = 0; req_ctx->hctx_wr.processed += params.sg_len; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx); chcr_send_wr(skb); - - return isfull ? -EBUSY : -EINPROGRESS; + return -EINPROGRESS; unmap: chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req); err: @@ -1832,11 +1917,18 @@ static int chcr_ahash_digest(struct ahash_request *req) struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); struct chcr_dev *dev = h_ctx(rtfm)->dev; - struct uld_ctx *u_ctx = NULL; + struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm)); + struct chcr_context *ctx = h_ctx(rtfm); struct sk_buff *skb; struct hash_wr_param params; u8 bs; - int error, isfull = 0; + int error; + unsigned int cpu; + + cpu = get_cpu(); + req_ctx->txqidx = cpu % ctx->ntxq; + req_ctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); rtfm->init(req); bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); @@ -1844,14 +1936,11 @@ static int chcr_ahash_digest(struct ahash_request *req) if (error) return -ENXIO; - u_ctx = ULD_CTX(h_ctx(rtfm)); if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - h_ctx(rtfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + req_ctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) { error = -ENOSPC; goto err; - } } chcr_init_hctx_per_wr(req_ctx); @@ -1907,9 +1996,9 @@ static int chcr_ahash_digest(struct ahash_request *req) } req_ctx->hctx_wr.processed += params.sg_len; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx); chcr_send_wr(skb); - 
return isfull ? -EBUSY : -EINPROGRESS; + return -EINPROGRESS; unmap: chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req); err: @@ -1922,14 +2011,20 @@ static int chcr_ahash_continue(struct ahash_request *req) struct chcr_ahash_req_ctx *reqctx = ahash_request_ctx(req); struct chcr_hctx_per_wr *hctx_wr = &reqctx->hctx_wr; struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); - struct uld_ctx *u_ctx = NULL; + struct chcr_context *ctx = h_ctx(rtfm); + struct uld_ctx *u_ctx = ULD_CTX(ctx); struct sk_buff *skb; struct hash_wr_param params; u8 bs; int error; + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); - u_ctx = ULD_CTX(h_ctx(rtfm)); get_alg_config(¶ms.alg_prm, crypto_ahash_digestsize(rtfm)); params.kctx_len = roundup(params.alg_prm.result_size, 16); if (is_hmac(crypto_ahash_tfm(rtfm))) { @@ -1969,7 +2064,7 @@ static int chcr_ahash_continue(struct ahash_request *req) } hctx_wr->processed += params.sg_len; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); return 0; err: @@ -2315,7 +2410,8 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, int size) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); + struct chcr_context *ctx = a_ctx(tfm); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); struct chcr_authenc_ctx *actx = AUTHENC_CTX(aeadctx); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); struct sk_buff *skb = NULL; @@ -2331,7 +2427,8 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, int null = 0; gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? 
GFP_KERNEL : GFP_ATOMIC; - struct adapter *adap = padap(a_ctx(tfm)->dev); + struct adapter *adap = padap(ctx->dev); + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; if (req->cryptlen == 0) return NULL; @@ -2351,7 +2448,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, snents = sg_nents_xlen(req->src, req->assoclen + req->cryptlen, CHCR_SRC_SG_SIZE, 0); dst_size = get_space_for_phys_dsgl(dnents); - kctx_len = (ntohl(KEY_CONTEXT_CTX_LEN_V(aeadctx->key_ctx_hdr)) << 4) + kctx_len = (KEY_CONTEXT_CTX_LEN_G(ntohl(aeadctx->key_ctx_hdr)) << 4) - sizeof(chcr_req->key_ctx); transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size); reqctx->imm = (transhdr_len + req->assoclen + req->cryptlen) < @@ -2383,7 +2480,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, * to the hardware spec */ chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(a_ctx(tfm)->tx_chan_id, 2, 1); + FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 1); chcr_req->sec_cpl.pldlen = htonl(req->assoclen + IV + req->cryptlen); chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( null ? 0 : 1 + IV, @@ -2471,8 +2568,9 @@ int chcr_aead_dma_map(struct device *dev, else reqctx->b0_dma = 0; if (req->src == req->dst) { - error = dma_map_sg(dev, req->src, sg_nents(req->src), - DMA_BIDIRECTIONAL); + error = dma_map_sg(dev, req->src, + sg_nents_for_len(req->src, dst_size), + DMA_BIDIRECTIONAL); if (!error) goto err; } else { @@ -2558,13 +2656,14 @@ void chcr_add_aead_dst_ent(struct aead_request *req, unsigned int authsize = crypto_aead_authsize(tfm); struct chcr_context *ctx = a_ctx(tfm); u32 temp; + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; dsgl_walk_init(&dsgl_walk, phys_cpl); dsgl_walk_add_page(&dsgl_walk, IV + reqctx->b0_len, reqctx->iv_dma); temp = req->assoclen + req->cryptlen + (reqctx->op ? 
-authsize : authsize); dsgl_walk_add_sg(&dsgl_walk, req->dst, temp, 0); - dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id); + dsgl_walk_end(&dsgl_walk, qid, rx_channel_id); } void chcr_add_cipher_src_ent(struct skcipher_request *req, @@ -2599,14 +2698,14 @@ void chcr_add_cipher_dst_ent(struct skcipher_request *req, struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(wrparam->req); struct chcr_context *ctx = c_ctx(tfm); struct dsgl_walk dsgl_walk; + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; dsgl_walk_init(&dsgl_walk, phys_cpl); dsgl_walk_add_sg(&dsgl_walk, reqctx->dstsg, wrparam->bytes, reqctx->dst_ofst); reqctx->dstsg = dsgl_walk.last_sg; reqctx->dst_ofst = dsgl_walk.last_sg_len; - - dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id); + dsgl_walk_end(&dsgl_walk, qid, rx_channel_id); } void chcr_add_hash_src_ent(struct ahash_request *req, @@ -2804,10 +2903,12 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl, unsigned short op_type) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); + struct chcr_context *ctx = a_ctx(tfm); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); unsigned int cipher_mode = CHCR_SCMD_CIPHER_MODE_AES_CCM; unsigned int mac_mode = CHCR_SCMD_AUTH_MODE_CBCMAC; - unsigned int c_id = a_ctx(tfm)->tx_chan_id; + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; unsigned int ccm_xtra; unsigned char tag_offset = 0, auth_offset = 0; unsigned int assoclen; @@ -2828,9 +2929,7 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl, auth_offset = 0; } - - sec_cpl->op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(c_id, - 2, 1); + sec_cpl->op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 1); sec_cpl->pldlen = htonl(req->assoclen + IV + req->cryptlen + ccm_xtra); /* For CCM there wil be b0 always. 
So AAD start will be 1 always */ @@ -2973,7 +3072,8 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, int size) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); + struct chcr_context *ctx = a_ctx(tfm); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); struct sk_buff *skb = NULL; struct chcr_wr *chcr_req; @@ -2986,7 +3086,8 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, u8 *ivptr; gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; - struct adapter *adap = padap(a_ctx(tfm)->dev); + struct adapter *adap = padap(ctx->dev); + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106) assoclen = req->assoclen - 8; @@ -3028,7 +3129,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, //Offset of tag from end temp = (reqctx->op == CHCR_ENCRYPT_OP) ? 
0 : authsize; chcr_req->sec_cpl.op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR( - a_ctx(tfm)->tx_chan_id, 2, 1); + rx_channel_id, 2, 1); chcr_req->sec_cpl.pldlen = htonl(req->assoclen + IV + req->cryptlen); chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( @@ -3576,9 +3677,9 @@ static int chcr_aead_op(struct aead_request *req, { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); - struct uld_ctx *u_ctx; + struct chcr_context *ctx = a_ctx(tfm); + struct uld_ctx *u_ctx = ULD_CTX(ctx); struct sk_buff *skb; - int isfull = 0; struct chcr_dev *cdev; cdev = a_ctx(tfm)->dev; @@ -3594,18 +3695,15 @@ static int chcr_aead_op(struct aead_request *req, return chcr_aead_fallback(req, reqctx->op); } - u_ctx = ULD_CTX(a_ctx(tfm)); if (cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - a_ctx(tfm)->tx_qidx)) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + reqctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))) { chcr_dec_wrcount(cdev); return -ENOSPC; - } } /* Form a WR from req */ - skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[a_ctx(tfm)->rx_qidx], size); + skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[reqctx->rxqidx], size); if (IS_ERR_OR_NULL(skb)) { chcr_dec_wrcount(cdev); @@ -3613,15 +3711,22 @@ static int chcr_aead_op(struct aead_request *req, } skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, a_ctx(tfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); - return isfull ? 
-EBUSY : -EINPROGRESS; + return -EINPROGRESS; } static int chcr_aead_encrypt(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_context *ctx = a_ctx(tfm); + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); reqctx->verify = VERIFY_HW; reqctx->op = CHCR_ENCRYPT_OP; @@ -3643,9 +3748,16 @@ static int chcr_aead_encrypt(struct aead_request *req) static int chcr_aead_decrypt(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); + struct chcr_context *ctx = a_ctx(tfm); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); int size; + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); if (aeadctx->mayverify == VERIFY_SW) { size = crypto_aead_maxauthsize(tfm); diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c index dfb53e746e51..ffd4ec0c7374 100644 --- a/drivers/crypto/chelsio/chcr_core.c +++ b/drivers/crypto/chelsio/chcr_core.c @@ -195,6 +195,7 @@ static void *chcr_uld_add(const struct cxgb4_lld_info *lld) struct uld_ctx *u_ctx; /* Create the device and add it in the device list */ + pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION); if (!(lld->ulp_crypto & ULP_CRYPTO_LOOKASIDE)) return ERR_PTR(-EOPNOTSUPP); @@ -287,6 +288,8 @@ static int chcr_uld_state_change(void *handle, enum cxgb4_state state) case CXGB4_STATE_DETACH: chcr_detach_device(u_ctx); + if (!atomic_read(&drv_data.dev_count)) + stop_crypto(); break; case CXGB4_STATE_START_RECOVERY: diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h index b5b371b8d343..2c09672e00a4 100644 --- a/drivers/crypto/chelsio/chcr_core.h +++ b/drivers/crypto/chelsio/chcr_core.h @@ -43,7 
+43,8 @@ #include "cxgb4_uld.h" #define DRV_MODULE_NAME "chcr" -#define DRV_VERSION "1.0.0.0" +#define DRV_VERSION "1.0.0.0-ko" +#define DRV_DESC "Chelsio T6 Crypto Co-processor Driver" #define MAX_PENDING_REQ_TO_HW 20 #define CHCR_TEST_RESPONSE_TIMEOUT 1000 @@ -67,7 +68,7 @@ struct _key_ctx { __be32 ctx_hdr; u8 salt[MAX_SALT]; __be64 iv_to_auth; - unsigned char key[0]; + unsigned char key[]; }; #define KEYCTX_TX_WR_IV_S 55 @@ -147,7 +148,6 @@ struct chcr_dev { int wqretry; struct delayed_work detach_work; struct completion detach_comp; - unsigned char tx_channel_id; }; struct uld_ctx { diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h index 6db2df8c8a05..542bebae001f 100644 --- a/drivers/crypto/chelsio/chcr_crypto.h +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -187,6 +187,8 @@ struct chcr_aead_reqctx { unsigned int op; u16 imm; u16 verify; + u16 txqidx; + u16 rxqidx; u8 iv[CHCR_MAX_CRYPTO_IV_LEN + MAX_SCRATCH_PAD_SIZE]; u8 *scratch_pad; }; @@ -250,10 +252,11 @@ struct __crypto_ctx { struct chcr_context { struct chcr_dev *dev; - unsigned char tx_qidx; - unsigned char rx_qidx; - unsigned char tx_chan_id; - unsigned char pci_chan_id; + unsigned char rxq_perchan; + unsigned char txq_perchan; + unsigned int ntxq; + unsigned int nrxq; + struct completion cbc_aes_aio_done; struct __crypto_ctx crypto_ctx[0]; }; @@ -279,6 +282,8 @@ struct chcr_ahash_req_ctx { u8 *skbfr; /* SKB which is being sent to the hardware for processing */ u64 data_len; /* Data len till time */ + u16 txqidx; + u16 rxqidx; u8 reqlen; u8 partial_hash[CHCR_HASH_MAX_DIGEST_SIZE]; u8 bfr1[CHCR_HASH_MAX_BLOCK_SIZE_128]; @@ -290,12 +295,15 @@ struct chcr_skcipher_req_ctx { struct scatterlist *dstsg; unsigned int processed; unsigned int last_req_len; + unsigned int partial_req; struct scatterlist *srcsg; unsigned int src_ofst; unsigned int dst_ofst; unsigned int op; u16 imm; u8 iv[CHCR_MAX_CRYPTO_IV_LEN]; + u16 txqidx; + u16 rxqidx; }; struct chcr_alg_template { diff 
--git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c index e1651adb9d06..dccef3a2908b 100644 --- a/drivers/crypto/chelsio/chtls/chtls_io.c +++ b/drivers/crypto/chelsio/chtls/chtls_io.c @@ -1110,10 +1110,10 @@ new_buf: pg_size = page_size(page); if (off < pg_size && skb_can_coalesce(skb, i, page, off)) { - merge = 1; + merge = true; goto copy; } - merge = 0; + merge = false; if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) : MAX_SKB_FRAGS)) goto new_buf; @@ -1428,6 +1428,8 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); struct chtls_hws *hws = &csk->tlshws; + struct net_device *dev = csk->egress_dev; + struct adapter *adap = netdev2adap(dev); struct tcp_sock *tp = tcp_sk(sk); unsigned long avail; int buffers_freed; @@ -1585,6 +1587,7 @@ skip_copy: tp->copied_seq += skb->len; hws->rcvpld = skb->hdr_len; } else { + atomic_inc(&adap->chcr_stats.tls_pdu_rx); tp->copied_seq += hws->rcvpld; } chtls_free_skb(sk, skb); diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c index a038de90b2ea..2110d0893bc7 100644 --- a/drivers/crypto/chelsio/chtls/chtls_main.c +++ b/drivers/crypto/chelsio/chtls/chtls_main.c @@ -174,9 +174,16 @@ static inline void chtls_dev_release(struct kref *kref) { struct tls_toe_device *dev; struct chtls_dev *cdev; + struct adapter *adap; dev = container_of(kref, struct tls_toe_device, kref); cdev = to_chtls_dev(dev); + + /* Reset tls rx/tx stats */ + adap = pci_get_drvdata(cdev->pdev); + atomic_set(&adap->chcr_stats.tls_pdu_tx, 0); + atomic_set(&adap->chcr_stats.tls_pdu_rx, 0); + chtls_free_uld(cdev); } @@ -229,8 +236,7 @@ static void *chtls_uld_add(const struct cxgb4_lld_info *info) struct chtls_dev *cdev; int i, j; - cdev = kzalloc(sizeof(*cdev) + info->nports * - (sizeof(struct net_device *)), GFP_KERNEL); + cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); if (!cdev) goto out; diff --git 
a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig index 8851161f722f..095850d01dcc 100644 --- a/drivers/crypto/hisilicon/Kconfig +++ b/drivers/crypto/hisilicon/Kconfig @@ -40,6 +40,7 @@ config CRYPTO_DEV_HISI_QM tristate depends on ARM64 || COMPILE_TEST depends on PCI && PCI_MSI + depends on UACCE || UACCE=n help HiSilicon accelerator engines use a common queue management interface. Specific engine driver may use this module. @@ -49,6 +50,7 @@ config CRYPTO_DEV_HISI_ZIP depends on PCI && PCI_MSI depends on ARM64 || (COMPILE_TEST && 64BIT) depends on !CPU_BIG_ENDIAN || COMPILE_TEST + depends on UACCE || UACCE=n select CRYPTO_DEV_HISI_QM help Support for HiSilicon ZIP Driver diff --git a/drivers/crypto/hisilicon/hpre/hpre.h b/drivers/crypto/hisilicon/hpre/hpre.h index ddf13ea9862a..03d512ec6336 100644 --- a/drivers/crypto/hisilicon/hpre/hpre.h +++ b/drivers/crypto/hisilicon/hpre/hpre.h @@ -46,7 +46,6 @@ struct hpre_debug { struct hpre { struct hisi_qm qm; - struct list_head list; struct hpre_debug debug; u32 num_vfs; unsigned long status; @@ -76,7 +75,7 @@ struct hpre_sqe { __le32 rsvd1[_HPRE_SQE_ALIGN_EXT]; }; -struct hpre *hpre_find_device(int node); +struct hisi_qp *hpre_create_qp(void); int hpre_algs_register(void); void hpre_algs_unregister(void); diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index 5d400d69e8e4..65425250b2e9 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -147,26 +147,18 @@ static void hpre_rm_req_from_ctx(struct hpre_asym_request *hpre_req) static struct hisi_qp *hpre_get_qp_and_start(void) { struct hisi_qp *qp; - struct hpre *hpre; int ret; - /* find the proper hpre device, which is near the current CPU core */ - hpre = hpre_find_device(cpu_to_node(smp_processor_id())); - if (!hpre) { - pr_err("Can not find proper hpre device!\n"); - return ERR_PTR(-ENODEV); - } - - qp = hisi_qm_create_qp(&hpre->qm, 0); - 
if (IS_ERR(qp)) { - pci_err(hpre->qm.pdev, "Can not create qp!\n"); + qp = hpre_create_qp(); + if (!qp) { + pr_err("Can not create hpre qp!\n"); return ERR_PTR(-ENODEV); } ret = hisi_qm_start_qp(qp, 0); if (ret < 0) { - hisi_qm_release_qp(qp); - pci_err(hpre->qm.pdev, "Can not start qp!\n"); + hisi_qm_free_qps(&qp, 1); + pci_err(qp->qm->pdev, "Can not start qp!\n"); return ERR_PTR(-EINVAL); } @@ -338,7 +330,7 @@ static void hpre_ctx_clear(struct hpre_ctx *ctx, bool is_clear_all) if (is_clear_all) { idr_destroy(&ctx->req_idr); kfree(ctx->req_list); - hisi_qm_release_qp(ctx->qp); + hisi_qm_free_qps(&ctx->qp, 1); } ctx->crt_g2_mode = false; diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 401747de67a8..88be53bf4a38 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -82,8 +82,7 @@ #define HPRE_VIA_MSI_DSM 1 -static LIST_HEAD(hpre_list); -static DEFINE_MUTEX(hpre_list_lock); +static struct hisi_qm_list hpre_devices; static const char hpre_name[] = "hisi_hpre"; static struct dentry *hpre_debugfs_root; static const struct pci_device_id hpre_dev_ids[] = { @@ -196,43 +195,17 @@ static u32 hpre_pf_q_num = HPRE_PF_DEF_Q_NUM; module_param_cb(hpre_pf_q_num, &hpre_pf_q_num_ops, &hpre_pf_q_num, 0444); MODULE_PARM_DESC(hpre_pf_q_num, "Number of queues in PF of CS(1-1024)"); -static inline void hpre_add_to_list(struct hpre *hpre) +struct hisi_qp *hpre_create_qp(void) { - mutex_lock(&hpre_list_lock); - list_add_tail(&hpre->list, &hpre_list); - mutex_unlock(&hpre_list_lock); -} - -static inline void hpre_remove_from_list(struct hpre *hpre) -{ - mutex_lock(&hpre_list_lock); - list_del(&hpre->list); - mutex_unlock(&hpre_list_lock); -} + int node = cpu_to_node(smp_processor_id()); + struct hisi_qp *qp = NULL; + int ret; -struct hpre *hpre_find_device(int node) -{ - struct hpre *hpre, *ret = NULL; - int min_distance = INT_MAX; - struct device *dev; - int dev_node = 0; - - 
mutex_lock(&hpre_list_lock); - list_for_each_entry(hpre, &hpre_list, list) { - dev = &hpre->qm.pdev->dev; -#ifdef CONFIG_NUMA - dev_node = dev->numa_node; - if (dev_node < 0) - dev_node = 0; -#endif - if (node_distance(dev_node, node) < min_distance) { - ret = hpre; - min_distance = node_distance(dev_node, node); - } - } - mutex_unlock(&hpre_list_lock); + ret = hisi_qm_alloc_qps_node(&hpre_devices, 1, 0, node, &qp); + if (!ret) + return qp; - return ret; + return NULL; } static int hpre_cfg_by_dsm(struct hisi_qm *qm) @@ -349,18 +322,14 @@ static void hpre_cnt_regs_clear(struct hisi_qm *qm) hisi_qm_debug_regs_clear(qm); } -static void hpre_hw_error_disable(struct hpre *hpre) +static void hpre_hw_error_disable(struct hisi_qm *qm) { - struct hisi_qm *qm = &hpre->qm; - /* disable hpre hw error interrupts */ writel(HPRE_CORE_INT_DISABLE, qm->io_base + HPRE_INT_MASK); } -static void hpre_hw_error_enable(struct hpre *hpre) +static void hpre_hw_error_enable(struct hisi_qm *qm) { - struct hisi_qm *qm = &hpre->qm; - /* enable hpre hw error interrupts */ writel(HPRE_CORE_INT_ENABLE, qm->io_base + HPRE_INT_MASK); writel(HPRE_HAC_RAS_CE_ENABLE, qm->io_base + HPRE_RAS_CE_ENB); @@ -713,13 +682,39 @@ static int hpre_qm_pre_init(struct hisi_qm *qm, struct pci_dev *pdev) return 0; } -static void hpre_hw_err_init(struct hpre *hpre) +static void hpre_log_hw_error(struct hisi_qm *qm, u32 err_sts) { - hisi_qm_hw_error_init(&hpre->qm, QM_BASE_CE, QM_BASE_NFE, - 0, QM_DB_RANDOM_INVALID); - hpre_hw_error_enable(hpre); + const struct hpre_hw_error *err = hpre_hw_errors; + struct device *dev = &qm->pdev->dev; + + while (err->msg) { + if (err->int_msk & err_sts) + dev_warn(dev, "%s [error status=0x%x] found\n", + err->msg, err->int_msk); + err++; + } + + writel(err_sts, qm->io_base + HPRE_HAC_SOURCE_INT); +} + +static u32 hpre_get_hw_err_status(struct hisi_qm *qm) +{ + return readl(qm->io_base + HPRE_HAC_INT_STATUS); } +static const struct hisi_qm_err_ini hpre_err_ini = { + .hw_err_enable = 
hpre_hw_error_enable, + .hw_err_disable = hpre_hw_error_disable, + .get_dev_hw_err_status = hpre_get_hw_err_status, + .log_dev_hw_err = hpre_log_hw_error, + .err_info = { + .ce = QM_BASE_CE, + .nfe = QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT, + .fe = 0, + .msi = QM_DB_RANDOM_INVALID, + } +}; + static int hpre_pf_probe_init(struct hpre *hpre) { struct hisi_qm *qm = &hpre->qm; @@ -731,7 +726,8 @@ static int hpre_pf_probe_init(struct hpre *hpre) if (ret) return ret; - hpre_hw_err_init(hpre); + qm->err_ini = &hpre_err_ini; + hisi_qm_dev_err_init(qm); return 0; } @@ -776,22 +772,21 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) dev_warn(&pdev->dev, "init debugfs fail!\n"); - hpre_add_to_list(hpre); + hisi_qm_add_to_list(qm, &hpre_devices); ret = hpre_algs_register(); if (ret < 0) { - hpre_remove_from_list(hpre); pci_err(pdev, "fail to register algs to crypto!\n"); goto err_with_qm_start; } return 0; err_with_qm_start: + hisi_qm_del_from_list(qm, &hpre_devices); hisi_qm_stop(qm); err_with_err_init: - if (pdev->is_physfn) - hpre_hw_error_disable(hpre); + hisi_qm_dev_err_uninit(qm); err_with_qm_init: hisi_qm_uninit(qm); @@ -907,7 +902,7 @@ static void hpre_remove(struct pci_dev *pdev) int ret; hpre_algs_unregister(); - hpre_remove_from_list(hpre); + hisi_qm_del_from_list(qm, &hpre_devices); if (qm->fun_type == QM_HW_PF && hpre->num_vfs != 0) { ret = hpre_sriov_disable(pdev); if (ret) { @@ -922,69 +917,13 @@ static void hpre_remove(struct pci_dev *pdev) hpre_debugfs_exit(hpre); hisi_qm_stop(qm); - if (qm->fun_type == QM_HW_PF) - hpre_hw_error_disable(hpre); + hisi_qm_dev_err_uninit(qm); hisi_qm_uninit(qm); } -static void hpre_log_hw_error(struct hpre *hpre, u32 err_sts) -{ - const struct hpre_hw_error *err = hpre_hw_errors; - struct device *dev = &hpre->qm.pdev->dev; - - while (err->msg) { - if (err->int_msk & err_sts) - dev_warn(dev, "%s [error status=0x%x] found\n", - err->msg, err->int_msk); - err++; - } -} - -static pci_ers_result_t 
hpre_hw_error_handle(struct hpre *hpre) -{ - u32 err_sts; - - /* read err sts */ - err_sts = readl(hpre->qm.io_base + HPRE_HAC_INT_STATUS); - if (err_sts) { - hpre_log_hw_error(hpre, err_sts); - - /* clear error interrupts */ - writel(err_sts, hpre->qm.io_base + HPRE_HAC_SOURCE_INT); - return PCI_ERS_RESULT_NEED_RESET; - } - - return PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t hpre_process_hw_error(struct pci_dev *pdev) -{ - struct hpre *hpre = pci_get_drvdata(pdev); - pci_ers_result_t qm_ret, hpre_ret; - - /* log qm error */ - qm_ret = hisi_qm_hw_error_handle(&hpre->qm); - - /* log hpre error */ - hpre_ret = hpre_hw_error_handle(hpre); - - return (qm_ret == PCI_ERS_RESULT_NEED_RESET || - hpre_ret == PCI_ERS_RESULT_NEED_RESET) ? - PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t hpre_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - pci_info(pdev, "PCI error detected, state(=%d)!!\n", state); - if (state == pci_channel_io_perm_failure) - return PCI_ERS_RESULT_DISCONNECT; - - return hpre_process_hw_error(pdev); -} static const struct pci_error_handlers hpre_err_handler = { - .error_detected = hpre_error_detected, + .error_detected = hisi_qm_dev_err_detected, }; static struct pci_driver hpre_pci_driver = { @@ -1013,6 +952,7 @@ static int __init hpre_init(void) { int ret; + hisi_qm_init_list(&hpre_devices); hpre_register_debugfs(); ret = pci_register_driver(&hpre_pci_driver); diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index b57da5ef8b5b..f795fb557630 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -9,6 +9,9 @@ #include <linux/log2.h> #include <linux/seq_file.h> #include <linux/slab.h> +#include <linux/uacce.h> +#include <linux/uaccess.h> +#include <uapi/misc/uacce/hisi_qm.h> #include "qm.h" /* eq/aeq irq enable */ @@ -269,6 +272,12 @@ struct qm_doorbell { __le16 priority; }; +struct hisi_qm_resource { + struct hisi_qm *qm; + int 
distance; + struct list_head list; +}; + struct hisi_qm_hw_ops { int (*get_vft)(struct hisi_qm *qm, u32 *base, u32 *number); void (*qm_db)(struct hisi_qm *qm, u16 qn, @@ -277,6 +286,7 @@ struct hisi_qm_hw_ops { int (*debug_init)(struct hisi_qm *qm); void (*hw_error_init)(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe, u32 msi); + void (*hw_error_uninit)(struct hisi_qm *qm); pci_ers_result_t (*hw_error_handle)(struct hisi_qm *qm); }; @@ -465,9 +475,14 @@ static void qm_cq_head_update(struct hisi_qp *qp) static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm) { - struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head; + if (qp->event_cb) { + qp->event_cb(qp); + return; + } if (qp->req_cb) { + struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head; + while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) { dma_rmb(); qp->req_cb(qp, qp->sqe + qm->sqe_size * @@ -485,17 +500,9 @@ static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm) } } -static void qm_qp_work_func(struct work_struct *work) +static void qm_work_process(struct work_struct *work) { - struct hisi_qp *qp; - - qp = container_of(work, struct hisi_qp, work); - qm_poll_qp(qp, qp->qm); -} - -static irqreturn_t qm_irq_handler(int irq, void *data) -{ - struct hisi_qm *qm = data; + struct hisi_qm *qm = container_of(work, struct hisi_qm, work); struct qm_eqe *eqe = qm->eqe + qm->status.eq_head; struct hisi_qp *qp; int eqe_num = 0; @@ -504,7 +511,7 @@ static irqreturn_t qm_irq_handler(int irq, void *data) eqe_num++; qp = qm_to_hisi_qp(qm, eqe); if (qp) - queue_work(qp->wq, &qp->work); + qm_poll_qp(qp, qm); if (qm->status.eq_head == QM_Q_DEPTH - 1) { qm->status.eqc_phase = !qm->status.eqc_phase; @@ -522,6 +529,17 @@ static irqreturn_t qm_irq_handler(int irq, void *data) } qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0); +} + +static irqreturn_t do_qm_irq(int irq, void *data) +{ + struct hisi_qm *qm = (struct hisi_qm *)data; + + /* the workqueue created by device driver of QM */ + if (qm->wq) + 
queue_work(qm->wq, &qm->work); + else + schedule_work(&qm->work); return IRQ_HANDLED; } @@ -531,7 +549,7 @@ static irqreturn_t qm_irq(int irq, void *data) struct hisi_qm *qm = data; if (readl(qm->io_base + QM_VF_EQ_INT_SOURCE)) - return qm_irq_handler(irq, data); + return do_qm_irq(irq, data); dev_err(&qm->pdev->dev, "invalid int source\n"); qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0); @@ -1011,43 +1029,45 @@ static void qm_hw_error_init_v2(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe, writel(irq_unmask, qm->io_base + QM_ABNORMAL_INT_MASK); } +static void qm_hw_error_uninit_v2(struct hisi_qm *qm) +{ + writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK); +} + static void qm_log_hw_error(struct hisi_qm *qm, u32 error_status) { - const struct hisi_qm_hw_error *err = qm_hw_error; + const struct hisi_qm_hw_error *err; struct device *dev = &qm->pdev->dev; u32 reg_val, type, vf_num; + int i; - while (err->msg) { - if (err->int_msk & error_status) { - dev_err(dev, "%s [error status=0x%x] found\n", - err->msg, err->int_msk); - - if (error_status & QM_DB_TIMEOUT) { - reg_val = readl(qm->io_base + - QM_ABNORMAL_INF01); - type = (reg_val & QM_DB_TIMEOUT_TYPE) >> - QM_DB_TIMEOUT_TYPE_SHIFT; - vf_num = reg_val & QM_DB_TIMEOUT_VF; - dev_err(dev, "qm %s doorbell timeout in function %u\n", - qm_db_timeout[type], vf_num); - } - - if (error_status & QM_OF_FIFO_OF) { - reg_val = readl(qm->io_base + - QM_ABNORMAL_INF00); - type = (reg_val & QM_FIFO_OVERFLOW_TYPE) >> - QM_FIFO_OVERFLOW_TYPE_SHIFT; - vf_num = reg_val & QM_FIFO_OVERFLOW_VF; - - if (type < ARRAY_SIZE(qm_fifo_overflow)) - dev_err(dev, "qm %s fifo overflow in function %u\n", - qm_fifo_overflow[type], - vf_num); - else - dev_err(dev, "unknown error type\n"); - } + for (i = 0; i < ARRAY_SIZE(qm_hw_error); i++) { + err = &qm_hw_error[i]; + if (!(err->int_msk & error_status)) + continue; + + dev_err(dev, "%s [error status=0x%x] found\n", + err->msg, err->int_msk); + + if (err->int_msk & 
QM_DB_TIMEOUT) { + reg_val = readl(qm->io_base + QM_ABNORMAL_INF01); + type = (reg_val & QM_DB_TIMEOUT_TYPE) >> + QM_DB_TIMEOUT_TYPE_SHIFT; + vf_num = reg_val & QM_DB_TIMEOUT_VF; + dev_err(dev, "qm %s doorbell timeout in function %u\n", + qm_db_timeout[type], vf_num); + } else if (err->int_msk & QM_OF_FIFO_OF) { + reg_val = readl(qm->io_base + QM_ABNORMAL_INF00); + type = (reg_val & QM_FIFO_OVERFLOW_TYPE) >> + QM_FIFO_OVERFLOW_TYPE_SHIFT; + vf_num = reg_val & QM_FIFO_OVERFLOW_VF; + + if (type < ARRAY_SIZE(qm_fifo_overflow)) + dev_err(dev, "qm %s fifo overflow in function %u\n", + qm_fifo_overflow[type], vf_num); + else + dev_err(dev, "unknown error type\n"); } - err++; } } @@ -1082,6 +1102,7 @@ static const struct hisi_qm_hw_ops qm_hw_ops_v2 = { .qm_db = qm_db_v2, .get_irq_num = qm_get_irq_num_v2, .hw_error_init = qm_hw_error_init_v2, + .hw_error_uninit = qm_hw_error_uninit_v2, .hw_error_handle = qm_hw_error_handle_v2, }; @@ -1147,20 +1168,9 @@ struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type) qp->qp_id = qp_id; qp->alg_type = alg_type; - INIT_WORK(&qp->work, qm_qp_work_func); - qp->wq = alloc_workqueue("hisi_qm", WQ_UNBOUND | WQ_HIGHPRI | - WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0); - if (!qp->wq) { - ret = -EFAULT; - goto err_free_qp_mem; - } return qp; -err_free_qp_mem: - if (qm->use_dma_api) - dma_free_coherent(dev, qp->qdma.size, qp->qdma.va, - qp->qdma.dma); err_clear_bit: write_lock(&qm->qps_lock); qm->qp_array[qp_id] = NULL; @@ -1269,7 +1279,7 @@ static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, int pasid) * @qp: The qp we want to start to run. * @arg: Accelerator specific argument. * - * After this function, qp can receive request from user. Return qp_id if + * After this function, qp can receive request from user. Return 0 if * successful, Return -EBUSY if failed. 
*/ int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg) @@ -1314,7 +1324,7 @@ int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg) dev_dbg(dev, "queue %d started\n", qp_id); - return qp_id; + return 0; } EXPORT_SYMBOL_GPL(hisi_qm_start_qp); @@ -1395,6 +1405,214 @@ static void hisi_qm_cache_wb(struct hisi_qm *qm) } } +static void qm_qp_event_notifier(struct hisi_qp *qp) +{ + wake_up_interruptible(&qp->uacce_q->wait); +} + +static int hisi_qm_get_available_instances(struct uacce_device *uacce) +{ + int i, ret; + struct hisi_qm *qm = uacce->priv; + + read_lock(&qm->qps_lock); + for (i = 0, ret = 0; i < qm->qp_num; i++) + if (!qm->qp_array[i]) + ret++; + read_unlock(&qm->qps_lock); + + return ret; +} + +static int hisi_qm_uacce_get_queue(struct uacce_device *uacce, + unsigned long arg, + struct uacce_queue *q) +{ + struct hisi_qm *qm = uacce->priv; + struct hisi_qp *qp; + u8 alg_type = 0; + + qp = hisi_qm_create_qp(qm, alg_type); + if (IS_ERR(qp)) + return PTR_ERR(qp); + + q->priv = qp; + q->uacce = uacce; + qp->uacce_q = q; + qp->event_cb = qm_qp_event_notifier; + qp->pasid = arg; + + return 0; +} + +static void hisi_qm_uacce_put_queue(struct uacce_queue *q) +{ + struct hisi_qp *qp = q->priv; + + hisi_qm_cache_wb(qp->qm); + hisi_qm_release_qp(qp); +} + +/* map sq/cq/doorbell to user space */ +static int hisi_qm_uacce_mmap(struct uacce_queue *q, + struct vm_area_struct *vma, + struct uacce_qfile_region *qfr) +{ + struct hisi_qp *qp = q->priv; + struct hisi_qm *qm = qp->qm; + size_t sz = vma->vm_end - vma->vm_start; + struct pci_dev *pdev = qm->pdev; + struct device *dev = &pdev->dev; + unsigned long vm_pgoff; + int ret; + + switch (qfr->type) { + case UACCE_QFRT_MMIO: + if (qm->ver == QM_HW_V2) { + if (sz > PAGE_SIZE * (QM_DOORBELL_PAGE_NR + + QM_DOORBELL_SQ_CQ_BASE_V2 / PAGE_SIZE)) + return -EINVAL; + } else { + if (sz > PAGE_SIZE * QM_DOORBELL_PAGE_NR) + return -EINVAL; + } + + vma->vm_flags |= VM_IO; + + return remap_pfn_range(vma, vma->vm_start, + 
qm->phys_base >> PAGE_SHIFT, + sz, pgprot_noncached(vma->vm_page_prot)); + case UACCE_QFRT_DUS: + if (sz != qp->qdma.size) + return -EINVAL; + + /* + * dma_mmap_coherent() requires vm_pgoff as 0 + * restore vm_pfoff to initial value for mmap() + */ + vm_pgoff = vma->vm_pgoff; + vma->vm_pgoff = 0; + ret = dma_mmap_coherent(dev, vma, qp->qdma.va, + qp->qdma.dma, sz); + vma->vm_pgoff = vm_pgoff; + return ret; + + default: + return -EINVAL; + } +} + +static int hisi_qm_uacce_start_queue(struct uacce_queue *q) +{ + struct hisi_qp *qp = q->priv; + + return hisi_qm_start_qp(qp, qp->pasid); +} + +static void hisi_qm_uacce_stop_queue(struct uacce_queue *q) +{ + hisi_qm_stop_qp(q->priv); +} + +static int qm_set_sqctype(struct uacce_queue *q, u16 type) +{ + struct hisi_qm *qm = q->uacce->priv; + struct hisi_qp *qp = q->priv; + + write_lock(&qm->qps_lock); + qp->alg_type = type; + write_unlock(&qm->qps_lock); + + return 0; +} + +static long hisi_qm_uacce_ioctl(struct uacce_queue *q, unsigned int cmd, + unsigned long arg) +{ + struct hisi_qp *qp = q->priv; + struct hisi_qp_ctx qp_ctx; + + if (cmd == UACCE_CMD_QM_SET_QP_CTX) { + if (copy_from_user(&qp_ctx, (void __user *)arg, + sizeof(struct hisi_qp_ctx))) + return -EFAULT; + + if (qp_ctx.qc_type != 0 && qp_ctx.qc_type != 1) + return -EINVAL; + + qm_set_sqctype(q, qp_ctx.qc_type); + qp_ctx.id = qp->qp_id; + + if (copy_to_user((void __user *)arg, &qp_ctx, + sizeof(struct hisi_qp_ctx))) + return -EFAULT; + } else { + return -EINVAL; + } + + return 0; +} + +static const struct uacce_ops uacce_qm_ops = { + .get_available_instances = hisi_qm_get_available_instances, + .get_queue = hisi_qm_uacce_get_queue, + .put_queue = hisi_qm_uacce_put_queue, + .start_queue = hisi_qm_uacce_start_queue, + .stop_queue = hisi_qm_uacce_stop_queue, + .mmap = hisi_qm_uacce_mmap, + .ioctl = hisi_qm_uacce_ioctl, +}; + +static int qm_alloc_uacce(struct hisi_qm *qm) +{ + struct pci_dev *pdev = qm->pdev; + struct uacce_device *uacce; + unsigned long 
mmio_page_nr; + unsigned long dus_page_nr; + struct uacce_interface interface = { + .flags = UACCE_DEV_SVA, + .ops = &uacce_qm_ops, + }; + + strncpy(interface.name, pdev->driver->name, sizeof(interface.name)); + + uacce = uacce_alloc(&pdev->dev, &interface); + if (IS_ERR(uacce)) + return PTR_ERR(uacce); + + if (uacce->flags & UACCE_DEV_SVA) { + qm->use_sva = true; + } else { + /* only consider sva case */ + uacce_remove(uacce); + qm->uacce = NULL; + return -EINVAL; + } + + uacce->is_vf = pdev->is_virtfn; + uacce->priv = qm; + uacce->algs = qm->algs; + + if (qm->ver == QM_HW_V1) { + mmio_page_nr = QM_DOORBELL_PAGE_NR; + uacce->api_ver = HISI_QM_API_VER_BASE; + } else { + mmio_page_nr = QM_DOORBELL_PAGE_NR + + QM_DOORBELL_SQ_CQ_BASE_V2 / PAGE_SIZE; + uacce->api_ver = HISI_QM_API_VER2_BASE; + } + + dus_page_nr = (PAGE_SIZE - 1 + qm->sqe_size * QM_Q_DEPTH + + sizeof(struct qm_cqe) * QM_Q_DEPTH) >> PAGE_SHIFT; + + uacce->qf_pg_num[UACCE_QFRT_MMIO] = mmio_page_nr; + uacce->qf_pg_num[UACCE_QFRT_DUS] = dus_page_nr; + + qm->uacce = uacce; + + return 0; +} + /** * hisi_qm_get_free_qp_num() - Get free number of qp in qm. * @qm: The qm which want to get free qp. 
@@ -1437,10 +1655,14 @@ int hisi_qm_init(struct hisi_qm *qm) return -EINVAL; } + ret = qm_alloc_uacce(qm); + if (ret < 0) + dev_warn(&pdev->dev, "fail to alloc uacce (%d)\n", ret); + ret = pci_enable_device_mem(pdev); if (ret < 0) { dev_err(&pdev->dev, "Failed to enable device mem!\n"); - return ret; + goto err_remove_uacce; } ret = pci_request_mem_regions(pdev, qm->dev_name); @@ -1449,8 +1671,9 @@ int hisi_qm_init(struct hisi_qm *qm) goto err_disable_pcidev; } - qm->io_base = ioremap(pci_resource_start(pdev, PCI_BAR_2), - pci_resource_len(qm->pdev, PCI_BAR_2)); + qm->phys_base = pci_resource_start(pdev, PCI_BAR_2); + qm->phys_size = pci_resource_len(qm->pdev, PCI_BAR_2); + qm->io_base = ioremap(qm->phys_base, qm->phys_size); if (!qm->io_base) { ret = -EIO; goto err_release_mem_regions; @@ -1479,6 +1702,7 @@ int hisi_qm_init(struct hisi_qm *qm) qm->qp_in_used = 0; mutex_init(&qm->mailbox_lock); rwlock_init(&qm->qps_lock); + INIT_WORK(&qm->work, qm_work_process); dev_dbg(dev, "init qm %s with %s\n", pdev->is_physfn ? "pf" : "vf", qm->use_dma_api ? "dma api" : "iommu api"); @@ -1493,6 +1717,9 @@ err_release_mem_regions: pci_release_mem_regions(pdev); err_disable_pcidev: pci_disable_device(pdev); +err_remove_uacce: + uacce_remove(qm->uacce); + qm->uacce = NULL; return ret; } @@ -1509,6 +1736,9 @@ void hisi_qm_uninit(struct hisi_qm *qm) struct pci_dev *pdev = qm->pdev; struct device *dev = &pdev->dev; + uacce_remove(qm->uacce); + qm->uacce = NULL; + if (qm->use_dma_api && qm->qdma.va) { hisi_qm_cache_wb(qm); dma_free_coherent(dev, qm->qdma.size, @@ -1856,43 +2086,30 @@ void hisi_qm_debug_regs_clear(struct hisi_qm *qm) } EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear); -/** - * hisi_qm_hw_error_init() - Configure qm hardware error report method. - * @qm: The qm which we want to configure. - * @ce: Bit mask of correctable error configure. - * @nfe: Bit mask of non-fatal error configure. - * @fe: Bit mask of fatal error configure. 
- * @msi: Bit mask of error reported by message signal interrupt. - * - * Hardware errors of qm can be reported either by RAS interrupts which will - * be handled by UEFI and then PCIe AER or by device MSI. User can configure - * each error to use either of above two methods. For RAS interrupts, we can - * configure an error as one of correctable error, non-fatal error or - * fatal error. - * - * Bits indicating errors can be configured to ce, nfe, fe and msi to enable - * related report methods. Error report will be masked if related error bit - * does not configure. - */ -void hisi_qm_hw_error_init(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe, - u32 msi) +static void qm_hw_error_init(struct hisi_qm *qm) { + const struct hisi_qm_err_info *err_info = &qm->err_ini->err_info; + if (!qm->ops->hw_error_init) { dev_err(&qm->pdev->dev, "QM doesn't support hw error handling!\n"); return; } - qm->ops->hw_error_init(qm, ce, nfe, fe, msi); + qm->ops->hw_error_init(qm, err_info->ce, err_info->nfe, + err_info->fe, err_info->msi); } -EXPORT_SYMBOL_GPL(hisi_qm_hw_error_init); -/** - * hisi_qm_hw_error_handle() - Handle qm non-fatal hardware errors. - * @qm: The qm which has non-fatal hardware errors. - * - * Accelerators use this function to handle qm non-fatal hardware errors. - */ -pci_ers_result_t hisi_qm_hw_error_handle(struct hisi_qm *qm) +static void qm_hw_error_uninit(struct hisi_qm *qm) +{ + if (!qm->ops->hw_error_uninit) { + dev_err(&qm->pdev->dev, "Unexpected QM hw error uninit!\n"); + return; + } + + qm->ops->hw_error_uninit(qm); +} + +static pci_ers_result_t qm_hw_error_handle(struct hisi_qm *qm) { if (!qm->ops->hw_error_handle) { dev_err(&qm->pdev->dev, "QM doesn't support hw error report!\n"); @@ -1901,7 +2118,6 @@ pci_ers_result_t hisi_qm_hw_error_handle(struct hisi_qm *qm) return qm->ops->hw_error_handle(qm); } -EXPORT_SYMBOL_GPL(hisi_qm_hw_error_handle); /** * hisi_qm_get_hw_version() - Get hardware version of a qm. 
@@ -1922,6 +2138,229 @@ enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev) } EXPORT_SYMBOL_GPL(hisi_qm_get_hw_version); +/** + * hisi_qm_dev_err_init() - Initialize device error configuration. + * @qm: The qm for which we want to do error initialization. + * + * Initialize QM and device error related configuration. + */ +void hisi_qm_dev_err_init(struct hisi_qm *qm) +{ + if (qm->fun_type == QM_HW_VF) + return; + + qm_hw_error_init(qm); + + if (!qm->err_ini->hw_err_enable) { + dev_err(&qm->pdev->dev, "Device doesn't support hw error init!\n"); + return; + } + qm->err_ini->hw_err_enable(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_dev_err_init); + +/** + * hisi_qm_dev_err_uninit() - Uninitialize device error configuration. + * @qm: The qm for which we want to do error uninitialization. + * + * Uninitialize QM and device error related configuration. + */ +void hisi_qm_dev_err_uninit(struct hisi_qm *qm) +{ + if (qm->fun_type == QM_HW_VF) + return; + + qm_hw_error_uninit(qm); + + if (!qm->err_ini->hw_err_disable) { + dev_err(&qm->pdev->dev, "Unexpected device hw error uninit!\n"); + return; + } + qm->err_ini->hw_err_disable(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_dev_err_uninit); + +/** + * hisi_qm_free_qps() - free multiple queue pairs. + * @qps: The queue pairs need to be freed. + * @qp_num: The num of queue pairs. 
+ */ +void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num) +{ + int i; + + if (!qps || qp_num <= 0) + return; + + for (i = qp_num - 1; i >= 0; i--) + hisi_qm_release_qp(qps[i]); +} +EXPORT_SYMBOL_GPL(hisi_qm_free_qps); + +static void free_list(struct list_head *head) +{ + struct hisi_qm_resource *res, *tmp; + + list_for_each_entry_safe(res, tmp, head, list) { + list_del(&res->list); + kfree(res); + } +} + +static int hisi_qm_sort_devices(int node, struct list_head *head, + struct hisi_qm_list *qm_list) +{ + struct hisi_qm_resource *res, *tmp; + struct hisi_qm *qm; + struct list_head *n; + struct device *dev; + int dev_node = 0; + + list_for_each_entry(qm, &qm_list->list, list) { + dev = &qm->pdev->dev; + + if (IS_ENABLED(CONFIG_NUMA)) { + dev_node = dev_to_node(dev); + if (dev_node < 0) + dev_node = 0; + } + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + + res->qm = qm; + res->distance = node_distance(dev_node, node); + n = head; + list_for_each_entry(tmp, head, list) { + if (res->distance < tmp->distance) { + n = &tmp->list; + break; + } + } + list_add_tail(&res->list, n); + } + + return 0; +} + +/** + * hisi_qm_alloc_qps_node() - Create multiple queue pairs. + * @qm_list: The list of all available devices. + * @qp_num: The number of queue pairs need created. + * @alg_type: The algorithm type. + * @node: The numa node. + * @qps: The queue pairs need created. + * + * This function will sort all available device according to numa distance. + * Then try to create all queue pairs from one device, if all devices do + * not meet the requirements will return error. 
+ */ +int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, + u8 alg_type, int node, struct hisi_qp **qps) +{ + struct hisi_qm_resource *tmp; + int ret = -ENODEV; + LIST_HEAD(head); + int i; + + if (!qps || !qm_list || qp_num <= 0) + return -EINVAL; + + mutex_lock(&qm_list->lock); + if (hisi_qm_sort_devices(node, &head, qm_list)) { + mutex_unlock(&qm_list->lock); + goto err; + } + + list_for_each_entry(tmp, &head, list) { + for (i = 0; i < qp_num; i++) { + qps[i] = hisi_qm_create_qp(tmp->qm, alg_type); + if (IS_ERR(qps[i])) { + hisi_qm_free_qps(qps, i); + break; + } + } + + if (i == qp_num) { + ret = 0; + break; + } + } + + mutex_unlock(&qm_list->lock); + if (ret) + pr_info("Failed to create qps, node[%d], alg[%d], qp[%d]!\n", + node, alg_type, qp_num); + +err: + free_list(&head); + return ret; +} +EXPORT_SYMBOL_GPL(hisi_qm_alloc_qps_node); + +static pci_ers_result_t qm_dev_err_handle(struct hisi_qm *qm) +{ + u32 err_sts; + + if (!qm->err_ini->get_dev_hw_err_status) { + dev_err(&qm->pdev->dev, "Device doesn't support get hw error status!\n"); + return PCI_ERS_RESULT_NONE; + } + + /* get device hardware error status */ + err_sts = qm->err_ini->get_dev_hw_err_status(qm); + if (err_sts) { + if (!qm->err_ini->log_dev_hw_err) { + dev_err(&qm->pdev->dev, "Device doesn't support log hw error!\n"); + return PCI_ERS_RESULT_NEED_RESET; + } + + qm->err_ini->log_dev_hw_err(qm, err_sts); + return PCI_ERS_RESULT_NEED_RESET; + } + + return PCI_ERS_RESULT_RECOVERED; +} + +static pci_ers_result_t qm_process_dev_error(struct pci_dev *pdev) +{ + struct hisi_qm *qm = pci_get_drvdata(pdev); + pci_ers_result_t qm_ret, dev_ret; + + /* log qm error */ + qm_ret = qm_hw_error_handle(qm); + + /* log device error */ + dev_ret = qm_dev_err_handle(qm); + + return (qm_ret == PCI_ERS_RESULT_NEED_RESET || + dev_ret == PCI_ERS_RESULT_NEED_RESET) ? 
+ PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED; +} + +/** + * hisi_qm_dev_err_detected() - Get device and qm error status then log it. + * @pdev: The PCI device which need report error. + * @state: The connectivity between CPU and device. + * + * We register this function into PCIe AER handlers, It will report device or + * qm hardware error status when error occur. + */ +pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + if (pdev->is_virtfn) + return PCI_ERS_RESULT_NONE; + + pci_info(pdev, "PCI error detected, state(=%d)!!\n", state); + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + + return qm_process_dev_error(pdev); +} +EXPORT_SYMBOL_GPL(hisi_qm_dev_err_detected); + MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Zhou Wang <wangzhou1@hisilicon.com>"); MODULE_DESCRIPTION("HiSilicon Accelerator queue manager driver"); diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index 078b8f1f1b77..ec5b6f48db6c 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -77,6 +77,9 @@ #define HISI_ACC_SGL_SGE_NR_MAX 255 +/* page number for queue file region */ +#define QM_DOORBELL_PAGE_NR 1 + enum qp_state { QP_STOP, }; @@ -125,6 +128,28 @@ struct hisi_qm_status { unsigned long flags; }; +struct hisi_qm; + +struct hisi_qm_err_info { + u32 ce; + u32 nfe; + u32 fe; + u32 msi; +}; + +struct hisi_qm_err_ini { + void (*hw_err_enable)(struct hisi_qm *qm); + void (*hw_err_disable)(struct hisi_qm *qm); + u32 (*get_dev_hw_err_status)(struct hisi_qm *qm); + void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts); + struct hisi_qm_err_info err_info; +}; + +struct hisi_qm_list { + struct mutex lock; + struct list_head list; +}; + struct hisi_qm { enum qm_hw_ver ver; enum qm_fun_type fun_type; @@ -136,6 +161,7 @@ struct hisi_qm { u32 qp_num; u32 qp_in_used; u32 ctrl_qp_num; + struct list_head list; struct qm_dma qdma; struct qm_sqc *sqc; @@ -148,6 +174,7 @@ 
struct hisi_qm { dma_addr_t aeqe_dma; struct hisi_qm_status status; + const struct hisi_qm_err_ini *err_ini; rwlock_t qps_lock; unsigned long *qp_bitmap; @@ -162,7 +189,15 @@ struct hisi_qm { u32 error_mask; u32 msi_mask; + struct workqueue_struct *wq; + struct work_struct work; + + const char *algs; bool use_dma_api; + bool use_sva; + resource_size_t phys_base; + resource_size_t phys_size; + struct uacce_device *uacce; }; struct hisi_qp_status { @@ -192,12 +227,35 @@ struct hisi_qp { struct hisi_qp_ops *hw_ops; void *qp_ctx; void (*req_cb)(struct hisi_qp *qp, void *data); - struct work_struct work; - struct workqueue_struct *wq; + void (*event_cb)(struct hisi_qp *qp); struct hisi_qm *qm; + u16 pasid; + struct uacce_queue *uacce_q; }; +static inline void hisi_qm_init_list(struct hisi_qm_list *qm_list) +{ + INIT_LIST_HEAD(&qm_list->list); + mutex_init(&qm_list->lock); +} + +static inline void hisi_qm_add_to_list(struct hisi_qm *qm, + struct hisi_qm_list *qm_list) +{ + mutex_lock(&qm_list->lock); + list_add_tail(&qm->list, &qm_list->list); + mutex_unlock(&qm_list->lock); +} + +static inline void hisi_qm_del_from_list(struct hisi_qm *qm, + struct hisi_qm_list *qm_list) +{ + mutex_lock(&qm_list->lock); + list_del(&qm->list); + mutex_unlock(&qm_list->lock); +} + int hisi_qm_init(struct hisi_qm *qm); void hisi_qm_uninit(struct hisi_qm *qm); int hisi_qm_start(struct hisi_qm *qm); @@ -211,11 +269,12 @@ int hisi_qm_get_free_qp_num(struct hisi_qm *qm); int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number); int hisi_qm_set_vft(struct hisi_qm *qm, u32 fun_num, u32 base, u32 number); int hisi_qm_debug_init(struct hisi_qm *qm); -void hisi_qm_hw_error_init(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe, - u32 msi); -pci_ers_result_t hisi_qm_hw_error_handle(struct hisi_qm *qm); enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev); void hisi_qm_debug_regs_clear(struct hisi_qm *qm); +void hisi_qm_dev_err_init(struct hisi_qm *qm); +void hisi_qm_dev_err_uninit(struct 
hisi_qm *qm); +pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev, + pci_channel_state_t state); struct hisi_acc_sgl_pool; struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, @@ -227,4 +286,7 @@ struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev, u32 count, u32 sge_nr); void hisi_acc_free_sgl_pool(struct device *dev, struct hisi_acc_sgl_pool *pool); +int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, + u8 alg_type, int node, struct hisi_qp **qps); +void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num); #endif diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index 13e2d8d7be94..3598fa17beb2 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -11,6 +11,8 @@ /* Algorithm resource per hardware SEC queue */ struct sec_alg_res { + u8 *pbuf; + dma_addr_t pbuf_dma; u8 *c_ivin; dma_addr_t c_ivin_dma; u8 *out_mac; @@ -23,6 +25,8 @@ struct sec_cipher_req { dma_addr_t c_in_dma; struct hisi_acc_hw_sgl *c_out; dma_addr_t c_out_dma; + u8 *c_ivin; + dma_addr_t c_ivin_dma; struct skcipher_request *sk_req; u32 c_len; bool encrypt; @@ -48,6 +52,7 @@ struct sec_req { /* Status of the SEC request */ bool fake_busy; + bool use_pbuf; }; /** @@ -114,6 +119,7 @@ struct sec_ctx { struct sec_qp_ctx *qp_ctx; struct sec_dev *sec; const struct sec_req_op *req_op; + struct hisi_qp **qps; /* Half queues for encipher, and half for decipher */ u32 hlf_q_num; @@ -128,6 +134,7 @@ struct sec_ctx { atomic_t dec_qcyclic; enum sec_alg_type alg_type; + bool pbuf_supported; struct sec_cipher_ctx c_ctx; struct sec_auth_ctx a_ctx; }; @@ -162,14 +169,15 @@ struct sec_debug { struct sec_dev { struct hisi_qm qm; - struct list_head list; struct sec_debug debug; u32 ctx_q_num; + bool iommu_used; u32 num_vfs; unsigned long status; }; -struct sec_dev *sec_find_device(int node); +void sec_destroy_qps(struct hisi_qp **qps, int qp_num); +struct hisi_qp 
**sec_create_qps(void); int sec_register_to_crypto(void); void sec_unregister_from_crypto(void); #endif diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index a2cfcc9ccd94..7f1c6a31b82f 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -46,7 +46,21 @@ #define SEC_CIPHER_AUTH 0xfe #define SEC_AUTH_CIPHER 0x1 #define SEC_MAX_MAC_LEN 64 +#define SEC_MAX_AAD_LEN 65535 #define SEC_TOTAL_MAC_SZ (SEC_MAX_MAC_LEN * QM_Q_DEPTH) + +#define SEC_PBUF_SZ 512 +#define SEC_PBUF_IV_OFFSET SEC_PBUF_SZ +#define SEC_PBUF_MAC_OFFSET (SEC_PBUF_SZ + SEC_IV_SIZE) +#define SEC_PBUF_PKG (SEC_PBUF_SZ + SEC_IV_SIZE + \ + SEC_MAX_MAC_LEN * 2) +#define SEC_PBUF_NUM (PAGE_SIZE / SEC_PBUF_PKG) +#define SEC_PBUF_PAGE_NUM (QM_Q_DEPTH / SEC_PBUF_NUM) +#define SEC_PBUF_LEFT_SZ (SEC_PBUF_PKG * (QM_Q_DEPTH - \ + SEC_PBUF_PAGE_NUM * SEC_PBUF_NUM)) +#define SEC_TOTAL_PBUF_SZ (PAGE_SIZE * SEC_PBUF_PAGE_NUM + \ + SEC_PBUF_LEFT_SZ) + #define SEC_SQE_LEN_RATE 4 #define SEC_SQE_CFLAG 2 #define SEC_SQE_AEAD_FLAG 3 @@ -110,12 +124,12 @@ static void sec_free_req_id(struct sec_req *req) mutex_unlock(&qp_ctx->req_lock); } -static int sec_aead_verify(struct sec_req *req, struct sec_qp_ctx *qp_ctx) +static int sec_aead_verify(struct sec_req *req) { struct aead_request *aead_req = req->aead_req.aead_req; struct crypto_aead *tfm = crypto_aead_reqtfm(aead_req); - u8 *mac_out = qp_ctx->res[req->req_id].out_mac; size_t authsize = crypto_aead_authsize(tfm); + u8 *mac_out = req->aead_req.out_mac; u8 *mac = mac_out + SEC_MAX_MAC_LEN; struct scatterlist *sgl = aead_req->src; size_t sz; @@ -163,7 +177,7 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp) } if (ctx->alg_type == SEC_AEAD && !req->c_req.encrypt) - err = sec_aead_verify(req, qp_ctx); + err = sec_aead_verify(req); atomic64_inc(&ctx->sec->debug.dfx.recv_cnt); @@ -245,6 +259,50 @@ static void sec_free_mac_resource(struct device *dev, struct 
sec_alg_res *res) res->out_mac, res->out_mac_dma); } +static void sec_free_pbuf_resource(struct device *dev, struct sec_alg_res *res) +{ + if (res->pbuf) + dma_free_coherent(dev, SEC_TOTAL_PBUF_SZ, + res->pbuf, res->pbuf_dma); +} + +/* + * To improve performance, pbuffer is used for + * small packets (< 512Bytes) as IOMMU translation using. + */ +static int sec_alloc_pbuf_resource(struct device *dev, struct sec_alg_res *res) +{ + int pbuf_page_offset; + int i, j, k; + + res->pbuf = dma_alloc_coherent(dev, SEC_TOTAL_PBUF_SZ, + &res->pbuf_dma, GFP_KERNEL); + if (!res->pbuf) + return -ENOMEM; + + /* + * SEC_PBUF_PKG contains data pbuf, iv and + * out_mac : <SEC_PBUF|SEC_IV|SEC_MAC> + * Every PAGE contains six SEC_PBUF_PKG + * The sec_qp_ctx contains QM_Q_DEPTH numbers of SEC_PBUF_PKG + * So we need SEC_PBUF_PAGE_NUM numbers of PAGE + * for the SEC_TOTAL_PBUF_SZ + */ + for (i = 0; i <= SEC_PBUF_PAGE_NUM; i++) { + pbuf_page_offset = PAGE_SIZE * i; + for (j = 0; j < SEC_PBUF_NUM; j++) { + k = i * SEC_PBUF_NUM + j; + if (k == QM_Q_DEPTH) + break; + res[k].pbuf = res->pbuf + + j * SEC_PBUF_PKG + pbuf_page_offset; + res[k].pbuf_dma = res->pbuf_dma + + j * SEC_PBUF_PKG + pbuf_page_offset; + } + } + return 0; +} + static int sec_alg_resource_alloc(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ctx) { @@ -259,11 +317,18 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx, if (ctx->alg_type == SEC_AEAD) { ret = sec_alloc_mac_resource(dev, res); if (ret) - goto get_fail; + goto alloc_fail; + } + if (ctx->pbuf_supported) { + ret = sec_alloc_pbuf_resource(dev, res); + if (ret) { + dev_err(dev, "fail to alloc pbuf dma resource!\n"); + goto alloc_fail; + } } return 0; -get_fail: +alloc_fail: sec_free_civ_resource(dev, res); return ret; @@ -276,6 +341,8 @@ static void sec_alg_resource_free(struct sec_ctx *ctx, sec_free_civ_resource(dev, qp_ctx->res); + if (ctx->pbuf_supported) + sec_free_pbuf_resource(dev, qp_ctx->res); if (ctx->alg_type == SEC_AEAD) sec_free_mac_resource(dev, 
qp_ctx->res); } @@ -288,11 +355,8 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx, struct hisi_qp *qp; int ret = -ENOMEM; - qp = hisi_qm_create_qp(qm, alg_type); - if (IS_ERR(qp)) - return PTR_ERR(qp); - qp_ctx = &ctx->qp_ctx[qp_ctx_id]; + qp = ctx->qps[qp_ctx_id]; qp->req_type = 0; qp->qp_ctx = qp_ctx; qp->req_cb = sec_req_cb; @@ -335,7 +399,6 @@ err_free_c_in_pool: hisi_acc_free_sgl_pool(dev, qp_ctx->c_in_pool); err_destroy_idr: idr_destroy(&qp_ctx->req_idr); - hisi_qm_release_qp(qp); return ret; } @@ -352,7 +415,6 @@ static void sec_release_qp_ctx(struct sec_ctx *ctx, hisi_acc_free_sgl_pool(dev, qp_ctx->c_in_pool); idr_destroy(&qp_ctx->req_idr); - hisi_qm_release_qp(qp_ctx->qp); } static int sec_ctx_base_init(struct sec_ctx *ctx) @@ -360,14 +422,18 @@ static int sec_ctx_base_init(struct sec_ctx *ctx) struct sec_dev *sec; int i, ret; - sec = sec_find_device(cpu_to_node(smp_processor_id())); - if (!sec) { - pr_err("Can not find proper Hisilicon SEC device!\n"); + ctx->qps = sec_create_qps(); + if (!ctx->qps) { + pr_err("Can not create sec qps!\n"); return -ENODEV; } + + sec = container_of(ctx->qps[0]->qm, struct sec_dev, qm); ctx->sec = sec; ctx->hlf_q_num = sec->ctx_q_num >> 1; + ctx->pbuf_supported = ctx->sec->iommu_used; + /* Half of queue depth is taken as fake requests limit in the queue. 
*/ ctx->fake_req_limit = QM_Q_DEPTH >> 1; ctx->qp_ctx = kcalloc(sec->ctx_q_num, sizeof(struct sec_qp_ctx), @@ -386,6 +452,7 @@ err_sec_release_qp_ctx: for (i = i - 1; i >= 0; i--) sec_release_qp_ctx(ctx, &ctx->qp_ctx[i]); + sec_destroy_qps(ctx->qps, sec->ctx_q_num); kfree(ctx->qp_ctx); return ret; } @@ -397,6 +464,7 @@ static void sec_ctx_base_uninit(struct sec_ctx *ctx) for (i = 0; i < ctx->sec->ctx_q_num; i++) sec_release_qp_ctx(ctx, &ctx->qp_ctx[i]); + sec_destroy_qps(ctx->qps, ctx->sec->ctx_q_num); kfree(ctx->qp_ctx); } @@ -447,7 +515,6 @@ static int sec_skcipher_init(struct crypto_skcipher *tfm) struct sec_ctx *ctx = crypto_skcipher_ctx(tfm); int ret; - ctx = crypto_skcipher_ctx(tfm); ctx->alg_type = SEC_SKCIPHER; crypto_skcipher_set_reqsize(tfm, sizeof(struct sec_req)); ctx->c_ctx.ivsize = crypto_skcipher_ivsize(tfm); @@ -591,11 +658,94 @@ GEN_SEC_SETKEY_FUNC(3des_cbc, SEC_CALG_3DES, SEC_CMODE_CBC) GEN_SEC_SETKEY_FUNC(sm4_xts, SEC_CALG_SM4, SEC_CMODE_XTS) GEN_SEC_SETKEY_FUNC(sm4_cbc, SEC_CALG_SM4, SEC_CMODE_CBC) -static int sec_cipher_map(struct device *dev, struct sec_req *req, +static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req, + struct scatterlist *src) +{ + struct aead_request *aead_req = req->aead_req.aead_req; + struct sec_cipher_req *c_req = &req->c_req; + struct sec_qp_ctx *qp_ctx = req->qp_ctx; + struct device *dev = SEC_CTX_DEV(ctx); + int copy_size, pbuf_length; + int req_id = req->req_id; + + if (ctx->alg_type == SEC_AEAD) + copy_size = aead_req->cryptlen + aead_req->assoclen; + else + copy_size = c_req->c_len; + + pbuf_length = sg_copy_to_buffer(src, sg_nents(src), + qp_ctx->res[req_id].pbuf, + copy_size); + + if (unlikely(pbuf_length != copy_size)) { + dev_err(dev, "copy src data to pbuf error!\n"); + return -EINVAL; + } + + c_req->c_in_dma = qp_ctx->res[req_id].pbuf_dma; + + if (!c_req->c_in_dma) { + dev_err(dev, "fail to set pbuffer address!\n"); + return -ENOMEM; + } + + c_req->c_out_dma = c_req->c_in_dma; + + return 0; 
+} + +static void sec_cipher_pbuf_unmap(struct sec_ctx *ctx, struct sec_req *req, + struct scatterlist *dst) +{ + struct aead_request *aead_req = req->aead_req.aead_req; + struct sec_cipher_req *c_req = &req->c_req; + struct sec_qp_ctx *qp_ctx = req->qp_ctx; + struct device *dev = SEC_CTX_DEV(ctx); + int copy_size, pbuf_length; + int req_id = req->req_id; + + if (ctx->alg_type == SEC_AEAD) + copy_size = c_req->c_len + aead_req->assoclen; + else + copy_size = c_req->c_len; + + pbuf_length = sg_copy_from_buffer(dst, sg_nents(dst), + qp_ctx->res[req_id].pbuf, + copy_size); + + if (unlikely(pbuf_length != copy_size)) + dev_err(dev, "copy pbuf data to dst error!\n"); + +} + +static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, struct scatterlist *src, struct scatterlist *dst) { struct sec_cipher_req *c_req = &req->c_req; + struct sec_aead_req *a_req = &req->aead_req; struct sec_qp_ctx *qp_ctx = req->qp_ctx; + struct sec_alg_res *res = &qp_ctx->res[req->req_id]; + struct device *dev = SEC_CTX_DEV(ctx); + int ret; + + if (req->use_pbuf) { + ret = sec_cipher_pbuf_map(ctx, req, src); + c_req->c_ivin = res->pbuf + SEC_PBUF_IV_OFFSET; + c_req->c_ivin_dma = res->pbuf_dma + SEC_PBUF_IV_OFFSET; + if (ctx->alg_type == SEC_AEAD) { + a_req->out_mac = res->pbuf + SEC_PBUF_MAC_OFFSET; + a_req->out_mac_dma = res->pbuf_dma + + SEC_PBUF_MAC_OFFSET; + } + + return ret; + } + c_req->c_ivin = res->c_ivin; + c_req->c_ivin_dma = res->c_ivin_dma; + if (ctx->alg_type == SEC_AEAD) { + a_req->out_mac = res->out_mac; + a_req->out_mac_dma = res->out_mac_dma; + } c_req->c_in = hisi_acc_sg_buf_map_to_hw_sgl(dev, src, qp_ctx->c_in_pool, @@ -626,29 +776,34 @@ static int sec_cipher_map(struct device *dev, struct sec_req *req, return 0; } -static void sec_cipher_unmap(struct device *dev, struct sec_cipher_req *req, +static void sec_cipher_unmap(struct sec_ctx *ctx, struct sec_req *req, struct scatterlist *src, struct scatterlist *dst) { - if (dst != src) - hisi_acc_sg_buf_unmap(dev, src, 
req->c_in); + struct sec_cipher_req *c_req = &req->c_req; + struct device *dev = SEC_CTX_DEV(ctx); + + if (req->use_pbuf) { + sec_cipher_pbuf_unmap(ctx, req, dst); + } else { + if (dst != src) + hisi_acc_sg_buf_unmap(dev, src, c_req->c_in); - hisi_acc_sg_buf_unmap(dev, dst, req->c_out); + hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out); + } } static int sec_skcipher_sgl_map(struct sec_ctx *ctx, struct sec_req *req) { struct skcipher_request *sq = req->c_req.sk_req; - return sec_cipher_map(SEC_CTX_DEV(ctx), req, sq->src, sq->dst); + return sec_cipher_map(ctx, req, sq->src, sq->dst); } static void sec_skcipher_sgl_unmap(struct sec_ctx *ctx, struct sec_req *req) { - struct device *dev = SEC_CTX_DEV(ctx); - struct sec_cipher_req *c_req = &req->c_req; - struct skcipher_request *sk_req = c_req->sk_req; + struct skcipher_request *sq = req->c_req.sk_req; - sec_cipher_unmap(dev, c_req, sk_req->src, sk_req->dst); + sec_cipher_unmap(ctx, req, sq->src, sq->dst); } static int sec_aead_aes_set_key(struct sec_cipher_ctx *c_ctx, @@ -759,16 +914,14 @@ static int sec_aead_sgl_map(struct sec_ctx *ctx, struct sec_req *req) { struct aead_request *aq = req->aead_req.aead_req; - return sec_cipher_map(SEC_CTX_DEV(ctx), req, aq->src, aq->dst); + return sec_cipher_map(ctx, req, aq->src, aq->dst); } static void sec_aead_sgl_unmap(struct sec_ctx *ctx, struct sec_req *req) { - struct device *dev = SEC_CTX_DEV(ctx); - struct sec_cipher_req *cq = &req->c_req; struct aead_request *aq = req->aead_req.aead_req; - sec_cipher_unmap(dev, cq, aq->src, aq->dst); + sec_cipher_unmap(ctx, req, aq->src, aq->dst); } static int sec_request_transfer(struct sec_ctx *ctx, struct sec_req *req) @@ -801,9 +954,9 @@ static void sec_request_untransfer(struct sec_ctx *ctx, struct sec_req *req) static void sec_skcipher_copy_iv(struct sec_ctx *ctx, struct sec_req *req) { struct skcipher_request *sk_req = req->c_req.sk_req; - u8 *c_ivin = req->qp_ctx->res[req->req_id].c_ivin; + struct sec_cipher_req *c_req = &req->c_req; - 
memcpy(c_ivin, sk_req->iv, ctx->c_ctx.ivsize); + memcpy(c_req->c_ivin, sk_req->iv, ctx->c_ctx.ivsize); } static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) @@ -818,8 +971,7 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) memset(sec_sqe, 0, sizeof(struct sec_sqe)); sec_sqe->type2.c_key_addr = cpu_to_le64(c_ctx->c_key_dma); - sec_sqe->type2.c_ivin_addr = - cpu_to_le64(req->qp_ctx->res[req->req_id].c_ivin_dma); + sec_sqe->type2.c_ivin_addr = cpu_to_le64(c_req->c_ivin_dma); sec_sqe->type2.data_src_addr = cpu_to_le64(c_req->c_in_dma); sec_sqe->type2.data_dst_addr = cpu_to_le64(c_req->c_out_dma); @@ -836,7 +988,10 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) cipher = SEC_CIPHER_DEC << SEC_CIPHER_OFFSET; sec_sqe->type_cipher_auth = bd_type | cipher; - sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET; + if (req->use_pbuf) + sa_type = SEC_PBUF << SEC_SRC_SGL_OFFSET; + else + sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET; scene = SEC_COMM_SCENE << SEC_SCENE_OFFSET; if (c_req->c_in_dma != c_req->c_out_dma) de = 0x1 << SEC_DE_OFFSET; @@ -844,7 +999,10 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) sec_sqe->sds_sa_type = (de | scene | sa_type); /* Just set DST address type */ - da_type = SEC_SGL << SEC_DST_SGL_OFFSET; + if (req->use_pbuf) + da_type = SEC_PBUF << SEC_DST_SGL_OFFSET; + else + da_type = SEC_SGL << SEC_DST_SGL_OFFSET; sec_sqe->sdm_addr_type |= da_type; sec_sqe->type2.clen_ivhlen |= cpu_to_le32(c_req->c_len); @@ -904,9 +1062,9 @@ static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req, static void sec_aead_copy_iv(struct sec_ctx *ctx, struct sec_req *req) { struct aead_request *aead_req = req->aead_req.aead_req; - u8 *c_ivin = req->qp_ctx->res[req->req_id].c_ivin; + struct sec_cipher_req *c_req = &req->c_req; - memcpy(c_ivin, aead_req->iv, ctx->c_ctx.ivsize); + memcpy(c_req->c_ivin, aead_req->iv, ctx->c_ctx.ivsize); } static void 
sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir, @@ -939,8 +1097,7 @@ static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir, sec_sqe->type2.cipher_src_offset = cpu_to_le16((u16)aq->assoclen); - sec_sqe->type2.mac_addr = - cpu_to_le64(req->qp_ctx->res[req->req_id].out_mac_dma); + sec_sqe->type2.mac_addr = cpu_to_le64(a_req->out_mac_dma); } static int sec_aead_bd_fill(struct sec_ctx *ctx, struct sec_req *req) @@ -964,6 +1121,7 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err) { struct aead_request *a_req = req->aead_req.aead_req; struct crypto_aead *tfm = crypto_aead_reqtfm(a_req); + struct sec_aead_req *aead_req = &req->aead_req; struct sec_cipher_req *c_req = &req->c_req; size_t authsize = crypto_aead_authsize(tfm); struct sec_qp_ctx *qp_ctx = req->qp_ctx; @@ -979,7 +1137,7 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err) struct scatterlist *sgl = a_req->dst; sz = sg_pcopy_from_buffer(sgl, sg_nents(sgl), - qp_ctx->res[req->req_id].out_mac, + aead_req->out_mac, authsize, a_req->cryptlen + a_req->assoclen); @@ -1031,6 +1189,7 @@ static int sec_request_init(struct sec_ctx *ctx, struct sec_req *req) static int sec_process(struct sec_ctx *ctx, struct sec_req *req) { + struct sec_cipher_req *c_req = &req->c_req; int ret; ret = sec_request_init(ctx, req); @@ -1057,12 +1216,10 @@ err_send_req: /* As failing, restore the IV from user */ if (ctx->c_ctx.c_mode == SEC_CMODE_CBC && !req->c_req.encrypt) { if (ctx->alg_type == SEC_SKCIPHER) - memcpy(req->c_req.sk_req->iv, - req->qp_ctx->res[req->req_id].c_ivin, + memcpy(req->c_req.sk_req->iv, c_req->c_ivin, ctx->c_ctx.ivsize); else - memcpy(req->aead_req.aead_req->iv, - req->qp_ctx->res[req->req_id].c_ivin, + memcpy(req->aead_req.aead_req->iv, c_req->c_ivin, ctx->c_ctx.ivsize); } @@ -1208,6 +1365,12 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq) return -EINVAL; } sreq->c_req.c_len = sk_req->cryptlen; + + if 
(ctx->pbuf_supported && sk_req->cryptlen <= SEC_PBUF_SZ) + sreq->use_pbuf = true; + else + sreq->use_pbuf = false; + if (c_alg == SEC_CALG_3DES) { if (unlikely(sk_req->cryptlen & (DES3_EDE_BLOCK_SIZE - 1))) { dev_err(dev, "skcipher 3des input length error!\n"); @@ -1321,11 +1484,18 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq) struct crypto_aead *tfm = crypto_aead_reqtfm(req); size_t authsize = crypto_aead_authsize(tfm); - if (unlikely(!req->src || !req->dst || !req->cryptlen)) { + if (unlikely(!req->src || !req->dst || !req->cryptlen || + req->assoclen > SEC_MAX_AAD_LEN)) { dev_err(SEC_CTX_DEV(ctx), "aead input param error!\n"); return -EINVAL; } + if (ctx->pbuf_supported && (req->cryptlen + req->assoclen) <= + SEC_PBUF_SZ) + sreq->use_pbuf = true; + else + sreq->use_pbuf = false; + /* Support AES only */ if (unlikely(c_alg != SEC_CALG_AES)) { dev_err(SEC_CTX_DEV(ctx), "aead crypto alg error!\n"); diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 2bbaf1e2dae7..1f54ebe164b6 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -7,6 +7,7 @@ #include <linux/debugfs.h> #include <linux/init.h> #include <linux/io.h> +#include <linux/iommu.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/pci.h> @@ -89,8 +90,7 @@ struct sec_hw_error { static const char sec_name[] = "hisi_sec2"; static struct dentry *sec_debugfs_root; -static LIST_HEAD(sec_list); -static DEFINE_MUTEX(sec_list_lock); +static struct hisi_qm_list sec_devices; static const struct sec_hw_error sec_hw_errors[] = { {.int_msk = BIT(0), .msg = "sec_axi_rresp_err_rint"}, @@ -105,37 +105,6 @@ static const struct sec_hw_error sec_hw_errors[] = { { /* sentinel */ } }; -struct sec_dev *sec_find_device(int node) -{ -#define SEC_NUMA_MAX_DISTANCE 100 - int min_distance = SEC_NUMA_MAX_DISTANCE; - int dev_node = 0, free_qp_num = 0; - struct sec_dev *sec, *ret = NULL; - 
struct hisi_qm *qm; - struct device *dev; - - mutex_lock(&sec_list_lock); - list_for_each_entry(sec, &sec_list, list) { - qm = &sec->qm; - dev = &qm->pdev->dev; -#ifdef CONFIG_NUMA - dev_node = dev->numa_node; - if (dev_node < 0) - dev_node = 0; -#endif - if (node_distance(dev_node, node) < min_distance) { - free_qp_num = hisi_qm_get_free_qp_num(qm); - if (free_qp_num >= sec->ctx_q_num) { - ret = sec; - min_distance = node_distance(dev_node, node); - } - } - } - mutex_unlock(&sec_list_lock); - - return ret; -} - static const char * const sec_dbg_file_name[] = { [SEC_CURRENT_QM] = "current_qm", [SEC_CLEAR_ENABLE] = "clear_enable", @@ -238,6 +207,32 @@ static u32 ctx_q_num = SEC_CTX_Q_NUM_DEF; module_param_cb(ctx_q_num, &sec_ctx_q_num_ops, &ctx_q_num, 0444); MODULE_PARM_DESC(ctx_q_num, "Queue num in ctx (24 default, 2, 4, ..., 32)"); +void sec_destroy_qps(struct hisi_qp **qps, int qp_num) +{ + hisi_qm_free_qps(qps, qp_num); + kfree(qps); +} + +struct hisi_qp **sec_create_qps(void) +{ + int node = cpu_to_node(smp_processor_id()); + u32 ctx_num = ctx_q_num; + struct hisi_qp **qps; + int ret; + + qps = kcalloc(ctx_num, sizeof(struct hisi_qp *), GFP_KERNEL); + if (!qps) + return NULL; + + ret = hisi_qm_alloc_qps_node(&sec_devices, ctx_num, 0, node, qps); + if (!ret) + return qps; + + kfree(qps); + return NULL; +} + + static const struct pci_device_id sec_dev_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_PF_PCI_DEVICE_ID) }, { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_VF_PCI_DEVICE_ID) }, @@ -245,20 +240,6 @@ static const struct pci_device_id sec_dev_ids[] = { }; MODULE_DEVICE_TABLE(pci, sec_dev_ids); -static inline void sec_add_to_list(struct sec_dev *sec) -{ - mutex_lock(&sec_list_lock); - list_add_tail(&sec->list, &sec_list); - mutex_unlock(&sec_list_lock); -} - -static inline void sec_remove_from_list(struct sec_dev *sec) -{ - mutex_lock(&sec_list_lock); - list_del(&sec->list); - mutex_unlock(&sec_list_lock); -} - static u8 sec_get_endian(struct sec_dev *sec) { struct 
hisi_qm *qm = &sec->qm; @@ -384,9 +365,8 @@ static void sec_debug_regs_clear(struct hisi_qm *qm) hisi_qm_debug_regs_clear(qm); } -static void sec_hw_error_enable(struct sec_dev *sec) +static void sec_hw_error_enable(struct hisi_qm *qm) { - struct hisi_qm *qm = &sec->qm; u32 val; if (qm->ver == QM_HW_V1) { @@ -414,9 +394,8 @@ static void sec_hw_error_enable(struct sec_dev *sec) writel(val, qm->io_base + SEC_CONTROL_REG); } -static void sec_hw_error_disable(struct sec_dev *sec) +static void sec_hw_error_disable(struct hisi_qm *qm) { - struct hisi_qm *qm = &sec->qm; u32 val; val = readl(qm->io_base + SEC_CONTROL_REG); @@ -435,27 +414,6 @@ static void sec_hw_error_disable(struct sec_dev *sec) writel(val, qm->io_base + SEC_CONTROL_REG); } -static void sec_hw_error_init(struct sec_dev *sec) -{ - if (sec->qm.fun_type == QM_HW_VF) - return; - - hisi_qm_hw_error_init(&sec->qm, QM_BASE_CE, - QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT - | QM_ACC_WB_NOT_READY_TIMEOUT, 0, - QM_DB_RANDOM_INVALID); - sec_hw_error_enable(sec); -} - -static void sec_hw_error_uninit(struct sec_dev *sec) -{ - if (sec->qm.fun_type == QM_HW_VF) - return; - - sec_hw_error_disable(sec); - writel(GENMASK(12, 0), sec->qm.io_base + SEC_QM_ABNORMAL_INT_MASK); -} - static u32 sec_current_qm_read(struct sec_debug_file *file) { struct hisi_qm *qm = file->qm; @@ -695,6 +653,51 @@ static void sec_debugfs_exit(struct sec_dev *sec) debugfs_remove_recursive(sec->qm.debug.debug_root); } +static void sec_log_hw_error(struct hisi_qm *qm, u32 err_sts) +{ + const struct sec_hw_error *errs = sec_hw_errors; + struct device *dev = &qm->pdev->dev; + u32 err_val; + + while (errs->msg) { + if (errs->int_msk & err_sts) { + dev_err(dev, "%s [error status=0x%x] found\n", + errs->msg, errs->int_msk); + + if (SEC_CORE_INT_STATUS_M_ECC & errs->int_msk) { + err_val = readl(qm->io_base + + SEC_CORE_SRAM_ECC_ERR_INFO); + dev_err(dev, "multi ecc sram num=0x%x\n", + SEC_ECC_NUM(err_val)); + dev_err(dev, "multi ecc sram addr=0x%x\n", + 
SEC_ECC_ADDR(err_val)); + } + } + errs++; + } + + writel(err_sts, qm->io_base + SEC_CORE_INT_SOURCE); +} + +static u32 sec_get_hw_err_status(struct hisi_qm *qm) +{ + return readl(qm->io_base + SEC_CORE_INT_STATUS); +} + +static const struct hisi_qm_err_ini sec_err_ini = { + .hw_err_enable = sec_hw_error_enable, + .hw_err_disable = sec_hw_error_disable, + .get_dev_hw_err_status = sec_get_hw_err_status, + .log_dev_hw_err = sec_log_hw_error, + .err_info = { + .ce = QM_BASE_CE, + .nfe = QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT | + QM_ACC_WB_NOT_READY_TIMEOUT, + .fe = 0, + .msi = QM_DB_RANDOM_INVALID, + } +}; + static int sec_pf_probe_init(struct sec_dev *sec) { struct hisi_qm *qm = &sec->qm; @@ -713,11 +716,13 @@ static int sec_pf_probe_init(struct sec_dev *sec) return -EINVAL; } + qm->err_ini = &sec_err_ini; + ret = sec_set_user_domain_and_cache(sec); if (ret) return ret; - sec_hw_error_init(sec); + hisi_qm_dev_err_init(qm); sec_debug_regs_clear(qm); return 0; @@ -750,12 +755,30 @@ static void sec_qm_uninit(struct hisi_qm *qm) static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec) { + int ret; + + /* + * WQ_HIGHPRI: SEC request must be low delayed, + * so need a high priority workqueue. + * WQ_UNBOUND: SEC task is likely with long + * running CPU intensive workloads. 
+ */ + qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | + WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus(), + pci_name(qm->pdev)); + if (!qm->wq) { + pci_err(qm->pdev, "fail to alloc workqueue\n"); + return -ENOMEM; + } + if (qm->fun_type == QM_HW_PF) { qm->qp_base = SEC_PF_DEF_Q_BASE; qm->qp_num = pf_q_num; qm->debug.curr_qm_qp_num = pf_q_num; - return sec_pf_probe_init(sec); + ret = sec_pf_probe_init(sec); + if (ret) + goto err_probe_uninit; } else if (qm->fun_type == QM_HW_VF) { /* * have no way to get qm configure in VM in v1 hardware, @@ -768,18 +791,43 @@ static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec) qm->qp_num = SEC_QUEUE_NUM_V1 - SEC_PF_DEF_Q_NUM; } else if (qm->ver == QM_HW_V2) { /* v2 starts to support get vft by mailbox */ - return hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num); + ret = hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num); + if (ret) + goto err_probe_uninit; } } else { - return -ENODEV; + ret = -ENODEV; + goto err_probe_uninit; } return 0; +err_probe_uninit: + destroy_workqueue(qm->wq); + return ret; } -static void sec_probe_uninit(struct sec_dev *sec) +static void sec_probe_uninit(struct hisi_qm *qm) { - sec_hw_error_uninit(sec); + hisi_qm_dev_err_uninit(qm); + + destroy_workqueue(qm->wq); +} + +static void sec_iommu_used_check(struct sec_dev *sec) +{ + struct iommu_domain *domain; + struct device *dev = &sec->qm.pdev->dev; + + domain = iommu_get_domain_for_dev(dev); + + /* Check if iommu is used */ + sec->iommu_used = false; + if (domain) { + if (domain->type & __IOMMU_DOMAIN_PAGING) + sec->iommu_used = true; + dev_info(dev, "SMMU Opened, the iommu type = %u\n", + domain->type); + } } static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) @@ -795,6 +843,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, sec); sec->ctx_q_num = ctx_q_num; + sec_iommu_used_check(sec); qm = &sec->qm; @@ -820,7 +869,7 @@ static int sec_probe(struct pci_dev *pdev, const struct 
pci_device_id *id) if (ret) pci_warn(pdev, "Failed to init debugfs!\n"); - sec_add_to_list(sec); + hisi_qm_add_to_list(qm, &sec_devices); ret = sec_register_to_crypto(); if (ret < 0) { @@ -831,12 +880,12 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_remove_from_list: - sec_remove_from_list(sec); + hisi_qm_del_from_list(qm, &sec_devices); sec_debugfs_exit(sec); hisi_qm_stop(qm); err_probe_uninit: - sec_probe_uninit(sec); + sec_probe_uninit(qm); err_qm_uninit: sec_qm_uninit(qm); @@ -955,7 +1004,7 @@ static void sec_remove(struct pci_dev *pdev) sec_unregister_from_crypto(); - sec_remove_from_list(sec); + hisi_qm_del_from_list(qm, &sec_devices); if (qm->fun_type == QM_HW_PF && sec->num_vfs) (void)sec_sriov_disable(pdev); @@ -967,89 +1016,13 @@ static void sec_remove(struct pci_dev *pdev) if (qm->fun_type == QM_HW_PF) sec_debug_regs_clear(qm); - sec_probe_uninit(sec); + sec_probe_uninit(qm); sec_qm_uninit(qm); } -static void sec_log_hw_error(struct sec_dev *sec, u32 err_sts) -{ - const struct sec_hw_error *errs = sec_hw_errors; - struct device *dev = &sec->qm.pdev->dev; - u32 err_val; - - while (errs->msg) { - if (errs->int_msk & err_sts) { - dev_err(dev, "%s [error status=0x%x] found\n", - errs->msg, errs->int_msk); - - if (SEC_CORE_INT_STATUS_M_ECC & err_sts) { - err_val = readl(sec->qm.io_base + - SEC_CORE_SRAM_ECC_ERR_INFO); - dev_err(dev, "multi ecc sram num=0x%x\n", - SEC_ECC_NUM(err_val)); - dev_err(dev, "multi ecc sram addr=0x%x\n", - SEC_ECC_ADDR(err_val)); - } - } - errs++; - } -} - -static pci_ers_result_t sec_hw_error_handle(struct sec_dev *sec) -{ - u32 err_sts; - - /* read err sts */ - err_sts = readl(sec->qm.io_base + SEC_CORE_INT_STATUS); - if (err_sts) { - sec_log_hw_error(sec, err_sts); - - /* clear error interrupts */ - writel(err_sts, sec->qm.io_base + SEC_CORE_INT_SOURCE); - - return PCI_ERS_RESULT_NEED_RESET; - } - - return PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t 
sec_process_hw_error(struct pci_dev *pdev) -{ - struct sec_dev *sec = pci_get_drvdata(pdev); - pci_ers_result_t qm_ret, sec_ret; - - if (!sec) { - pci_err(pdev, "Can't recover error during device init\n"); - return PCI_ERS_RESULT_NONE; - } - - /* log qm error */ - qm_ret = hisi_qm_hw_error_handle(&sec->qm); - - /* log sec error */ - sec_ret = sec_hw_error_handle(sec); - - return (qm_ret == PCI_ERS_RESULT_NEED_RESET || - sec_ret == PCI_ERS_RESULT_NEED_RESET) ? - PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t sec_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - if (pdev->is_virtfn) - return PCI_ERS_RESULT_NONE; - - pci_info(pdev, "PCI error detected, state(=%d)!!\n", state); - if (state == pci_channel_io_perm_failure) - return PCI_ERS_RESULT_DISCONNECT; - - return sec_process_hw_error(pdev); -} - static const struct pci_error_handlers sec_err_handler = { - .error_detected = sec_error_detected, + .error_detected = hisi_qm_dev_err_detected, }; static struct pci_driver sec_pci_driver = { @@ -1078,6 +1051,7 @@ static int __init sec_init(void) { int ret; + hisi_qm_init_list(&sec_devices); sec_register_debugfs(); ret = pci_register_driver(&sec_pci_driver); diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h index bc1db26598bb..82dc6f867171 100644 --- a/drivers/crypto/hisilicon/zip/zip.h +++ b/drivers/crypto/hisilicon/zip/zip.h @@ -68,7 +68,7 @@ struct hisi_zip_sqe { u32 rsvd1[4]; }; -struct hisi_zip *find_zip_device(int node); +int zip_create_qps(struct hisi_qp **qps, int ctx_num); int hisi_zip_register_to_crypto(void); void hisi_zip_unregister_from_crypto(void); #endif diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c index 9815d5e3ccd0..369ec3220574 100644 --- a/drivers/crypto/hisilicon/zip/zip_crypto.c +++ b/drivers/crypto/hisilicon/zip/zip_crypto.c @@ -132,29 +132,25 @@ static void hisi_zip_fill_sqe(struct hisi_zip_sqe *sqe, u8 
req_type, sqe->dest_addr_h = upper_32_bits(d_addr); } -static int hisi_zip_create_qp(struct hisi_qm *qm, struct hisi_zip_qp_ctx *ctx, - int alg_type, int req_type) +static int hisi_zip_start_qp(struct hisi_qp *qp, struct hisi_zip_qp_ctx *ctx, + int alg_type, int req_type) { - struct hisi_qp *qp; + struct device *dev = &qp->qm->pdev->dev; int ret; - qp = hisi_qm_create_qp(qm, alg_type); - if (IS_ERR(qp)) - return PTR_ERR(qp); - qp->req_type = req_type; + qp->alg_type = alg_type; qp->qp_ctx = ctx; - ctx->qp = qp; ret = hisi_qm_start_qp(qp, 0); - if (ret < 0) - goto err_release_qp; + if (ret < 0) { + dev_err(dev, "start qp failed!\n"); + return ret; + } - return 0; + ctx->qp = qp; -err_release_qp: - hisi_qm_release_qp(qp); - return ret; + return 0; } static void hisi_zip_release_qp(struct hisi_zip_qp_ctx *ctx) @@ -165,34 +161,34 @@ static void hisi_zip_release_qp(struct hisi_zip_qp_ctx *ctx) static int hisi_zip_ctx_init(struct hisi_zip_ctx *hisi_zip_ctx, u8 req_type) { + struct hisi_qp *qps[HZIP_CTX_Q_NUM] = { NULL }; struct hisi_zip *hisi_zip; - struct hisi_qm *qm; int ret, i, j; - /* find the proper zip device */ - hisi_zip = find_zip_device(cpu_to_node(smp_processor_id())); - if (!hisi_zip) { - pr_err("Failed to find a proper ZIP device!\n"); + ret = zip_create_qps(qps, HZIP_CTX_Q_NUM); + if (ret) { + pr_err("Can not create zip qps!\n"); return -ENODEV; } - qm = &hisi_zip->qm; + + hisi_zip = container_of(qps[0]->qm, struct hisi_zip, qm); for (i = 0; i < HZIP_CTX_Q_NUM; i++) { /* alg_type = 0 for compress, 1 for decompress in hw sqe */ - ret = hisi_zip_create_qp(qm, &hisi_zip_ctx->qp_ctx[i], i, - req_type); - if (ret) - goto err; + ret = hisi_zip_start_qp(qps[i], &hisi_zip_ctx->qp_ctx[i], i, + req_type); + if (ret) { + for (j = i - 1; j >= 0; j--) + hisi_qm_stop_qp(hisi_zip_ctx->qp_ctx[j].qp); + + hisi_qm_free_qps(qps, HZIP_CTX_Q_NUM); + return ret; + } hisi_zip_ctx->qp_ctx[i].zip_dev = hisi_zip; } return 0; -err: - for (j = i - 1; j >= 0; j--) - 
hisi_zip_release_qp(&hisi_zip_ctx->qp_ctx[j]); - - return ret; } static void hisi_zip_ctx_exit(struct hisi_zip_ctx *hisi_zip_ctx) diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index e1bab1a91333..fcc85d2dbd07 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -11,6 +11,7 @@ #include <linux/pci.h> #include <linux/seq_file.h> #include <linux/topology.h> +#include <linux/uacce.h> #include "zip.h" #define PCI_DEVICE_ID_ZIP_PF 0xa250 @@ -60,13 +61,17 @@ #define HZIP_CORE_DEBUG_DECOMP_5 0x309000 #define HZIP_CORE_INT_SOURCE 0x3010A0 -#define HZIP_CORE_INT_MASK 0x3010A4 +#define HZIP_CORE_INT_MASK_REG 0x3010A4 #define HZIP_CORE_INT_STATUS 0x3010AC #define HZIP_CORE_INT_STATUS_M_ECC BIT(1) #define HZIP_CORE_SRAM_ECC_ERR_INFO 0x301148 -#define SRAM_ECC_ERR_NUM_SHIFT 16 -#define SRAM_ECC_ERR_ADDR_SHIFT 24 -#define HZIP_CORE_INT_DISABLE 0x000007FF +#define HZIP_CORE_INT_RAS_CE_ENB 0x301160 +#define HZIP_CORE_INT_RAS_NFE_ENB 0x301164 +#define HZIP_CORE_INT_RAS_FE_ENB 0x301168 +#define HZIP_CORE_INT_RAS_NFE_ENABLE 0x7FE +#define HZIP_SRAM_ECC_ERR_NUM_SHIFT 16 +#define HZIP_SRAM_ECC_ERR_ADDR_SHIFT 24 +#define HZIP_CORE_INT_MASK_ALL GENMASK(10, 0) #define HZIP_COMP_CORE_NUM 2 #define HZIP_DECOMP_CORE_NUM 6 #define HZIP_CORE_NUM (HZIP_COMP_CORE_NUM + \ @@ -83,77 +88,7 @@ static const char hisi_zip_name[] = "hisi_zip"; static struct dentry *hzip_debugfs_root; -static LIST_HEAD(hisi_zip_list); -static DEFINE_MUTEX(hisi_zip_list_lock); - -struct hisi_zip_resource { - struct hisi_zip *hzip; - int distance; - struct list_head list; -}; - -static void free_list(struct list_head *head) -{ - struct hisi_zip_resource *res, *tmp; - - list_for_each_entry_safe(res, tmp, head, list) { - list_del(&res->list); - kfree(res); - } -} - -struct hisi_zip *find_zip_device(int node) -{ - struct hisi_zip_resource *res, *tmp; - struct hisi_zip *ret = NULL; - struct hisi_zip *hisi_zip; - struct list_head 
*n; - struct device *dev; - LIST_HEAD(head); - - mutex_lock(&hisi_zip_list_lock); - - if (IS_ENABLED(CONFIG_NUMA)) { - list_for_each_entry(hisi_zip, &hisi_zip_list, list) { - res = kzalloc(sizeof(*res), GFP_KERNEL); - if (!res) - goto err; - - dev = &hisi_zip->qm.pdev->dev; - res->hzip = hisi_zip; - res->distance = node_distance(dev_to_node(dev), node); - - n = &head; - list_for_each_entry(tmp, &head, list) { - if (res->distance < tmp->distance) { - n = &tmp->list; - break; - } - } - list_add_tail(&res->list, n); - } - - list_for_each_entry(tmp, &head, list) { - if (hisi_qm_get_free_qp_num(&tmp->hzip->qm)) { - ret = tmp->hzip; - break; - } - } - - free_list(&head); - } else { - ret = list_first_entry(&hisi_zip_list, struct hisi_zip, list); - } - - mutex_unlock(&hisi_zip_list_lock); - - return ret; - -err: - free_list(&head); - mutex_unlock(&hisi_zip_list_lock); - return NULL; -} +static struct hisi_qm_list zip_devices; struct hisi_zip_hw_error { u32 int_msk; @@ -297,9 +232,6 @@ static u32 pf_q_num = HZIP_PF_DEF_Q_NUM; module_param_cb(pf_q_num, &pf_q_num_ops, &pf_q_num, 0444); MODULE_PARM_DESC(pf_q_num, "Number of queues in PF(v1 1-4096, v2 1-1024)"); -static int uacce_mode; -module_param(uacce_mode, int, 0); - static u32 vfs_num; module_param(vfs_num, uint, 0444); MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63)"); @@ -311,18 +243,11 @@ static const struct pci_device_id hisi_zip_dev_ids[] = { }; MODULE_DEVICE_TABLE(pci, hisi_zip_dev_ids); -static inline void hisi_zip_add_to_list(struct hisi_zip *hisi_zip) +int zip_create_qps(struct hisi_qp **qps, int qp_num) { - mutex_lock(&hisi_zip_list_lock); - list_add_tail(&hisi_zip->list, &hisi_zip_list); - mutex_unlock(&hisi_zip_list_lock); -} + int node = cpu_to_node(smp_processor_id()); -static inline void hisi_zip_remove_from_list(struct hisi_zip *hisi_zip) -{ - mutex_lock(&hisi_zip_list_lock); - list_del(&hisi_zip->list); - mutex_unlock(&hisi_zip_list_lock); + return hisi_qm_alloc_qps_node(&zip_devices, qp_num, 0, 
node, qps); } static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip) @@ -353,8 +278,14 @@ static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip) writel(AXUSER_BASE, base + HZIP_BD_RUSER_32_63); writel(AXUSER_BASE, base + HZIP_SGL_RUSER_32_63); writel(AXUSER_BASE, base + HZIP_BD_WUSER_32_63); - writel(AXUSER_BASE, base + HZIP_DATA_RUSER_32_63); - writel(AXUSER_BASE, base + HZIP_DATA_WUSER_32_63); + + if (hisi_zip->qm.use_sva) { + writel(AXUSER_BASE | AXUSER_SSV, base + HZIP_DATA_RUSER_32_63); + writel(AXUSER_BASE | AXUSER_SSV, base + HZIP_DATA_WUSER_32_63); + } else { + writel(AXUSER_BASE, base + HZIP_DATA_RUSER_32_63); + writel(AXUSER_BASE, base + HZIP_DATA_WUSER_32_63); + } /* let's open all compression/decompression cores */ writel(DECOMP_CHECK_ENABLE | ALL_COMP_DECOMP_EN, @@ -366,27 +297,32 @@ static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip) FIELD_PREP(CQC_CACHE_WB_THRD, 1), base + QM_CACHE_CTL); } -static void hisi_zip_hw_error_set_state(struct hisi_zip *hisi_zip, bool state) +static void hisi_zip_hw_error_enable(struct hisi_qm *qm) { - struct hisi_qm *qm = &hisi_zip->qm; - if (qm->ver == QM_HW_V1) { - writel(HZIP_CORE_INT_DISABLE, qm->io_base + HZIP_CORE_INT_MASK); + writel(HZIP_CORE_INT_MASK_ALL, + qm->io_base + HZIP_CORE_INT_MASK_REG); dev_info(&qm->pdev->dev, "Does not support hw error handle\n"); return; } - if (state) { - /* clear ZIP hw error source if having */ - writel(HZIP_CORE_INT_DISABLE, hisi_zip->qm.io_base + - HZIP_CORE_INT_SOURCE); - /* enable ZIP hw error interrupts */ - writel(0, hisi_zip->qm.io_base + HZIP_CORE_INT_MASK); - } else { - /* disable ZIP hw error interrupts */ - writel(HZIP_CORE_INT_DISABLE, - hisi_zip->qm.io_base + HZIP_CORE_INT_MASK); - } + /* clear ZIP hw error source if having */ + writel(HZIP_CORE_INT_MASK_ALL, qm->io_base + HZIP_CORE_INT_SOURCE); + + /* configure error type */ + writel(0x1, qm->io_base + HZIP_CORE_INT_RAS_CE_ENB); + writel(0x0, qm->io_base + 
HZIP_CORE_INT_RAS_FE_ENB); + writel(HZIP_CORE_INT_RAS_NFE_ENABLE, + qm->io_base + HZIP_CORE_INT_RAS_NFE_ENB); + + /* enable ZIP hw error interrupts */ + writel(0, qm->io_base + HZIP_CORE_INT_MASK_REG); +} + +static void hisi_zip_hw_error_disable(struct hisi_qm *qm) +{ + /* disable ZIP hw error interrupts */ + writel(HZIP_CORE_INT_MASK_ALL, qm->io_base + HZIP_CORE_INT_MASK_REG); } static inline struct hisi_qm *file_to_qm(struct ctrl_debug_file *file) @@ -638,14 +574,53 @@ static void hisi_zip_debugfs_exit(struct hisi_zip *hisi_zip) hisi_zip_debug_regs_clear(hisi_zip); } -static void hisi_zip_hw_error_init(struct hisi_zip *hisi_zip) +static void hisi_zip_log_hw_error(struct hisi_qm *qm, u32 err_sts) +{ + const struct hisi_zip_hw_error *err = zip_hw_error; + struct device *dev = &qm->pdev->dev; + u32 err_val; + + while (err->msg) { + if (err->int_msk & err_sts) { + dev_err(dev, "%s [error status=0x%x] found\n", + err->msg, err->int_msk); + + if (err->int_msk & HZIP_CORE_INT_STATUS_M_ECC) { + err_val = readl(qm->io_base + + HZIP_CORE_SRAM_ECC_ERR_INFO); + dev_err(dev, "hisi-zip multi ecc sram num=0x%x\n", + ((err_val >> + HZIP_SRAM_ECC_ERR_NUM_SHIFT) & 0xFF)); + dev_err(dev, "hisi-zip multi ecc sram addr=0x%x\n", + (err_val >> + HZIP_SRAM_ECC_ERR_ADDR_SHIFT)); + } + } + err++; + } + + writel(err_sts, qm->io_base + HZIP_CORE_INT_SOURCE); +} + +static u32 hisi_zip_get_hw_err_status(struct hisi_qm *qm) { - hisi_qm_hw_error_init(&hisi_zip->qm, QM_BASE_CE, - QM_BASE_NFE | QM_ACC_WB_NOT_READY_TIMEOUT, 0, - QM_DB_RANDOM_INVALID); - hisi_zip_hw_error_set_state(hisi_zip, true); + return readl(qm->io_base + HZIP_CORE_INT_STATUS); } +static const struct hisi_qm_err_ini hisi_zip_err_ini = { + .hw_err_enable = hisi_zip_hw_error_enable, + .hw_err_disable = hisi_zip_hw_error_disable, + .get_dev_hw_err_status = hisi_zip_get_hw_err_status, + .log_dev_hw_err = hisi_zip_log_hw_error, + .err_info = { + .ce = QM_BASE_CE, + .nfe = QM_BASE_NFE | + QM_ACC_WB_NOT_READY_TIMEOUT, + .fe = 0, + 
.msi = QM_DB_RANDOM_INVALID, + } +}; + static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip) { struct hisi_qm *qm = &hisi_zip->qm; @@ -671,8 +646,10 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip) return -EINVAL; } + qm->err_ini = &hisi_zip_err_ini; + hisi_zip_set_user_domain_and_cache(hisi_zip); - hisi_zip_hw_error_init(hisi_zip); + hisi_qm_dev_err_init(qm); hisi_zip_debug_regs_clear(hisi_zip); return 0; @@ -791,27 +768,15 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, hisi_zip); qm = &hisi_zip->qm; + qm->use_dma_api = true; qm->pdev = pdev; qm->ver = rev_id; + qm->algs = "zlib\ngzip"; qm->sqe_size = HZIP_SQE_SIZE; qm->dev_name = hisi_zip_name; qm->fun_type = (pdev->device == PCI_DEVICE_ID_ZIP_PF) ? QM_HW_PF : QM_HW_VF; - switch (uacce_mode) { - case 0: - qm->use_dma_api = true; - break; - case 1: - qm->use_dma_api = false; - break; - case 2: - qm->use_dma_api = true; - break; - default: - return -EINVAL; - } - ret = hisi_qm_init(qm); if (ret) { dev_err(&pdev->dev, "Failed to init qm!\n"); @@ -849,7 +814,13 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) dev_err(&pdev->dev, "Failed to init debugfs (%d)!\n", ret); - hisi_zip_add_to_list(hisi_zip); + hisi_qm_add_to_list(qm, &zip_devices); + + if (qm->uacce) { + ret = uacce_register(qm->uacce); + if (ret) + goto err_qm_uninit; + } if (qm->fun_type == QM_HW_PF && vfs_num > 0) { ret = hisi_zip_sriov_enable(pdev, vfs_num); @@ -860,7 +831,7 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_remove_from_list: - hisi_zip_remove_from_list(hisi_zip); + hisi_qm_del_from_list(qm, &zip_devices); hisi_zip_debugfs_exit(hisi_zip); hisi_qm_stop(qm); err_qm_uninit: @@ -887,92 +858,13 @@ static void hisi_zip_remove(struct pci_dev *pdev) hisi_zip_debugfs_exit(hisi_zip); hisi_qm_stop(qm); - if (qm->fun_type == QM_HW_PF) - hisi_zip_hw_error_set_state(hisi_zip, false); 
- + hisi_qm_dev_err_uninit(qm); hisi_qm_uninit(qm); - hisi_zip_remove_from_list(hisi_zip); -} - -static void hisi_zip_log_hw_error(struct hisi_zip *hisi_zip, u32 err_sts) -{ - const struct hisi_zip_hw_error *err = zip_hw_error; - struct device *dev = &hisi_zip->qm.pdev->dev; - u32 err_val; - - while (err->msg) { - if (err->int_msk & err_sts) { - dev_warn(dev, "%s [error status=0x%x] found\n", - err->msg, err->int_msk); - - if (HZIP_CORE_INT_STATUS_M_ECC & err->int_msk) { - err_val = readl(hisi_zip->qm.io_base + - HZIP_CORE_SRAM_ECC_ERR_INFO); - dev_warn(dev, "hisi-zip multi ecc sram num=0x%x\n", - ((err_val >> SRAM_ECC_ERR_NUM_SHIFT) & - 0xFF)); - dev_warn(dev, "hisi-zip multi ecc sram addr=0x%x\n", - (err_val >> SRAM_ECC_ERR_ADDR_SHIFT)); - } - } - err++; - } -} - -static pci_ers_result_t hisi_zip_hw_error_handle(struct hisi_zip *hisi_zip) -{ - u32 err_sts; - - /* read err sts */ - err_sts = readl(hisi_zip->qm.io_base + HZIP_CORE_INT_STATUS); - - if (err_sts) { - hisi_zip_log_hw_error(hisi_zip, err_sts); - /* clear error interrupts */ - writel(err_sts, hisi_zip->qm.io_base + HZIP_CORE_INT_SOURCE); - - return PCI_ERS_RESULT_NEED_RESET; - } - - return PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t hisi_zip_process_hw_error(struct pci_dev *pdev) -{ - struct hisi_zip *hisi_zip = pci_get_drvdata(pdev); - struct device *dev = &pdev->dev; - pci_ers_result_t qm_ret, zip_ret; - - if (!hisi_zip) { - dev_err(dev, - "Can't recover ZIP-error occurred during device init\n"); - return PCI_ERS_RESULT_NONE; - } - - qm_ret = hisi_qm_hw_error_handle(&hisi_zip->qm); - - zip_ret = hisi_zip_hw_error_handle(hisi_zip); - - return (qm_ret == PCI_ERS_RESULT_NEED_RESET || - zip_ret == PCI_ERS_RESULT_NEED_RESET) ? 
- PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t hisi_zip_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - if (pdev->is_virtfn) - return PCI_ERS_RESULT_NONE; - - dev_info(&pdev->dev, "PCI error detected, state(=%d)!!\n", state); - if (state == pci_channel_io_perm_failure) - return PCI_ERS_RESULT_DISCONNECT; - - return hisi_zip_process_hw_error(pdev); + hisi_qm_del_from_list(qm, &zip_devices); } static const struct pci_error_handlers hisi_zip_err_handler = { - .error_detected = hisi_zip_error_detected, + .error_detected = hisi_qm_dev_err_detected, }; static struct pci_driver hisi_zip_pci_driver = { @@ -1002,6 +894,7 @@ static int __init hisi_zip_init(void) { int ret; + hisi_qm_init_list(&zip_devices); hisi_zip_register_debugfs(); ret = pci_register_driver(&hisi_zip_pci_driver); @@ -1010,12 +903,10 @@ static int __init hisi_zip_init(void) goto err_pci; } - if (uacce_mode == 0 || uacce_mode == 2) { - ret = hisi_zip_register_to_crypto(); - if (ret < 0) { - pr_err("Failed to register driver to crypto.\n"); - goto err_crypto; - } + ret = hisi_zip_register_to_crypto(); + if (ret < 0) { + pr_err("Failed to register driver to crypto.\n"); + goto err_crypto; } return 0; @@ -1030,8 +921,7 @@ err_pci: static void __exit hisi_zip_exit(void) { - if (uacce_mode == 0 || uacce_mode == 2) - hisi_zip_unregister_from_crypto(); + hisi_zip_unregister_from_crypto(); pci_unregister_driver(&hisi_zip_pci_driver); hisi_zip_unregister_debugfs(); } diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c index 25d5227f74a1..0e25fc3087f3 100644 --- a/drivers/crypto/img-hash.c +++ b/drivers/crypto/img-hash.c @@ -103,7 +103,7 @@ struct img_hash_request_ctx { struct ahash_request fallback_req; /* Zero length buffer must remain last member of struct */ - u8 buffer[0] __aligned(sizeof(u32)); + u8 buffer[] __aligned(sizeof(u32)); }; struct img_hash_ctx { diff --git a/drivers/crypto/marvell/Kconfig b/drivers/crypto/marvell/Kconfig 
new file mode 100644 index 000000000000..13063384f958 --- /dev/null +++ b/drivers/crypto/marvell/Kconfig @@ -0,0 +1,37 @@ +# +# Marvell crypto drivers configuration +# + +config CRYPTO_DEV_MARVELL + tristate + +config CRYPTO_DEV_MARVELL_CESA + tristate "Marvell's Cryptographic Engine driver" + depends on PLAT_ORION || ARCH_MVEBU + select CRYPTO_LIB_AES + select CRYPTO_LIB_DES + select CRYPTO_SKCIPHER + select CRYPTO_HASH + select SRAM + select CRYPTO_DEV_MARVELL + help + This driver allows you to utilize the Cryptographic Engines and + Security Accelerator (CESA) which can be found on MVEBU and ORION + platforms. + This driver supports CPU offload through DMA transfers. + +config CRYPTO_DEV_OCTEONTX_CPT + tristate "Support for Marvell OcteonTX CPT driver" + depends on ARCH_THUNDER || COMPILE_TEST + depends on PCI_MSI && 64BIT + depends on CRYPTO_LIB_AES + select CRYPTO_SKCIPHER + select CRYPTO_HASH + select CRYPTO_AEAD + select CRYPTO_DEV_MARVELL + help + This driver allows you to utilize the Marvell Cryptographic + Accelerator Unit(CPT) found in OcteonTX series of processors. 
+ + To compile this driver as module, choose M here: + the modules will be called octeontx-cpt and octeontx-cptvf diff --git a/drivers/crypto/marvell/Makefile b/drivers/crypto/marvell/Makefile index b27cab65e696..6c6a1519b0f1 100644 --- a/drivers/crypto/marvell/Makefile +++ b/drivers/crypto/marvell/Makefile @@ -1,3 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell-cesa.o -marvell-cesa-objs := cesa.o cipher.o hash.o tdma.o +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += cesa/ +obj-$(CONFIG_CRYPTO_DEV_OCTEONTX_CPT) += octeontx/ diff --git a/drivers/crypto/marvell/cesa/Makefile b/drivers/crypto/marvell/cesa/Makefile new file mode 100644 index 000000000000..b27cab65e696 --- /dev/null +++ b/drivers/crypto/marvell/cesa/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell-cesa.o +marvell-cesa-objs := cesa.o cipher.o hash.o tdma.o diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa/cesa.c index 8a5f0b0bdf77..8a5f0b0bdf77 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa/cesa.c diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa/cesa.h index f1ed3b85c0d2..e8632d5f343f 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa/cesa.h @@ -436,7 +436,7 @@ struct mv_cesa_dev { * @queue: fifo of the pending crypto requests * @load: engine load counter, useful for load balancing * @chain: list of the current tdma descriptors being processed - * by this engine. + * by this engine. * @complete_queue: fifo of the processed requests by the engine * * Structure storing CESA engine information. @@ -467,7 +467,7 @@ struct mv_cesa_engine { * @step: launch the crypto operation on the next chunk * @cleanup: cleanup the crypto request (release associated data) * @complete: complete the request, i.e copy result or context from sram when - * needed. + * needed. 
*/ struct mv_cesa_req_ops { int (*process)(struct crypto_async_request *req, u32 status); @@ -734,6 +734,7 @@ static inline struct mv_cesa_engine *mv_cesa_select_engine(int weight) for (i = 0; i < cesa_dev->caps->nengines; i++) { struct mv_cesa_engine *engine = cesa_dev->engines + i; u32 load = atomic_read(&engine->load); + if (load < min_load) { min_load = load; selected = engine; diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cesa/cipher.c index c24f34a48cef..f133c2ccb5ae 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cesa/cipher.c @@ -106,8 +106,8 @@ static void mv_cesa_skcipher_std_step(struct skcipher_request *req) mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); - BUG_ON(readl(engine->regs + CESA_SA_CMD) & - CESA_SA_CMD_EN_CESA_SA_ACCL0); + WARN_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } @@ -178,6 +178,7 @@ static inline void mv_cesa_skcipher_prepare(struct crypto_async_request *req, { struct skcipher_request *skreq = skcipher_request_cast(req); struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(skreq); + creq->base.engine = engine; if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) @@ -336,7 +337,8 @@ static int mv_cesa_skcipher_dma_req_init(struct skcipher_request *req, do { struct mv_cesa_op_ctx *op; - op = mv_cesa_dma_add_op(&basereq->chain, op_templ, skip_ctx, flags); + op = mv_cesa_dma_add_op(&basereq->chain, op_templ, skip_ctx, + flags); if (IS_ERR(op)) { ret = PTR_ERR(op); goto err_free_tdma; @@ -365,9 +367,10 @@ static int mv_cesa_skcipher_dma_req_init(struct skcipher_request *req, } while (mv_cesa_skcipher_req_iter_next_op(&iter)); /* Add output data for IV */ - ret = mv_cesa_dma_add_result_op(&basereq->chain, CESA_SA_CFG_SRAM_OFFSET, - CESA_SA_DATA_SRAM_OFFSET, - CESA_TDMA_SRC_IN_SRAM, flags); + ret = 
mv_cesa_dma_add_result_op(&basereq->chain, + CESA_SA_CFG_SRAM_OFFSET, + CESA_SA_DATA_SRAM_OFFSET, + CESA_TDMA_SRC_IN_SRAM, flags); if (ret) goto err_free_tdma; diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/cesa/hash.c index a2b35fb0fb89..b971284332b6 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/cesa/hash.c @@ -141,9 +141,11 @@ static int mv_cesa_ahash_pad_req(struct mv_cesa_ahash_req *creq, u8 *buf) if (creq->algo_le) { __le64 bits = cpu_to_le64(creq->len << 3); + memcpy(buf + padlen, &bits, sizeof(bits)); } else { __be64 bits = cpu_to_be64(creq->len << 3); + memcpy(buf + padlen, &bits, sizeof(bits)); } @@ -168,7 +170,8 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) if (!sreq->offset) { digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); for (i = 0; i < digsize / 4; i++) - writel_relaxed(creq->state[i], engine->regs + CESA_IVDIG(i)); + writel_relaxed(creq->state[i], + engine->regs + CESA_IVDIG(i)); } if (creq->cache_ptr) @@ -245,8 +248,8 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); - BUG_ON(readl(engine->regs + CESA_SA_CMD) & - CESA_SA_CMD_EN_CESA_SA_ACCL0); + WARN_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } @@ -329,11 +332,12 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req) digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq)); if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ && - (creq->base.chain.last->flags & CESA_TDMA_TYPE_MSK) == CESA_TDMA_RESULT) { + (creq->base.chain.last->flags & CESA_TDMA_TYPE_MSK) == + CESA_TDMA_RESULT) { __le32 *data = NULL; /* - * Result is already in the correct endianess when the SA is + * Result is already in the correct endianness when the SA is * used */ data = 
creq->base.chain.last->op->ctx.hash.hash; @@ -347,9 +351,9 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req) CESA_IVDIG(i)); if (creq->last_req) { /* - * Hardware's MD5 digest is in little endian format, but - * SHA in big endian format - */ + * Hardware's MD5 digest is in little endian format, but + * SHA in big endian format + */ if (creq->algo_le) { __le32 *result = (void *)ahashreq->result; @@ -439,7 +443,8 @@ static bool mv_cesa_ahash_cache_req(struct ahash_request *req) struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); bool cached = false; - if (creq->cache_ptr + req->nbytes < CESA_MAX_HASH_BLOCK_SIZE && !creq->last_req) { + if (creq->cache_ptr + req->nbytes < CESA_MAX_HASH_BLOCK_SIZE && + !creq->last_req) { cached = true; if (!req->nbytes) @@ -648,7 +653,8 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (!mv_cesa_ahash_req_iter_next_op(&iter)) break; - op = mv_cesa_dma_add_frag(&basereq->chain, &creq->op_tmpl, + op = mv_cesa_dma_add_frag(&basereq->chain, + &creq->op_tmpl, frag_len, flags); if (IS_ERR(op)) { ret = PTR_ERR(op); @@ -920,7 +926,7 @@ struct ahash_alg mv_md5_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx), .cra_init = mv_cesa_ahash_cra_init, .cra_module = THIS_MODULE, - } + } } }; @@ -990,7 +996,7 @@ struct ahash_alg mv_sha1_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx), .cra_init = mv_cesa_ahash_cra_init, .cra_module = THIS_MODULE, - } + } } }; @@ -1063,7 +1069,7 @@ struct ahash_alg mv_sha256_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx), .cra_init = mv_cesa_ahash_cra_init, .cra_module = THIS_MODULE, - } + } } }; @@ -1297,7 +1303,7 @@ struct ahash_alg mv_ahmac_md5_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx), .cra_init = mv_cesa_ahmac_cra_init, .cra_module = THIS_MODULE, - } + } } }; @@ -1367,7 +1373,7 @@ struct ahash_alg mv_ahmac_sha1_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx), .cra_init = mv_cesa_ahmac_cra_init, .cra_module = THIS_MODULE, - } + } 
} }; @@ -1437,6 +1443,6 @@ struct ahash_alg mv_ahmac_sha256_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx), .cra_init = mv_cesa_ahmac_cra_init, .cra_module = THIS_MODULE, - } + } } }; diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/cesa/tdma.c index 45939d53e8d6..b81ee276fe0e 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/cesa/tdma.c @@ -50,8 +50,8 @@ void mv_cesa_dma_step(struct mv_cesa_req *dreq) engine->regs + CESA_SA_CFG); writel_relaxed(dreq->chain.first->cur_dma, engine->regs + CESA_TDMA_NEXT_ADDR); - BUG_ON(readl(engine->regs + CESA_SA_CMD) & - CESA_SA_CMD_EN_CESA_SA_ACCL0); + WARN_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } @@ -175,8 +175,10 @@ int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status) break; } - /* Save the last request in error to engine->req, so that the core - * knows which request was fautly */ + /* + * Save the last request in error to engine->req, so that the core + * knows which request was fautly + */ if (res) { spin_lock_bh(&engine->lock); engine->req = req; diff --git a/drivers/crypto/marvell/octeontx/Makefile b/drivers/crypto/marvell/octeontx/Makefile new file mode 100644 index 000000000000..5e956fe1a85b --- /dev/null +++ b/drivers/crypto/marvell/octeontx/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_CRYPTO_DEV_OCTEONTX_CPT) += octeontx-cpt.o octeontx-cptvf.o + +octeontx-cpt-objs := otx_cptpf_main.o otx_cptpf_mbox.o otx_cptpf_ucode.o +octeontx-cptvf-objs := otx_cptvf_main.o otx_cptvf_mbox.o otx_cptvf_reqmgr.o \ + otx_cptvf_algs.o diff --git a/drivers/crypto/marvell/octeontx/otx_cpt_common.h b/drivers/crypto/marvell/octeontx/otx_cpt_common.h new file mode 100644 index 000000000000..ca704a7a265f --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cpt_common.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver 
+ * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPT_COMMON_H +#define __OTX_CPT_COMMON_H + +#include <linux/types.h> +#include <linux/delay.h> +#include <linux/device.h> + +#define OTX_CPT_MAX_MBOX_DATA_STR_SIZE 64 + +enum otx_cptpf_type { + OTX_CPT_AE = 2, + OTX_CPT_SE = 3, + BAD_OTX_CPTPF_TYPE, +}; + +enum otx_cptvf_type { + OTX_CPT_AE_TYPES = 1, + OTX_CPT_SE_TYPES = 2, + BAD_OTX_CPTVF_TYPE, +}; + +/* VF-PF message opcodes */ +enum otx_cpt_mbox_opcode { + OTX_CPT_MSG_VF_UP = 1, + OTX_CPT_MSG_VF_DOWN, + OTX_CPT_MSG_READY, + OTX_CPT_MSG_QLEN, + OTX_CPT_MSG_QBIND_GRP, + OTX_CPT_MSG_VQ_PRIORITY, + OTX_CPT_MSG_PF_TYPE, + OTX_CPT_MSG_ACK, + OTX_CPT_MSG_NACK +}; + +/* OcteonTX CPT mailbox structure */ +struct otx_cpt_mbox { + u64 msg; /* Message type MBOX[0] */ + u64 data;/* Data MBOX[1] */ +}; + +#endif /* __OTX_CPT_COMMON_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h new file mode 100644 index 000000000000..b8bdb9f134f3 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h @@ -0,0 +1,824 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __OTX_CPT_HW_TYPES_H +#define __OTX_CPT_HW_TYPES_H + +#include <linux/types.h> + +/* Device IDs */ +#define OTX_CPT_PCI_PF_DEVICE_ID 0xa040 +#define OTX_CPT_PCI_VF_DEVICE_ID 0xa041 + +#define OTX_CPT_PCI_PF_SUBSYS_ID 0xa340 +#define OTX_CPT_PCI_VF_SUBSYS_ID 0xa341 + +/* Configuration and status registers are in BAR0 on OcteonTX platform */ +#define OTX_CPT_PF_PCI_CFG_BAR 0 +#define OTX_CPT_VF_PCI_CFG_BAR 0 + +#define OTX_CPT_BAR_E_CPTX_VFX_BAR0_OFFSET(a, b) \ + (0x000020000000ll + 0x1000000000ll * (a) + 0x100000ll * (b)) +#define OTX_CPT_BAR_E_CPTX_VFX_BAR0_SIZE 0x400000 + +/* Mailbox interrupts offset */ +#define OTX_CPT_PF_MBOX_INT 3 +#define OTX_CPT_PF_INT_VEC_E_MBOXX(x, a) ((x) + (a)) +/* Number of MSIX supported in PF */ +#define OTX_CPT_PF_MSIX_VECTORS 4 +/* Maximum supported microcode groups */ +#define OTX_CPT_MAX_ENGINE_GROUPS 8 + +/* CPT instruction size in bytes */ +#define OTX_CPT_INST_SIZE 64 +/* CPT queue next chunk pointer size in bytes */ +#define OTX_CPT_NEXT_CHUNK_PTR_SIZE 8 + +/* OcteonTX CPT VF MSIX vectors and their offsets */ +#define OTX_CPT_VF_MSIX_VECTORS 2 +#define OTX_CPT_VF_INTR_MBOX_MASK BIT(0) +#define OTX_CPT_VF_INTR_DOVF_MASK BIT(1) +#define OTX_CPT_VF_INTR_IRDE_MASK BIT(2) +#define OTX_CPT_VF_INTR_NWRP_MASK BIT(3) +#define OTX_CPT_VF_INTR_SERR_MASK BIT(4) + +/* OcteonTX CPT PF registers */ +#define OTX_CPT_PF_CONSTANTS (0x0ll) +#define OTX_CPT_PF_RESET (0x100ll) +#define OTX_CPT_PF_DIAG (0x120ll) +#define OTX_CPT_PF_BIST_STATUS (0x160ll) +#define OTX_CPT_PF_ECC0_CTL (0x200ll) +#define OTX_CPT_PF_ECC0_FLIP (0x210ll) +#define OTX_CPT_PF_ECC0_INT (0x220ll) +#define OTX_CPT_PF_ECC0_INT_W1S (0x230ll) +#define OTX_CPT_PF_ECC0_ENA_W1S (0x240ll) +#define OTX_CPT_PF_ECC0_ENA_W1C (0x250ll) +#define OTX_CPT_PF_MBOX_INTX(b) (0x400ll | (u64)(b) << 3) +#define OTX_CPT_PF_MBOX_INT_W1SX(b) (0x420ll | (u64)(b) << 3) +#define OTX_CPT_PF_MBOX_ENA_W1CX(b) (0x440ll | (u64)(b) << 3) +#define OTX_CPT_PF_MBOX_ENA_W1SX(b) (0x460ll | (u64)(b) 
<< 3) +#define OTX_CPT_PF_EXEC_INT (0x500ll) +#define OTX_CPT_PF_EXEC_INT_W1S (0x520ll) +#define OTX_CPT_PF_EXEC_ENA_W1C (0x540ll) +#define OTX_CPT_PF_EXEC_ENA_W1S (0x560ll) +#define OTX_CPT_PF_GX_EN(b) (0x600ll | (u64)(b) << 3) +#define OTX_CPT_PF_EXEC_INFO (0x700ll) +#define OTX_CPT_PF_EXEC_BUSY (0x800ll) +#define OTX_CPT_PF_EXEC_INFO0 (0x900ll) +#define OTX_CPT_PF_EXEC_INFO1 (0x910ll) +#define OTX_CPT_PF_INST_REQ_PC (0x10000ll) +#define OTX_CPT_PF_INST_LATENCY_PC (0x10020ll) +#define OTX_CPT_PF_RD_REQ_PC (0x10040ll) +#define OTX_CPT_PF_RD_LATENCY_PC (0x10060ll) +#define OTX_CPT_PF_RD_UC_PC (0x10080ll) +#define OTX_CPT_PF_ACTIVE_CYCLES_PC (0x10100ll) +#define OTX_CPT_PF_EXE_CTL (0x4000000ll) +#define OTX_CPT_PF_EXE_STATUS (0x4000008ll) +#define OTX_CPT_PF_EXE_CLK (0x4000010ll) +#define OTX_CPT_PF_EXE_DBG_CTL (0x4000018ll) +#define OTX_CPT_PF_EXE_DBG_DATA (0x4000020ll) +#define OTX_CPT_PF_EXE_BIST_STATUS (0x4000028ll) +#define OTX_CPT_PF_EXE_REQ_TIMER (0x4000030ll) +#define OTX_CPT_PF_EXE_MEM_CTL (0x4000038ll) +#define OTX_CPT_PF_EXE_PERF_CTL (0x4001000ll) +#define OTX_CPT_PF_EXE_DBG_CNTX(b) (0x4001100ll | (u64)(b) << 3) +#define OTX_CPT_PF_EXE_PERF_EVENT_CNT (0x4001180ll) +#define OTX_CPT_PF_EXE_EPCI_INBX_CNT(b) (0x4001200ll | (u64)(b) << 3) +#define OTX_CPT_PF_EXE_EPCI_OUTBX_CNT(b) (0x4001240ll | (u64)(b) << 3) +#define OTX_CPT_PF_ENGX_UCODE_BASE(b) (0x4002000ll | (u64)(b) << 3) +#define OTX_CPT_PF_QX_CTL(b) (0x8000000ll | (u64)(b) << 20) +#define OTX_CPT_PF_QX_GMCTL(b) (0x8000020ll | (u64)(b) << 20) +#define OTX_CPT_PF_QX_CTL2(b) (0x8000100ll | (u64)(b) << 20) +#define OTX_CPT_PF_VFX_MBOXX(b, c) (0x8001000ll | (u64)(b) << 20 | \ + (u64)(c) << 8) + +/* OcteonTX CPT VF registers */ +#define OTX_CPT_VQX_CTL(b) (0x100ll | (u64)(b) << 20) +#define OTX_CPT_VQX_SADDR(b) (0x200ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_WAIT(b) (0x400ll | (u64)(b) << 20) +#define OTX_CPT_VQX_INPROG(b) (0x410ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE(b) (0x420ll | (u64)(b) << 20) 
+#define OTX_CPT_VQX_DONE_ACK(b) (0x440ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_INT_W1S(b) (0x460ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_INT_W1C(b) (0x468ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_ENA_W1S(b) (0x470ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_ENA_W1C(b) (0x478ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_INT(b) (0x500ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_INT_W1S(b) (0x508ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_ENA_W1S(b) (0x510ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_ENA_W1C(b) (0x518ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DOORBELL(b) (0x600ll | (u64)(b) << 20) +#define OTX_CPT_VFX_PF_MBOXX(b, c) (0x1000ll | ((b) << 20) | ((c) << 3)) + +/* + * Enumeration otx_cpt_ucode_error_code_e + * + * Enumerates ucode errors + */ +enum otx_cpt_ucode_error_code_e { + CPT_NO_UCODE_ERROR = 0x00, + ERR_OPCODE_UNSUPPORTED = 0x01, + + /* Scatter gather */ + ERR_SCATTER_GATHER_WRITE_LENGTH = 0x02, + ERR_SCATTER_GATHER_LIST = 0x03, + ERR_SCATTER_GATHER_NOT_SUPPORTED = 0x04, + +}; + +/* + * Enumeration otx_cpt_comp_e + * + * CPT OcteonTX Completion Enumeration + * Enumerates the values of CPT_RES_S[COMPCODE]. + */ +enum otx_cpt_comp_e { + CPT_COMP_E_NOTDONE = 0x00, + CPT_COMP_E_GOOD = 0x01, + CPT_COMP_E_FAULT = 0x02, + CPT_COMP_E_SWERR = 0x03, + CPT_COMP_E_HWERR = 0x04, + CPT_COMP_E_LAST_ENTRY = 0x05 +}; + +/* + * Enumeration otx_cpt_vf_int_vec_e + * + * CPT OcteonTX VF MSI-X Vector Enumeration + * Enumerates the MSI-X interrupt vectors. + */ +enum otx_cpt_vf_int_vec_e { + CPT_VF_INT_VEC_E_MISC = 0x00, + CPT_VF_INT_VEC_E_DONE = 0x01 +}; + +/* + * Structure cpt_inst_s + * + * CPT Instruction Structure + * This structure specifies the instruction layout. Instructions are + * stored in memory as little-endian unless CPT()_PF_Q()_CTL[INST_BE] is set. + * cpt_inst_s_s + * Word 0 + * doneint:1 Done interrupt. + * 0 = No interrupts related to this instruction. 
+ * 1 = When the instruction completes, CPT()_VQ()_DONE[DONE] will be + * incremented, and based on the rules described there an interrupt may + * occur. + * Word 1 + * res_addr [127: 64] Result IOVA. + * If nonzero, specifies where to write CPT_RES_S. + * If zero, no result structure will be written. + * Address must be 16-byte aligned. + * Bits <63:49> are ignored by hardware; software should use a + * sign-extended bit <48> for forward compatibility. + * Word 2 + * grp:10 [171:162] If [WQ_PTR] is nonzero, the SSO guest-group to use when + * CPT submits work to SSO. + * For the SSO to not discard the add-work request, FPA_PF_MAP() must map + * [GRP] and CPT()_PF_Q()_GMCTL[GMID] as valid. + * tt:2 [161:160] If [WQ_PTR] is nonzero, the SSO tag type to use when CPT + * submits work to SSO. + * tag:32 [159:128] If [WQ_PTR] is nonzero, the SSO tag to use when CPT + * submits work to SSO. + * Word 3 + * wq_ptr [255:192] If [WQ_PTR] is nonzero, it is a pointer to a + * work-queue entry that CPT submits work to SSO after all context, + * output data, and result write operations are visible to other + * CNXXXX units and the cores. Bits <2:0> must be zero. + * Bits <63:49> are ignored by hardware; software should + * use a sign-extended bit <48> for forward compatibility. + * Internal: + * Bits <63:49>, <2:0> are ignored by hardware, treated as always 0x0. + * Word 4 + * ei0; [319:256] Engine instruction word 0. Passed to the AE/SE. + * Word 5 + * ei1; [383:320] Engine instruction word 1. Passed to the AE/SE. + * Word 6 + * ei2; [447:384] Engine instruction word 2. Passed to the AE/SE. + * Word 7 + * ei3; [511:448] Engine instruction word 3. Passed to the AE/SE. 
+ * + */ +union otx_cpt_inst_s { + u64 u[8]; + + struct { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_17_63:47; + u64 doneint:1; + u64 reserved_0_15:16; +#else /* Word 0 - Little Endian */ + u64 reserved_0_15:16; + u64 doneint:1; + u64 reserved_17_63:47; +#endif /* Word 0 - End */ + u64 res_addr; +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 2 - Big Endian */ + u64 reserved_172_191:20; + u64 grp:10; + u64 tt:2; + u64 tag:32; +#else /* Word 2 - Little Endian */ + u64 tag:32; + u64 tt:2; + u64 grp:10; + u64 reserved_172_191:20; +#endif /* Word 2 - End */ + u64 wq_ptr; + u64 ei0; + u64 ei1; + u64 ei2; + u64 ei3; + } s; +}; + +/* + * Structure cpt_res_s + * + * CPT Result Structure + * The CPT coprocessor writes the result structure after it completes a + * CPT_INST_S instruction. The result structure is exactly 16 bytes, and + * each instruction completion produces exactly one result structure. + * + * This structure is stored in memory as little-endian unless + * CPT()_PF_Q()_CTL[INST_BE] is set. + * cpt_res_s_s + * Word 0 + * doneint:1 [16:16] Done interrupt. This bit is copied from the + * corresponding instruction's CPT_INST_S[DONEINT]. + * compcode:8 [7:0] Indicates completion/error status of the CPT coprocessor + * for the associated instruction, as enumerated by CPT_COMP_E. + * Core software may write the memory location containing [COMPCODE] to + * 0x0 before ringing the doorbell, and then poll for completion by + * checking for a nonzero value. + * Once the core observes a nonzero [COMPCODE] value in this case, the CPT + * coprocessor will have also completed L2/DRAM write operations. 
+ * Word 1 + * reserved + * + */ +union otx_cpt_res_s { + u64 u[2]; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_17_63:47; + u64 doneint:1; + u64 reserved_8_15:8; + u64 compcode:8; +#else /* Word 0 - Little Endian */ + u64 compcode:8; + u64 reserved_8_15:8; + u64 doneint:1; + u64 reserved_17_63:47; +#endif /* Word 0 - End */ + u64 reserved_64_127; + } s; +}; + +/* + * Register (NCB) otx_cpt#_pf_bist_status + * + * CPT PF Control Bist Status Register + * This register has the BIST status of memories. Each bit is the BIST result + * of an individual memory (per bit, 0 = pass and 1 = fail). + * otx_cptx_pf_bist_status_s + * Word0 + * bstatus [29:0](RO/H) BIST status. One bit per memory, enumerated by + * CPT_RAMS_E. + */ +union otx_cptx_pf_bist_status { + u64 u; + struct otx_cptx_pf_bist_status_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_30_63:34; + u64 bstatus:30; +#else /* Word 0 - Little Endian */ + u64 bstatus:30; + u64 reserved_30_63:34; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_pf_constants + * + * CPT PF Constants Register + * This register contains implementation-related parameters of CPT in CNXXXX. + * otx_cptx_pf_constants_s + * Word 0 + * reserved_40_63:24 [63:40] Reserved. + * epcis:8 [39:32](RO) Number of EPCI busses. + * grps:8 [31:24](RO) Number of engine groups implemented. + * ae:8 [23:16](RO/H) Number of AEs. In CNXXXX, for CPT0 returns 0x0, + * for CPT1 returns 0x18, or less if there are fuse-disables. + * se:8 [15:8](RO/H) Number of SEs. In CNXXXX, for CPT0 returns 0x30, + * or less if there are fuse-disables, for CPT1 returns 0x0. + * vq:8 [7:0](RO) Number of VQs. 
+ */ +union otx_cptx_pf_constants { + u64 u; + struct otx_cptx_pf_constants_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_40_63:24; + u64 epcis:8; + u64 grps:8; + u64 ae:8; + u64 se:8; + u64 vq:8; +#else /* Word 0 - Little Endian */ + u64 vq:8; + u64 se:8; + u64 ae:8; + u64 grps:8; + u64 epcis:8; + u64 reserved_40_63:24; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_pf_exe_bist_status + * + * CPT PF Engine Bist Status Register + * This register has the BIST status of each engine. Each bit is the + * BIST result of an individual engine (per bit, 0 = pass and 1 = fail). + * otx_cptx_pf_exe_bist_status_s + * Word0 + * reserved_48_63:16 [63:48] reserved + * bstatus:48 [47:0](RO/H) BIST status. One bit per engine. + * + */ +union otx_cptx_pf_exe_bist_status { + u64 u; + struct otx_cptx_pf_exe_bist_status_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_48_63:16; + u64 bstatus:48; +#else /* Word 0 - Little Endian */ + u64 bstatus:48; + u64 reserved_48_63:16; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_pf_q#_ctl + * + * CPT Queue Control Register + * This register configures queues. This register should be changed only + * when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]). + * otx_cptx_pf_qx_ctl_s + * Word0 + * reserved_60_63:4 [63:60] reserved. + * aura:12; [59:48](R/W) Guest-aura for returning this queue's + * instruction-chunk buffers to FPA. Only used when [INST_FREE] is set. + * For the FPA to not discard the request, FPA_PF_MAP() must map + * [AURA] and CPT()_PF_Q()_GMCTL[GMID] as valid. + * reserved_45_47:3 [47:45] reserved. + * size:13 [44:32](R/W) Command-buffer size, in number of 64-bit words per + * command buffer segment. Must be 8*n + 1, where n is the number of + * instructions per buffer segment. + * reserved_11_31:21 [31:11] Reserved. + * cont_err:1 [10:10](R/W) Continue on error. 
+ * 0 = When CPT()_VQ()_MISC_INT[NWRP], CPT()_VQ()_MISC_INT[IRDE] or + * CPT()_VQ()_MISC_INT[DOVF] are set by hardware or software via + * CPT()_VQ()_MISC_INT_W1S, then CPT()_VQ()_CTL[ENA] is cleared. Due to + * pipelining, additional instructions may have been processed between the + * instruction causing the error and the next instruction in the disabled + * queue (the instruction at CPT()_VQ()_SADDR). + * 1 = Ignore errors and continue processing instructions. + * For diagnostic use only. + * inst_free:1 [9:9](R/W) Instruction FPA free. When set, when CPT reaches the + * end of an instruction chunk, that chunk will be freed to the FPA. + * inst_be:1 [8:8](R/W) Instruction big-endian control. When set, instructions, + * instruction next chunk pointers, and result structures are stored in + * big-endian format in memory. + * iqb_ldwb:1 [7:7](R/W) Instruction load don't write back. + * 0 = The hardware issues NCB transient load (LDT) towards the cache, + * which if the line hits and is dirty will cause the line to be + * written back before being replaced. + * 1 = The hardware issues NCB LDWB read-and-invalidate command towards + * the cache when fetching the last word of instructions; as a result the + * line will not be written back when replaced. This improves + * performance, but software must not read the instructions after they are + * posted to the hardware. Reads that do not consume the last word of a + * cache line always use LDI. + * reserved_4_6:3 [6:4] Reserved. + * grp:3; [3:1](R/W) Engine group. + * pri:1; [0:0](R/W) Queue priority. + * 1 = This queue has higher priority. Round-robin between higher + * priority queues. + * 0 = This queue has lower priority. Round-robin between lower + * priority queues. 
+ */ +union otx_cptx_pf_qx_ctl { + u64 u; + struct otx_cptx_pf_qx_ctl_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_60_63:4; + u64 aura:12; + u64 reserved_45_47:3; + u64 size:13; + u64 reserved_11_31:21; + u64 cont_err:1; + u64 inst_free:1; + u64 inst_be:1; + u64 iqb_ldwb:1; + u64 reserved_4_6:3; + u64 grp:3; + u64 pri:1; +#else /* Word 0 - Little Endian */ + u64 pri:1; + u64 grp:3; + u64 reserved_4_6:3; + u64 iqb_ldwb:1; + u64 inst_be:1; + u64 inst_free:1; + u64 cont_err:1; + u64 reserved_11_31:21; + u64 size:13; + u64 reserved_45_47:3; + u64 aura:12; + u64 reserved_60_63:4; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_saddr + * + * CPT Queue Starting Buffer Address Registers + * These registers set the instruction buffer starting address. + * otx_cptx_vqx_saddr_s + * Word0 + * reserved_49_63:15 [63:49] Reserved. + * ptr:43 [48:6](R/W/H) Instruction buffer IOVA <48:6> (64-byte aligned). + * When written, it is the initial buffer starting address; when read, + * it is the next read pointer to be requested from L2C. The PTR field + * is overwritten with the next pointer each time that the command buffer + * segment is exhausted. New commands will then be read from the newly + * specified command buffer pointer. + * reserved_0_5:6 [5:0] Reserved. + * + */ +union otx_cptx_vqx_saddr { + u64 u; + struct otx_cptx_vqx_saddr_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_49_63:15; + u64 ptr:43; + u64 reserved_0_5:6; +#else /* Word 0 - Little Endian */ + u64 reserved_0_5:6; + u64 ptr:43; + u64 reserved_49_63:15; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_misc_ena_w1s + * + * CPT Queue Misc Interrupt Enable Set Register + * This register sets interrupt enable bits. + * otx_cptx_vqx_misc_ena_w1s_s + * Word0 + * reserved_5_63:59 [63:5] Reserved. + * swerr:1 [4:4](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[SWERR]. 
+ * nwrp:1 [3:3](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[NWRP]. + * irde:1 [2:2](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[IRDE]. + * dovf:1 [1:1](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[DOVF]. + * mbox:1 [0:0](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[MBOX]. + * + */ +union otx_cptx_vqx_misc_ena_w1s { + u64 u; + struct otx_cptx_vqx_misc_ena_w1s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_5_63:59; + u64 swerr:1; + u64 nwrp:1; + u64 irde:1; + u64 dovf:1; + u64 mbox:1; +#else /* Word 0 - Little Endian */ + u64 mbox:1; + u64 dovf:1; + u64 irde:1; + u64 nwrp:1; + u64 swerr:1; + u64 reserved_5_63:59; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_doorbell + * + * CPT Queue Doorbell Registers + * Doorbells for the CPT instruction queues. + * otx_cptx_vqx_doorbell_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * dbell_cnt:20 [19:0](R/W/H) Number of instruction queue 64-bit words to add + * to the CPT instruction doorbell count. Readback value is the + * current number of pending doorbell requests. If counter overflows + * CPT()_VQ()_MISC_INT[DBELL_DOVF] is set. To reset the count back to + * zero, write one to clear CPT()_VQ()_MISC_INT_ENA_W1C[DBELL_DOVF], + * then write a value of 2^20 minus the read [DBELL_CNT], then write one + * to CPT()_VQ()_MISC_INT_W1C[DBELL_DOVF] and + * CPT()_VQ()_MISC_INT_ENA_W1S[DBELL_DOVF]. Must be a multiple of 8. + * All CPT instructions are 8 words and require a doorbell count of + * a multiple of 8. 
+ */ +union otx_cptx_vqx_doorbell { + u64 u; + struct otx_cptx_vqx_doorbell_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 dbell_cnt:20; +#else /* Word 0 - Little Endian */ + u64 dbell_cnt:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_inprog + * + * CPT Queue In Progress Count Registers + * These registers contain the per-queue instruction in flight registers. + * otx_cptx_vqx_inprog_s + * Word0 + * reserved_8_63:56 [63:8] Reserved. + * inflight:8 [7:0](RO/H) Inflight count. Counts the number of instructions + * for the VF for which CPT is fetching, executing or responding to + * instructions. However this does not include any interrupts that are + * awaiting software handling (CPT()_VQ()_DONE[DONE] != 0x0). + * A queue may not be reconfigured until: + * 1. CPT()_VQ()_CTL[ENA] is cleared by software. + * 2. [INFLIGHT] is polled until equals to zero. + */ +union otx_cptx_vqx_inprog { + u64 u; + struct otx_cptx_vqx_inprog_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_8_63:56; + u64 inflight:8; +#else /* Word 0 - Little Endian */ + u64 inflight:8; + u64 reserved_8_63:56; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_misc_int + * + * CPT Queue Misc Interrupt Register + * These registers contain the per-queue miscellaneous interrupts. + * otx_cptx_vqx_misc_int_s + * Word 0 + * reserved_5_63:59 [63:5] Reserved. + * swerr:1 [4:4](R/W1C/H) Software error from engines. + * nwrp:1 [3:3](R/W1C/H) NCB result write response error. + * irde:1 [2:2](R/W1C/H) Instruction NCB read response error. + * dovf:1 [1:1](R/W1C/H) Doorbell overflow. + * mbox:1 [0:0](R/W1C/H) PF to VF mailbox interrupt. Set when + * CPT()_VF()_PF_MBOX(0) is written. 
+ * + */ +union otx_cptx_vqx_misc_int { + u64 u; + struct otx_cptx_vqx_misc_int_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_5_63:59; + u64 swerr:1; + u64 nwrp:1; + u64 irde:1; + u64 dovf:1; + u64 mbox:1; +#else /* Word 0 - Little Endian */ + u64 mbox:1; + u64 dovf:1; + u64 irde:1; + u64 nwrp:1; + u64 swerr:1; + u64 reserved_5_63:59; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done_ack + * + * CPT Queue Done Count Ack Registers + * This register is written by software to acknowledge interrupts. + * otx_cptx_vqx_done_ack_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * done_ack:20 [19:0](R/W/H) Number of decrements to CPT()_VQ()_DONE[DONE]. + * Reads CPT()_VQ()_DONE[DONE]. Written by software to acknowledge + * interrupts. If CPT()_VQ()_DONE[DONE] is still nonzero the interrupt + * will be re-sent if the conditions described in CPT()_VQ()_DONE[DONE] + * are satisfied. + * + */ +union otx_cptx_vqx_done_ack { + u64 u; + struct otx_cptx_vqx_done_ack_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 done_ack:20; +#else /* Word 0 - Little Endian */ + u64 done_ack:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done + * + * CPT Queue Done Count Registers + * These registers contain the per-queue instruction done count. + * cptx_vqx_done_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * done:20 [19:0](R/W/H) Done count. When CPT_INST_S[DONEINT] set and that + * instruction completes, CPT()_VQ()_DONE[DONE] is incremented when the + * instruction finishes. Write to this field are for diagnostic use only; + * instead software writes CPT()_VQ()_DONE_ACK with the number of + * decrements for this field. + * Interrupts are sent as follows: + * * When CPT()_VQ()_DONE[DONE] = 0, then no results are pending, the + * interrupt coalescing timer is held to zero, and an interrupt is not + * sent. 
+ * * When CPT()_VQ()_DONE[DONE] != 0, then the interrupt coalescing timer + * counts. If the counter is >= CPT()_VQ()_DONE_WAIT[TIME_WAIT]*1024, or + * CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT], i.e. enough + * time has passed or enough results have arrived, then the interrupt is + * sent. + * * When CPT()_VQ()_DONE_ACK is written (or CPT()_VQ()_DONE is written + * but this is not typical), the interrupt coalescing timer restarts. + * Note after decrementing this interrupt equation is recomputed, + * for example if CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT] + * and because the timer is zero, the interrupt will be resent immediately. + * (This covers the race case between software acknowledging an interrupt + * and a result returning.) + * * When CPT()_VQ()_DONE_ENA_W1S[DONE] = 0, interrupts are not sent, + * but the counting described above still occurs. + * Since CPT instructions complete out-of-order, if software is using + * completion interrupts the suggested scheme is to request a DONEINT on + * each request, and when an interrupt arrives perform a "greedy" scan for + * completions; even if a later command is acknowledged first this will + * not result in missing a completion. + * Software is responsible for making sure [DONE] does not overflow; + * for example by ensuring there are not more than 2^20-1 instructions in + * flight that may request interrupts. + * + */ +union otx_cptx_vqx_done { + u64 u; + struct otx_cptx_vqx_done_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 done:20; +#else /* Word 0 - Little Endian */ + u64 done:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done_wait + * + * CPT Queue Done Interrupt Coalescing Wait Registers + * Specifies the per queue interrupt coalescing settings. + * cptx_vqx_done_wait_s + * Word0 + * reserved_48_63:16 [63:48] Reserved. + * time_wait:16; [47:32](R/W) Time hold-off. 
When CPT()_VQ()_DONE[DONE] = 0 + * or CPT()_VQ()_DONE_ACK is written a timer is cleared. When the timer + * reaches [TIME_WAIT]*1024 then interrupt coalescing ends. + * see CPT()_VQ()_DONE[DONE]. If 0x0, time coalescing is disabled. + * reserved_20_31:12 [31:20] Reserved. + * num_wait:20 [19:0](R/W) Number of messages hold-off. + * When CPT()_VQ()_DONE[DONE] >= [NUM_WAIT] then interrupt coalescing ends + * see CPT()_VQ()_DONE[DONE]. If 0x0, same behavior as 0x1. + * + */ +union otx_cptx_vqx_done_wait { + u64 u; + struct otx_cptx_vqx_done_wait_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_48_63:16; + u64 time_wait:16; + u64 reserved_20_31:12; + u64 num_wait:20; +#else /* Word 0 - Little Endian */ + u64 num_wait:20; + u64 reserved_20_31:12; + u64 time_wait:16; + u64 reserved_48_63:16; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done_ena_w1s + * + * CPT Queue Done Interrupt Enable Set Registers + * Write 1 to these registers will enable the DONEINT interrupt for the queue. + * cptx_vqx_done_ena_w1s_s + * Word0 + * reserved_1_63:63 [63:1] Reserved. + * done:1 [0:0](R/W1S/H) Write 1 will enable DONEINT for this queue. + * Write 0 has no effect. Read will return the enable bit. + */ +union otx_cptx_vqx_done_ena_w1s { + u64 u; + struct otx_cptx_vqx_done_ena_w1s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_1_63:63; + u64 done:1; +#else /* Word 0 - Little Endian */ + u64 done:1; + u64 reserved_1_63:63; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_ctl + * + * CPT VF Queue Control Registers + * This register configures queues. This register should be changed (other than + * clearing [ENA]) only when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]). + * cptx_vqx_ctl_s + * Word0 + * reserved_1_63:63 [63:1] Reserved. + * ena:1 [0:0](R/W/H) Enables the logical instruction queue. + * See also CPT()_PF_Q()_CTL[CONT_ERR] and CPT()_VQ()_INPROG[INFLIGHT]. 
+ * 1 = Queue is enabled. + * 0 = Queue is disabled. + */ +union otx_cptx_vqx_ctl { + u64 u; + struct otx_cptx_vqx_ctl_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_1_63:63; + u64 ena:1; +#else /* Word 0 - Little Endian */ + u64 ena:1; + u64 reserved_1_63:63; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Error Address/Error Codes + * + * In the event of a severe error, microcode writes an 8-byte Error Code + * value (ECODE) to host memory at the Rptr address specified by the host + * system (in the 64-byte request). + * + * Word0 + * [63:56](R) 8-bit completion code + * [55:48](R) Number of the core that reported the severe error + * [47:0] Lower 6 bytes of M-Inst word2. Used to assist in uniquely + * identifying which specific instruction caused the error. This assumes + * that each instruction has a unique result location (RPTR), at least + * for a given period of time. + */ +union otx_cpt_error_code { + u64 u; + struct otx_cpt_error_code_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + uint64_t ccode:8; + uint64_t coreid:8; + uint64_t rptr6:48; +#else /* Word 0 - Little Endian */ + uint64_t rptr6:48; + uint64_t coreid:8; + uint64_t ccode:8; +#endif /* Word 0 - End */ + } s; +}; + +#endif /*__OTX_CPT_HW_TYPES_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf.h b/drivers/crypto/marvell/octeontx/otx_cptpf.h new file mode 100644 index 000000000000..73cd0a9bc563 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __OTX_CPTPF_H +#define __OTX_CPTPF_H + +#include <linux/types.h> +#include <linux/device.h> +#include "otx_cptpf_ucode.h" + +/* + * OcteonTX CPT device structure + */ +struct otx_cpt_device { + void __iomem *reg_base; /* Register start address */ + struct pci_dev *pdev; /* Pci device handle */ + struct otx_cpt_eng_grps eng_grps;/* Engine groups information */ + struct list_head list; + u8 pf_type; /* PF type SE or AE */ + u8 max_vfs; /* Maximum number of VFs supported by the CPT */ + u8 vfs_enabled; /* Number of enabled VFs */ +}; + +void otx_cpt_mbox_intr_handler(struct otx_cpt_device *cpt, int mbx); +void otx_cpt_disable_all_cores(struct otx_cpt_device *cpt); + +#endif /* __OTX_CPTPF_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_main.c b/drivers/crypto/marvell/octeontx/otx_cptpf_main.c new file mode 100644 index 000000000000..200fb3303db0 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_main.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include "otx_cpt_common.h" +#include "otx_cptpf.h" + +#define DRV_NAME "octeontx-cpt" +#define DRV_VERSION "1.0" + +static void otx_cpt_disable_mbox_interrupts(struct otx_cpt_device *cpt) +{ + /* Disable mbox(0) interrupts for all VFs */ + writeq(~0ull, cpt->reg_base + OTX_CPT_PF_MBOX_ENA_W1CX(0)); +} + +static void otx_cpt_enable_mbox_interrupts(struct otx_cpt_device *cpt) +{ + /* Enable mbox(0) interrupts for all VFs */ + writeq(~0ull, cpt->reg_base + OTX_CPT_PF_MBOX_ENA_W1SX(0)); +} + +static irqreturn_t otx_cpt_mbx0_intr_handler(int __always_unused irq, + void *cpt) +{ + otx_cpt_mbox_intr_handler(cpt, 0); + + return IRQ_HANDLED; +} + +static void otx_cpt_reset(struct otx_cpt_device *cpt) +{ + writeq(1, cpt->reg_base + OTX_CPT_PF_RESET); +} + +static void otx_cpt_find_max_enabled_cores(struct otx_cpt_device *cpt) +{ + union otx_cptx_pf_constants pf_cnsts = {0}; + + pf_cnsts.u = readq(cpt->reg_base + OTX_CPT_PF_CONSTANTS); + cpt->eng_grps.avail.max_se_cnt = pf_cnsts.s.se; + cpt->eng_grps.avail.max_ae_cnt = pf_cnsts.s.ae; +} + +static u32 otx_cpt_check_bist_status(struct otx_cpt_device *cpt) +{ + union otx_cptx_pf_bist_status bist_sts = {0}; + + bist_sts.u = readq(cpt->reg_base + OTX_CPT_PF_BIST_STATUS); + return bist_sts.u; +} + +static u64 otx_cpt_check_exe_bist_status(struct otx_cpt_device *cpt) +{ + union otx_cptx_pf_exe_bist_status bist_sts = {0}; + + bist_sts.u = readq(cpt->reg_base + OTX_CPT_PF_EXE_BIST_STATUS); + return bist_sts.u; +} + +static int otx_cpt_device_init(struct otx_cpt_device *cpt) +{ + struct device *dev = &cpt->pdev->dev; + u16 sdevid; + u64 bist; + + /* Reset the PF when probed first */ + otx_cpt_reset(cpt); + mdelay(100); + + pci_read_config_word(cpt->pdev, PCI_SUBSYSTEM_ID, &sdevid); + + /* Check BIST status */ + bist = (u64)otx_cpt_check_bist_status(cpt); + if (bist) { + dev_err(dev, "RAM BIST failed with code 0x%llx", bist); + return -ENODEV; + } + + bist = otx_cpt_check_exe_bist_status(cpt); + if (bist) { + dev_err(dev, 
"Engine BIST failed with code 0x%llx", bist); + return -ENODEV; + } + + /* Get max enabled cores */ + otx_cpt_find_max_enabled_cores(cpt); + + if ((sdevid == OTX_CPT_PCI_PF_SUBSYS_ID) && + (cpt->eng_grps.avail.max_se_cnt == 0)) { + cpt->pf_type = OTX_CPT_AE; + } else if ((sdevid == OTX_CPT_PCI_PF_SUBSYS_ID) && + (cpt->eng_grps.avail.max_ae_cnt == 0)) { + cpt->pf_type = OTX_CPT_SE; + } + + /* Get max VQs/VFs supported by the device */ + cpt->max_vfs = pci_sriov_get_totalvfs(cpt->pdev); + + /* Disable all cores */ + otx_cpt_disable_all_cores(cpt); + + return 0; +} + +static int otx_cpt_register_interrupts(struct otx_cpt_device *cpt) +{ + struct device *dev = &cpt->pdev->dev; + u32 mbox_int_idx = OTX_CPT_PF_MBOX_INT; + u32 num_vec = OTX_CPT_PF_MSIX_VECTORS; + int ret; + + /* Enable MSI-X */ + ret = pci_alloc_irq_vectors(cpt->pdev, num_vec, num_vec, PCI_IRQ_MSIX); + if (ret < 0) { + dev_err(&cpt->pdev->dev, + "Request for #%d msix vectors failed\n", + num_vec); + return ret; + } + + /* Register mailbox interrupt handlers */ + ret = request_irq(pci_irq_vector(cpt->pdev, + OTX_CPT_PF_INT_VEC_E_MBOXX(mbox_int_idx, 0)), + otx_cpt_mbx0_intr_handler, 0, "CPT Mbox0", cpt); + if (ret) { + dev_err(dev, "Request irq failed\n"); + pci_free_irq_vectors(cpt->pdev); + return ret; + } + /* Enable mailbox interrupt */ + otx_cpt_enable_mbox_interrupts(cpt); + return 0; +} + +static void otx_cpt_unregister_interrupts(struct otx_cpt_device *cpt) +{ + u32 mbox_int_idx = OTX_CPT_PF_MBOX_INT; + + otx_cpt_disable_mbox_interrupts(cpt); + free_irq(pci_irq_vector(cpt->pdev, + OTX_CPT_PF_INT_VEC_E_MBOXX(mbox_int_idx, 0)), + cpt); + pci_free_irq_vectors(cpt->pdev); +} + + +static int otx_cpt_sriov_configure(struct pci_dev *pdev, int numvfs) +{ + struct otx_cpt_device *cpt = pci_get_drvdata(pdev); + int ret = 0; + + if (numvfs > cpt->max_vfs) + numvfs = cpt->max_vfs; + + if (numvfs > 0) { + ret = otx_cpt_try_create_default_eng_grps(cpt->pdev, + &cpt->eng_grps, + cpt->pf_type); + if (ret) + return 
ret; + + cpt->vfs_enabled = numvfs; + ret = pci_enable_sriov(pdev, numvfs); + if (ret) { + cpt->vfs_enabled = 0; + return ret; + } + otx_cpt_set_eng_grps_is_rdonly(&cpt->eng_grps, true); + try_module_get(THIS_MODULE); + ret = numvfs; + } else { + pci_disable_sriov(pdev); + otx_cpt_set_eng_grps_is_rdonly(&cpt->eng_grps, false); + module_put(THIS_MODULE); + cpt->vfs_enabled = 0; + } + dev_notice(&cpt->pdev->dev, "VFs enabled: %d\n", ret); + + return ret; +} + +static int otx_cpt_probe(struct pci_dev *pdev, + const struct pci_device_id __always_unused *ent) +{ + struct device *dev = &pdev->dev; + struct otx_cpt_device *cpt; + int err; + + cpt = devm_kzalloc(dev, sizeof(*cpt), GFP_KERNEL); + if (!cpt) + return -ENOMEM; + + pci_set_drvdata(pdev, cpt); + cpt->pdev = pdev; + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + goto err_clear_drvdata; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto err_disable_device; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto err_release_regions; + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); + goto err_release_regions; + } + + /* MAP PF's configuration registers */ + cpt->reg_base = pci_iomap(pdev, OTX_CPT_PF_PCI_CFG_BAR, 0); + if (!cpt->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto err_release_regions; + } + + /* CPT device HW initialization */ + err = otx_cpt_device_init(cpt); + if (err) + goto err_unmap_region; + + /* Register interrupts */ + err = otx_cpt_register_interrupts(cpt); + if (err) + goto err_unmap_region; + + /* Initialize engine groups */ + err = otx_cpt_init_eng_grps(pdev, &cpt->eng_grps, cpt->pf_type); + if (err) + goto err_unregister_interrupts; + 
+ return 0; + +err_unregister_interrupts: + otx_cpt_unregister_interrupts(cpt); +err_unmap_region: + pci_iounmap(pdev, cpt->reg_base); +err_release_regions: + pci_release_regions(pdev); +err_disable_device: + pci_disable_device(pdev); +err_clear_drvdata: + pci_set_drvdata(pdev, NULL); + + return err; +} + +static void otx_cpt_remove(struct pci_dev *pdev) +{ + struct otx_cpt_device *cpt = pci_get_drvdata(pdev); + + if (!cpt) + return; + + /* Disable VFs */ + pci_disable_sriov(pdev); + /* Cleanup engine groups */ + otx_cpt_cleanup_eng_grps(pdev, &cpt->eng_grps); + /* Disable CPT PF interrupts */ + otx_cpt_unregister_interrupts(cpt); + /* Disengage SE and AE cores from all groups */ + otx_cpt_disable_all_cores(cpt); + pci_iounmap(pdev, cpt->reg_base); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +/* Supported devices */ +static const struct pci_device_id otx_cpt_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OTX_CPT_PCI_PF_DEVICE_ID) }, + { 0, } /* end of table */ +}; + +static struct pci_driver otx_cpt_pci_driver = { + .name = DRV_NAME, + .id_table = otx_cpt_id_table, + .probe = otx_cpt_probe, + .remove = otx_cpt_remove, + .sriov_configure = otx_cpt_sriov_configure +}; + +module_pci_driver(otx_cpt_pci_driver); + +MODULE_AUTHOR("Marvell International Ltd."); +MODULE_DESCRIPTION("Marvell OcteonTX CPT Physical Function Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, otx_cpt_id_table); diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c b/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c new file mode 100644 index 000000000000..a6774232e9a3 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "otx_cpt_common.h" +#include "otx_cptpf.h" + +static char *get_mbox_opcode_str(int msg_opcode) +{ + char *str = "Unknown"; + + switch (msg_opcode) { + case OTX_CPT_MSG_VF_UP: + str = "UP"; + break; + + case OTX_CPT_MSG_VF_DOWN: + str = "DOWN"; + break; + + case OTX_CPT_MSG_READY: + str = "READY"; + break; + + case OTX_CPT_MSG_QLEN: + str = "QLEN"; + break; + + case OTX_CPT_MSG_QBIND_GRP: + str = "QBIND_GRP"; + break; + + case OTX_CPT_MSG_VQ_PRIORITY: + str = "VQ_PRIORITY"; + break; + + case OTX_CPT_MSG_PF_TYPE: + str = "PF_TYPE"; + break; + + case OTX_CPT_MSG_ACK: + str = "ACK"; + break; + + case OTX_CPT_MSG_NACK: + str = "NACK"; + break; + } + + return str; +} + +static void dump_mbox_msg(struct otx_cpt_mbox *mbox_msg, int vf_id) +{ + char raw_data_str[OTX_CPT_MAX_MBOX_DATA_STR_SIZE]; + + hex_dump_to_buffer(mbox_msg, sizeof(struct otx_cpt_mbox), 16, 8, + raw_data_str, OTX_CPT_MAX_MBOX_DATA_STR_SIZE, false); + if (vf_id >= 0) + pr_debug("MBOX opcode %s received from VF%d raw_data %s", + get_mbox_opcode_str(mbox_msg->msg), vf_id, + raw_data_str); + else + pr_debug("MBOX opcode %s received from PF raw_data %s", + get_mbox_opcode_str(mbox_msg->msg), raw_data_str); +} + +static void otx_cpt_send_msg_to_vf(struct otx_cpt_device *cpt, int vf, + struct otx_cpt_mbox *mbx) +{ + /* Writing mbox(0) causes interrupt */ + writeq(mbx->data, cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 1)); + writeq(mbx->msg, cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 0)); +} + +/* + * ACKs VF's mailbox message + * @vf: VF to which ACK to be sent + */ +static void otx_cpt_mbox_send_ack(struct otx_cpt_device *cpt, int vf, + struct otx_cpt_mbox *mbx) +{ + mbx->data = 0ull; + mbx->msg = OTX_CPT_MSG_ACK; + otx_cpt_send_msg_to_vf(cpt, vf, mbx); +} + +/* NACKs VF's mailbox message that PF is not able 
to complete the action */ +static void otx_cptpf_mbox_send_nack(struct otx_cpt_device *cpt, int vf, + struct otx_cpt_mbox *mbx) +{ + mbx->data = 0ull; + mbx->msg = OTX_CPT_MSG_NACK; + otx_cpt_send_msg_to_vf(cpt, vf, mbx); +} + +static void otx_cpt_clear_mbox_intr(struct otx_cpt_device *cpt, u32 vf) +{ + /* W1C for the VF */ + writeq(1ull << vf, cpt->reg_base + OTX_CPT_PF_MBOX_INTX(0)); +} + +/* + * Configure QLEN/Chunk sizes for VF + */ +static void otx_cpt_cfg_qlen_for_vf(struct otx_cpt_device *cpt, int vf, + u32 size) +{ + union otx_cptx_pf_qx_ctl pf_qx_ctl; + + pf_qx_ctl.u = readq(cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); + pf_qx_ctl.s.size = size; + pf_qx_ctl.s.cont_err = true; + writeq(pf_qx_ctl.u, cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); +} + +/* + * Configure VQ priority + */ +static void otx_cpt_cfg_vq_priority(struct otx_cpt_device *cpt, int vf, u32 pri) +{ + union otx_cptx_pf_qx_ctl pf_qx_ctl; + + pf_qx_ctl.u = readq(cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); + pf_qx_ctl.s.pri = pri; + writeq(pf_qx_ctl.u, cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); +} + +static int otx_cpt_bind_vq_to_grp(struct otx_cpt_device *cpt, u8 q, u8 grp) +{ + struct device *dev = &cpt->pdev->dev; + struct otx_cpt_eng_grp_info *eng_grp; + union otx_cptx_pf_qx_ctl pf_qx_ctl; + struct otx_cpt_ucode *ucode; + + if (q >= cpt->max_vfs) { + dev_err(dev, "Requested queue %d is > than maximum avail %d", + q, cpt->max_vfs); + return -EINVAL; + } + + if (grp >= OTX_CPT_MAX_ENGINE_GROUPS) { + dev_err(dev, "Requested group %d is > than maximum avail %d", + grp, OTX_CPT_MAX_ENGINE_GROUPS); + return -EINVAL; + } + + eng_grp = &cpt->eng_grps.grp[grp]; + if (!eng_grp->is_enabled) { + dev_err(dev, "Requested engine group %d is disabled", grp); + return -EINVAL; + } + + pf_qx_ctl.u = readq(cpt->reg_base + OTX_CPT_PF_QX_CTL(q)); + pf_qx_ctl.s.grp = grp; + writeq(pf_qx_ctl.u, cpt->reg_base + OTX_CPT_PF_QX_CTL(q)); + + if (eng_grp->mirror.is_ena) + ucode = &eng_grp->g->grp[eng_grp->mirror.idx].ucode[0]; + else + 
ucode = &eng_grp->ucode[0]; + + if (otx_cpt_uc_supports_eng_type(ucode, OTX_CPT_SE_TYPES)) + return OTX_CPT_SE_TYPES; + else if (otx_cpt_uc_supports_eng_type(ucode, OTX_CPT_AE_TYPES)) + return OTX_CPT_AE_TYPES; + else + return BAD_OTX_CPTVF_TYPE; +} + +/* Interrupt handler to handle mailbox messages from VFs */ +static void otx_cpt_handle_mbox_intr(struct otx_cpt_device *cpt, int vf) +{ + int vftype = 0; + struct otx_cpt_mbox mbx = {}; + struct device *dev = &cpt->pdev->dev; + /* + * MBOX[0] contains msg + * MBOX[1] contains data + */ + mbx.msg = readq(cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 0)); + mbx.data = readq(cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 1)); + + dump_mbox_msg(&mbx, vf); + + switch (mbx.msg) { + case OTX_CPT_MSG_VF_UP: + mbx.msg = OTX_CPT_MSG_VF_UP; + mbx.data = cpt->vfs_enabled; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_READY: + mbx.msg = OTX_CPT_MSG_READY; + mbx.data = vf; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_VF_DOWN: + /* First msg in VF teardown sequence */ + otx_cpt_mbox_send_ack(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_QLEN: + otx_cpt_cfg_qlen_for_vf(cpt, vf, mbx.data); + otx_cpt_mbox_send_ack(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_QBIND_GRP: + vftype = otx_cpt_bind_vq_to_grp(cpt, vf, (u8)mbx.data); + if ((vftype != OTX_CPT_AE_TYPES) && + (vftype != OTX_CPT_SE_TYPES)) { + dev_err(dev, "VF%d binding to eng group %llu failed", + vf, mbx.data); + otx_cptpf_mbox_send_nack(cpt, vf, &mbx); + } else { + mbx.msg = OTX_CPT_MSG_QBIND_GRP; + mbx.data = vftype; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + } + break; + case OTX_CPT_MSG_PF_TYPE: + mbx.msg = OTX_CPT_MSG_PF_TYPE; + mbx.data = cpt->pf_type; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_VQ_PRIORITY: + otx_cpt_cfg_vq_priority(cpt, vf, mbx.data); + otx_cpt_mbox_send_ack(cpt, vf, &mbx); + break; + default: + dev_err(&cpt->pdev->dev, "Invalid msg from VF%d, msg 0x%llx\n", + vf, mbx.msg); + break; + } +} + 
+void otx_cpt_mbox_intr_handler (struct otx_cpt_device *cpt, int mbx) +{ + u64 intr; + u8 vf; + + intr = readq(cpt->reg_base + OTX_CPT_PF_MBOX_INTX(0)); + pr_debug("PF interrupt mbox%d mask 0x%llx\n", mbx, intr); + for (vf = 0; vf < cpt->max_vfs; vf++) { + if (intr & (1ULL << vf)) { + otx_cpt_handle_mbox_intr(cpt, vf); + otx_cpt_clear_mbox_intr(cpt, vf); + } + } +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c new file mode 100644 index 000000000000..d04baa319592 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c @@ -0,0 +1,1686 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ctype.h> +#include <linux/firmware.h> +#include "otx_cpt_common.h" +#include "otx_cptpf_ucode.h" +#include "otx_cptpf.h" + +#define CSR_DELAY 30 +/* Tar archive defines */ +#define TAR_MAGIC "ustar" +#define TAR_MAGIC_LEN 6 +#define TAR_BLOCK_LEN 512 +#define REGTYPE '0' +#define AREGTYPE '\0' + +/* tar header as defined in POSIX 1003.1-1990. 
*/ +struct tar_hdr_t { + char name[100]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char chksum[8]; + char typeflag; + char linkname[100]; + char magic[6]; + char version[2]; + char uname[32]; + char gname[32]; + char devmajor[8]; + char devminor[8]; + char prefix[155]; +}; + +struct tar_blk_t { + union { + struct tar_hdr_t hdr; + char block[TAR_BLOCK_LEN]; + }; +}; + +struct tar_arch_info_t { + struct list_head ucodes; + const struct firmware *fw; +}; + +static struct otx_cpt_bitmap get_cores_bmap(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + struct otx_cpt_bitmap bmap = { {0} }; + bool found = false; + int i; + + if (eng_grp->g->engs_num > OTX_CPT_MAX_ENGINES) { + dev_err(dev, "unsupported number of engines %d on octeontx", + eng_grp->g->engs_num); + return bmap; + } + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (eng_grp->engs[i].type) { + bitmap_or(bmap.bits, bmap.bits, + eng_grp->engs[i].bmap, + eng_grp->g->engs_num); + bmap.size = eng_grp->g->engs_num; + found = true; + } + } + + if (!found) + dev_err(dev, "No engines reserved for engine group %d", + eng_grp->idx); + return bmap; +} + +static int is_eng_type(int val, int eng_type) +{ + return val & (1 << eng_type); +} + +static int dev_supports_eng_type(struct otx_cpt_eng_grps *eng_grps, + int eng_type) +{ + return is_eng_type(eng_grps->eng_types_supported, eng_type); +} + +/* Store @filename in @ucode, truncating to OTX_CPT_UCODE_NAME_LENGTH */ +static void set_ucode_filename(struct otx_cpt_ucode *ucode, + const char *filename) +{ + /* strscpy() never reads the source beyond the given size bound */ + strscpy(ucode->filename, filename, OTX_CPT_UCODE_NAME_LENGTH); +} + +static char *get_eng_type_str(int eng_type) +{ + char *str = "unknown"; + + switch (eng_type) { + case OTX_CPT_SE_TYPES: + str = "SE"; + break; + + case OTX_CPT_AE_TYPES: + str = "AE"; + break; + } + return str; +} + +static char *get_ucode_type_str(int ucode_type) +{ + char *str = "unknown"; + + switch (ucode_type) { + case (1 << OTX_CPT_SE_TYPES): + str = "SE"; + break; + + case (1 << OTX_CPT_AE_TYPES): + str = 
"AE"; + break; + } + return str; +} + +/* + * Derive the engine-type bitmask of a microcode image from its header. + * Stores the mask in *ucode_type; returns 0 when exactly one of SE/AE + * matched, -EINVAL when none or both matched. + */ +static int get_ucode_type(struct otx_cpt_ucode_hdr *ucode_hdr, int *ucode_type) +{ + char tmp_ver_str[OTX_CPT_UCODE_VER_STR_SZ]; + u32 i, val = 0; + u8 nn; + + /* + * ver_str is read from a firmware image and may not be NUL + * terminated; strscpy() bounds the read, strlcpy() does not. + */ + strscpy(tmp_ver_str, ucode_hdr->ver_str, OTX_CPT_UCODE_VER_STR_SZ); + for (i = 0; i < strlen(tmp_ver_str); i++) + tmp_ver_str[i] = tolower(tmp_ver_str[i]); + + nn = ucode_hdr->ver_num.nn; + if (strnstr(tmp_ver_str, "se-", OTX_CPT_UCODE_VER_STR_SZ) && + (nn == OTX_CPT_SE_UC_TYPE1 || nn == OTX_CPT_SE_UC_TYPE2 || + nn == OTX_CPT_SE_UC_TYPE3)) + val |= 1 << OTX_CPT_SE_TYPES; + if (strnstr(tmp_ver_str, "ae", OTX_CPT_UCODE_VER_STR_SZ) && + nn == OTX_CPT_AE_UC_TYPE) + val |= 1 << OTX_CPT_AE_TYPES; + + *ucode_type = val; + + if (!val) + return -EINVAL; + if (is_eng_type(val, OTX_CPT_AE_TYPES) && + is_eng_type(val, OTX_CPT_SE_TYPES)) + return -EINVAL; + return 0; +} + +static int is_mem_zero(const char *ptr, int size) +{ + int i; + + for (i = 0; i < size; i++) { + if (ptr[i]) + return 0; + } + return 1; +} + +static int cpt_set_ucode_base(struct otx_cpt_eng_grp_info *eng_grp, void *obj) +{ + struct otx_cpt_device *cpt = (struct otx_cpt_device *) obj; + dma_addr_t dma_addr; + struct otx_cpt_bitmap bmap; + int i; + + bmap = get_cores_bmap(&cpt->pdev->dev, eng_grp); + if (!bmap.size) + return -EINVAL; + + if (eng_grp->mirror.is_ena) + dma_addr = + eng_grp->g->grp[eng_grp->mirror.idx].ucode[0].align_dma; + else + dma_addr = eng_grp->ucode[0].align_dma; + + /* + * Set UCODE_BASE only for the cores which are not used, + * other cores should have already valid UCODE_BASE set + */ + for_each_set_bit(i, bmap.bits, bmap.size) + if (!eng_grp->g->eng_ref_cnt[i]) + writeq((u64) dma_addr, cpt->reg_base + + OTX_CPT_PF_ENGX_UCODE_BASE(i)); + return 0; +} + +static int cpt_detach_and_disable_cores(struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + struct otx_cpt_device *cpt = (struct otx_cpt_device *) obj; + struct otx_cpt_bitmap bmap = { {0} }; + int timeout = 10; + int i, busy; + u64 reg; 
+ + bmap = get_cores_bmap(&cpt->pdev->dev, eng_grp); + if (!bmap.size) + return -EINVAL; + + /* Detach the cores from group */ + reg = readq(cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + for_each_set_bit(i, bmap.bits, bmap.size) { + if (reg & (1ull << i)) { + eng_grp->g->eng_ref_cnt[i]--; + reg &= ~(1ull << i); + } + } + writeq(reg, cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + + /* Wait for cores to become idle */ + do { + busy = 0; + usleep_range(10000, 20000); + if (timeout-- < 0) + return -EBUSY; + + reg = readq(cpt->reg_base + OTX_CPT_PF_EXEC_BUSY); + for_each_set_bit(i, bmap.bits, bmap.size) + if (reg & (1ull << i)) { + busy = 1; + break; + } + } while (busy); + + /* Disable the cores only if they are not used anymore */ + reg = readq(cpt->reg_base + OTX_CPT_PF_EXE_CTL); + for_each_set_bit(i, bmap.bits, bmap.size) + if (!eng_grp->g->eng_ref_cnt[i]) + reg &= ~(1ull << i); + writeq(reg, cpt->reg_base + OTX_CPT_PF_EXE_CTL); + + return 0; +} + +static int cpt_attach_and_enable_cores(struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + struct otx_cpt_device *cpt = (struct otx_cpt_device *) obj; + struct otx_cpt_bitmap bmap; + u64 reg; + int i; + + bmap = get_cores_bmap(&cpt->pdev->dev, eng_grp); + if (!bmap.size) + return -EINVAL; + + /* Attach the cores to the group */ + reg = readq(cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + for_each_set_bit(i, bmap.bits, bmap.size) { + if (!(reg & (1ull << i))) { + eng_grp->g->eng_ref_cnt[i]++; + reg |= 1ull << i; + } + } + writeq(reg, cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + + /* Enable the cores */ + reg = readq(cpt->reg_base + OTX_CPT_PF_EXE_CTL); + for_each_set_bit(i, bmap.bits, bmap.size) + reg |= 1ull << i; + writeq(reg, cpt->reg_base + OTX_CPT_PF_EXE_CTL); + + return 0; +} + +static int process_tar_file(struct device *dev, + struct tar_arch_info_t *tar_arch, char *filename, + const u8 *data, u32 size) +{ + struct tar_ucode_info_t *tar_info; + struct otx_cpt_ucode_hdr *ucode_hdr; + int 
ucode_type, ucode_size; + + /* + * If size is less than microcode header size then don't report + * an error because it might not be microcode file, just process + * next file from archive + */ + if (size < sizeof(struct otx_cpt_ucode_hdr)) + return 0; + + ucode_hdr = (struct otx_cpt_ucode_hdr *) data; + /* + * If microcode version can't be found don't report an error + * because it might not be microcode file, just process next file + */ + if (get_ucode_type(ucode_hdr, &ucode_type)) + return 0; + + ucode_size = ntohl(ucode_hdr->code_length) * 2; + if (!ucode_size || (size < round_up(ucode_size, 16) + + sizeof(struct otx_cpt_ucode_hdr) + OTX_CPT_UCODE_SIGN_LEN)) { + dev_err(dev, "Ucode %s invalid size", filename); + return -EINVAL; + } + + tar_info = kzalloc(sizeof(struct tar_ucode_info_t), GFP_KERNEL); + if (!tar_info) + return -ENOMEM; + + tar_info->ucode_ptr = data; + set_ucode_filename(&tar_info->ucode, filename); + memcpy(tar_info->ucode.ver_str, ucode_hdr->ver_str, + OTX_CPT_UCODE_VER_STR_SZ); + tar_info->ucode.ver_num = ucode_hdr->ver_num; + tar_info->ucode.type = ucode_type; + tar_info->ucode.size = ucode_size; + list_add_tail(&tar_info->list, &tar_arch->ucodes); + + return 0; +} + +static void release_tar_archive(struct tar_arch_info_t *tar_arch) +{ + struct tar_ucode_info_t *curr, *temp; + + if (!tar_arch) + return; + + list_for_each_entry_safe(curr, temp, &tar_arch->ucodes, list) { + list_del(&curr->list); + kfree(curr); + } + + if (tar_arch->fw) + release_firmware(tar_arch->fw); + kfree(tar_arch); +} + +static struct tar_ucode_info_t *get_uc_from_tar_archive( + struct tar_arch_info_t *tar_arch, + int ucode_type) +{ + struct tar_ucode_info_t *curr, *uc_found = NULL; + + list_for_each_entry(curr, &tar_arch->ucodes, list) { + if (!is_eng_type(curr->ucode.type, ucode_type)) + continue; + + if (!uc_found) { + uc_found = curr; + continue; + } + + switch (ucode_type) { + case OTX_CPT_AE_TYPES: + break; + + case OTX_CPT_SE_TYPES: + if 
(uc_found->ucode.ver_num.nn == OTX_CPT_SE_UC_TYPE2 || + (uc_found->ucode.ver_num.nn == OTX_CPT_SE_UC_TYPE3 + && curr->ucode.ver_num.nn == OTX_CPT_SE_UC_TYPE1)) + uc_found = curr; + break; + } + } + + return uc_found; +} + +/* Dump the loaded tar archive and every microcode found in it via pr_debug */ +static void print_tar_dbg_info(struct tar_arch_info_t *tar_arch, + char *tar_filename) +{ + struct tar_ucode_info_t *curr; + + pr_debug("Tar archive filename %s", tar_filename); + /* fw->size is a size_t, so it has to be printed with %zu */ + pr_debug("Tar archive pointer %p, size %zu", tar_arch->fw->data, + tar_arch->fw->size); + list_for_each_entry(curr, &tar_arch->ucodes, list) { + pr_debug("Ucode filename %s", curr->ucode.filename); + pr_debug("Ucode version string %s", curr->ucode.ver_str); + pr_debug("Ucode version %d.%d.%d.%d", + curr->ucode.ver_num.nn, curr->ucode.ver_num.xx, + curr->ucode.ver_num.yy, curr->ucode.ver_num.zz); + pr_debug("Ucode type (%d) %s", curr->ucode.type, + get_ucode_type_str(curr->ucode.type)); + pr_debug("Ucode size %d", curr->ucode.size); + pr_debug("Ucode ptr %p\n", curr->ucode_ptr); + } +} + +static struct tar_arch_info_t *load_tar_archive(struct device *dev, + char *tar_filename) +{ + struct tar_arch_info_t *tar_arch = NULL; + struct tar_blk_t *tar_blk; + unsigned int cur_size; + size_t tar_offs = 0; + size_t tar_size; + int ret; + + tar_arch = kzalloc(sizeof(struct tar_arch_info_t), GFP_KERNEL); + if (!tar_arch) + return NULL; + + INIT_LIST_HEAD(&tar_arch->ucodes); + + /* Load tar archive */ + ret = request_firmware(&tar_arch->fw, tar_filename, dev); + if (ret) + goto release_tar_arch; + + if (tar_arch->fw->size < TAR_BLOCK_LEN) { + dev_err(dev, "Invalid tar archive %s ", tar_filename); + goto release_tar_arch; + } + + tar_size = tar_arch->fw->size; + tar_blk = (struct tar_blk_t *) tar_arch->fw->data; + if (strncmp(tar_blk->hdr.magic, TAR_MAGIC, TAR_MAGIC_LEN - 1)) { + dev_err(dev, "Unsupported format of tar archive %s", + tar_filename); + goto release_tar_arch; + } + + while (1) { + /* Read current file size */ + ret = kstrtouint(tar_blk->hdr.size, 8, &cur_size); + if 
(ret) + goto release_tar_arch; + + if (tar_offs + cur_size > tar_size || + tar_offs + 2*TAR_BLOCK_LEN > tar_size) { + dev_err(dev, "Invalid tar archive %s ", tar_filename); + goto release_tar_arch; + } + + tar_offs += TAR_BLOCK_LEN; + if (tar_blk->hdr.typeflag == REGTYPE || + tar_blk->hdr.typeflag == AREGTYPE) { + ret = process_tar_file(dev, tar_arch, + tar_blk->hdr.name, + &tar_arch->fw->data[tar_offs], + cur_size); + if (ret) + goto release_tar_arch; + } + + tar_offs += (cur_size/TAR_BLOCK_LEN) * TAR_BLOCK_LEN; + if (cur_size % TAR_BLOCK_LEN) + tar_offs += TAR_BLOCK_LEN; + + /* Check for the end of the archive */ + if (tar_offs + 2*TAR_BLOCK_LEN > tar_size) { + dev_err(dev, "Invalid tar archive %s ", tar_filename); + goto release_tar_arch; + } + + if (is_mem_zero(&tar_arch->fw->data[tar_offs], + 2*TAR_BLOCK_LEN)) + break; + + /* Read next block from tar archive */ + tar_blk = (struct tar_blk_t *) &tar_arch->fw->data[tar_offs]; + } + + print_tar_dbg_info(tar_arch, tar_filename); + return tar_arch; +release_tar_arch: + release_tar_archive(tar_arch); + return NULL; +} + +static struct otx_cpt_engs_rsvd *find_engines_by_type( + struct otx_cpt_eng_grp_info *eng_grp, + int eng_type) +{ + int i; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!eng_grp->engs[i].type) + continue; + + if (eng_grp->engs[i].type == eng_type) + return &eng_grp->engs[i]; + } + return NULL; +} + +int otx_cpt_uc_supports_eng_type(struct otx_cpt_ucode *ucode, int eng_type) +{ + return is_eng_type(ucode->type, eng_type); +} +EXPORT_SYMBOL_GPL(otx_cpt_uc_supports_eng_type); + +int otx_cpt_eng_grp_has_eng_type(struct otx_cpt_eng_grp_info *eng_grp, + int eng_type) +{ + struct otx_cpt_engs_rsvd *engs; + + engs = find_engines_by_type(eng_grp, eng_type); + + return (engs != NULL ? 
1 : 0); +} +EXPORT_SYMBOL_GPL(otx_cpt_eng_grp_has_eng_type); + +static void print_ucode_info(struct otx_cpt_eng_grp_info *eng_grp, + char *buf, int size) +{ + if (eng_grp->mirror.is_ena) { + scnprintf(buf, size, "%s (shared with engine_group%d)", + eng_grp->g->grp[eng_grp->mirror.idx].ucode[0].ver_str, + eng_grp->mirror.idx); + } else { + scnprintf(buf, size, "%s", eng_grp->ucode[0].ver_str); + } +} + +static void print_engs_info(struct otx_cpt_eng_grp_info *eng_grp, + char *buf, int size, int idx) +{ + struct otx_cpt_engs_rsvd *mirrored_engs = NULL; + struct otx_cpt_engs_rsvd *engs; + int len, i; + + buf[0] = '\0'; + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + if (idx != -1 && idx != i) + continue; + + if (eng_grp->mirror.is_ena) + mirrored_engs = find_engines_by_type( + &eng_grp->g->grp[eng_grp->mirror.idx], + engs->type); + if (i > 0 && idx == -1) { + len = strlen(buf); + scnprintf(buf+len, size-len, ", "); + } + + len = strlen(buf); + scnprintf(buf+len, size-len, "%d %s ", mirrored_engs ? + engs->count + mirrored_engs->count : engs->count, + get_eng_type_str(engs->type)); + if (mirrored_engs) { + len = strlen(buf); + scnprintf(buf+len, size-len, + "(%d shared with engine_group%d) ", + engs->count <= 0 ? 
engs->count + + mirrored_engs->count : mirrored_engs->count, + eng_grp->mirror.idx); + } + } +} + +static void print_ucode_dbg_info(struct otx_cpt_ucode *ucode) +{ + pr_debug("Ucode info"); + pr_debug("Ucode version string %s", ucode->ver_str); + pr_debug("Ucode version %d.%d.%d.%d", ucode->ver_num.nn, + ucode->ver_num.xx, ucode->ver_num.yy, ucode->ver_num.zz); + pr_debug("Ucode type %s", get_ucode_type_str(ucode->type)); + pr_debug("Ucode size %d", ucode->size); + pr_debug("Ucode virt address %16.16llx", (u64)ucode->align_va); + pr_debug("Ucode phys address %16.16llx\n", ucode->align_dma); +} + +/* + * Format the combined core mask of @eng_grp into @buf as two 32-bit hex + * words (high word first), or "unknown" when the group has no cores. + */ +static void cpt_print_engines_mask(struct otx_cpt_eng_grp_info *eng_grp, + struct device *dev, char *buf, int size) +{ + struct otx_cpt_bitmap bmap; + /* + * Zeroed because bitmap_to_arr32() only writes the words covered by + * bmap.size; the upper word would otherwise be stale stack data. + */ + u32 mask[2] = { 0 }; + + bmap = get_cores_bmap(dev, eng_grp); + if (!bmap.size) { + scnprintf(buf, size, "unknown"); + return; + } + bitmap_to_arr32(mask, bmap.bits, bmap.size); + scnprintf(buf, size, "%8.8x %8.8x", mask[1], mask[0]); +} + + +static void print_dbg_info(struct device *dev, + struct otx_cpt_eng_grps *eng_grps) +{ + char engs_info[2*OTX_CPT_UCODE_NAME_LENGTH]; + struct otx_cpt_eng_grp_info *mirrored_grp; + char engs_mask[OTX_CPT_UCODE_NAME_LENGTH]; + struct otx_cpt_eng_grp_info *grp; + struct otx_cpt_engs_rsvd *engs; + /* Zeroed: words beyond engs_num bits are never written by bitmap_to_arr32() */ + u32 mask[4] = { 0 }; + int i, j; + + pr_debug("Engine groups global info"); + pr_debug("max SE %d, max AE %d", + eng_grps->avail.max_se_cnt, eng_grps->avail.max_ae_cnt); + pr_debug("free SE %d", eng_grps->avail.se_cnt); + pr_debug("free AE %d", eng_grps->avail.ae_cnt); + + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + pr_debug("engine_group%d, state %s", i, grp->is_enabled ? + "enabled" : "disabled"); + if (grp->is_enabled) { + mirrored_grp = &eng_grps->grp[grp->mirror.idx]; + pr_debug("Ucode0 filename %s, version %s", + grp->mirror.is_ena ? + mirrored_grp->ucode[0].filename : + grp->ucode[0].filename, + grp->mirror.is_ena ? 
+ mirrored_grp->ucode[0].ver_str : + grp->ucode[0].ver_str); + } + + for (j = 0; j < OTX_CPT_MAX_ETYPES_PER_GRP; j++) { + engs = &grp->engs[j]; + if (engs->type) { + print_engs_info(grp, engs_info, + 2*OTX_CPT_UCODE_NAME_LENGTH, j); + pr_debug("Slot%d: %s", j, engs_info); + bitmap_to_arr32(mask, engs->bmap, + eng_grps->engs_num); + pr_debug("Mask: %8.8x %8.8x %8.8x %8.8x", + mask[3], mask[2], mask[1], mask[0]); + } else + pr_debug("Slot%d not used", j); + } + if (grp->is_enabled) { + cpt_print_engines_mask(grp, dev, engs_mask, + OTX_CPT_UCODE_NAME_LENGTH); + pr_debug("Cmask: %s", engs_mask); + } + } +} + +static int update_engines_avail_count(struct device *dev, + struct otx_cpt_engs_available *avail, + struct otx_cpt_engs_rsvd *engs, int val) +{ + switch (engs->type) { + case OTX_CPT_SE_TYPES: + avail->se_cnt += val; + break; + + case OTX_CPT_AE_TYPES: + avail->ae_cnt += val; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", engs->type); + return -EINVAL; + } + + return 0; +} + +static int update_engines_offset(struct device *dev, + struct otx_cpt_engs_available *avail, + struct otx_cpt_engs_rsvd *engs) +{ + switch (engs->type) { + case OTX_CPT_SE_TYPES: + engs->offset = 0; + break; + + case OTX_CPT_AE_TYPES: + engs->offset = avail->max_se_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", engs->type); + return -EINVAL; + } + + return 0; +} + +static int release_engines(struct device *dev, struct otx_cpt_eng_grp_info *grp) +{ + int i, ret = 0; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!grp->engs[i].type) + continue; + + if (grp->engs[i].count > 0) { + ret = update_engines_avail_count(dev, &grp->g->avail, + &grp->engs[i], + grp->engs[i].count); + if (ret) + return ret; + } + + grp->engs[i].type = 0; + grp->engs[i].count = 0; + grp->engs[i].offset = 0; + grp->engs[i].ucode = NULL; + bitmap_zero(grp->engs[i].bmap, grp->g->engs_num); + } + + return 0; +} + +static int do_reserve_engines(struct device *dev, + struct 
otx_cpt_eng_grp_info *grp, + struct otx_cpt_engines *req_engs) +{ + struct otx_cpt_engs_rsvd *engs = NULL; + int i, ret; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!grp->engs[i].type) { + engs = &grp->engs[i]; + break; + } + } + + if (!engs) + return -ENOMEM; + + engs->type = req_engs->type; + engs->count = req_engs->count; + + ret = update_engines_offset(dev, &grp->g->avail, engs); + if (ret) + return ret; + + if (engs->count > 0) { + ret = update_engines_avail_count(dev, &grp->g->avail, engs, + -engs->count); + if (ret) + return ret; + } + + return 0; +} + +static int check_engines_availability(struct device *dev, + struct otx_cpt_eng_grp_info *grp, + struct otx_cpt_engines *req_eng) +{ + int avail_cnt = 0; + + switch (req_eng->type) { + case OTX_CPT_SE_TYPES: + avail_cnt = grp->g->avail.se_cnt; + break; + + case OTX_CPT_AE_TYPES: + avail_cnt = grp->g->avail.ae_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", req_eng->type); + return -EINVAL; + } + + if (avail_cnt < req_eng->count) { + dev_err(dev, + "Error available %s engines %d < than requested %d", + get_eng_type_str(req_eng->type), + avail_cnt, req_eng->count); + return -EBUSY; + } + + return 0; +} + +static int reserve_engines(struct device *dev, struct otx_cpt_eng_grp_info *grp, + struct otx_cpt_engines *req_engs, int req_cnt) +{ + int i, ret; + + /* Validate if a number of requested engines is available */ + for (i = 0; i < req_cnt; i++) { + ret = check_engines_availability(dev, grp, &req_engs[i]); + if (ret) + return ret; + } + + /* Reserve requested engines for this engine group */ + for (i = 0; i < req_cnt; i++) { + ret = do_reserve_engines(dev, grp, &req_engs[i]); + if (ret) + return ret; + } + return 0; +} + +static ssize_t eng_grp_info_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + char ucode_info[2*OTX_CPT_UCODE_NAME_LENGTH]; + char engs_info[2*OTX_CPT_UCODE_NAME_LENGTH]; + char engs_mask[OTX_CPT_UCODE_NAME_LENGTH]; + struct 
otx_cpt_eng_grp_info *eng_grp; + int ret; + + eng_grp = container_of(attr, struct otx_cpt_eng_grp_info, info_attr); + mutex_lock(&eng_grp->g->lock); + + print_engs_info(eng_grp, engs_info, 2*OTX_CPT_UCODE_NAME_LENGTH, -1); + print_ucode_info(eng_grp, ucode_info, 2*OTX_CPT_UCODE_NAME_LENGTH); + cpt_print_engines_mask(eng_grp, dev, engs_mask, + OTX_CPT_UCODE_NAME_LENGTH); + ret = scnprintf(buf, PAGE_SIZE, + "Microcode : %s\nEngines: %s\nEngines mask: %s\n", + ucode_info, engs_info, engs_mask); + + mutex_unlock(&eng_grp->g->lock); + return ret; +} + +static int create_sysfs_eng_grps_info(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + int ret; + + eng_grp->info_attr.show = eng_grp_info_show; + eng_grp->info_attr.store = NULL; + eng_grp->info_attr.attr.name = eng_grp->sysfs_info_name; + eng_grp->info_attr.attr.mode = 0440; + sysfs_attr_init(&eng_grp->info_attr.attr); + ret = device_create_file(dev, &eng_grp->info_attr); + if (ret) + return ret; + + return 0; +} + +static void ucode_unload(struct device *dev, struct otx_cpt_ucode *ucode) +{ + if (ucode->va) { + dma_free_coherent(dev, ucode->size + OTX_CPT_UCODE_ALIGNMENT, + ucode->va, ucode->dma); + ucode->va = NULL; + ucode->align_va = NULL; + ucode->dma = 0; + ucode->align_dma = 0; + ucode->size = 0; + } + + memset(&ucode->ver_str, 0, OTX_CPT_UCODE_VER_STR_SZ); + memset(&ucode->ver_num, 0, sizeof(struct otx_cpt_ucode_ver_num)); + set_ucode_filename(ucode, ""); + ucode->type = 0; +} + +static int copy_ucode_to_dma_mem(struct device *dev, + struct otx_cpt_ucode *ucode, + const u8 *ucode_data) +{ + u32 i; + + /* Allocate DMAable space */ + ucode->va = dma_alloc_coherent(dev, ucode->size + + OTX_CPT_UCODE_ALIGNMENT, + &ucode->dma, GFP_KERNEL); + if (!ucode->va) { + dev_err(dev, "Unable to allocate space for microcode"); + return -ENOMEM; + } + ucode->align_va = PTR_ALIGN(ucode->va, OTX_CPT_UCODE_ALIGNMENT); + ucode->align_dma = PTR_ALIGN(ucode->dma, OTX_CPT_UCODE_ALIGNMENT); + + memcpy((void *) 
ucode->align_va, (void *) ucode_data + + sizeof(struct otx_cpt_ucode_hdr), ucode->size); + + /* Byte swap 64-bit */ + for (i = 0; i < (ucode->size / 8); i++) + ((u64 *)ucode->align_va)[i] = + cpu_to_be64(((u64 *)ucode->align_va)[i]); + /* Ucode needs 16-bit swap */ + for (i = 0; i < (ucode->size / 2); i++) + ((u16 *)ucode->align_va)[i] = + cpu_to_be16(((u16 *)ucode->align_va)[i]); + return 0; +} + +/* + * Load microcode image @ucode_filename through request_firmware(), fill + * @ucode from its header and copy the code into DMA memory. + * Returns 0 on success or a negative errno; the firmware is always released. + */ +static int ucode_load(struct device *dev, struct otx_cpt_ucode *ucode, + const char *ucode_filename) +{ + struct otx_cpt_ucode_hdr *ucode_hdr; + const struct firmware *fw; + int ret; + + set_ucode_filename(ucode, ucode_filename); + ret = request_firmware(&fw, ucode->filename, dev); + if (ret) + return ret; + + /* The header is parsed below; reject images too short to hold one */ + if (fw->size < sizeof(struct otx_cpt_ucode_hdr)) { + dev_err(dev, "Ucode %s invalid size", ucode_filename); + ret = -EINVAL; + goto release_fw; + } + + ucode_hdr = (struct otx_cpt_ucode_hdr *) fw->data; + memcpy(ucode->ver_str, ucode_hdr->ver_str, OTX_CPT_UCODE_VER_STR_SZ); + ucode->ver_num = ucode_hdr->ver_num; + ucode->size = ntohl(ucode_hdr->code_length) * 2; + if (!ucode->size || (fw->size < round_up(ucode->size, 16) + + sizeof(struct otx_cpt_ucode_hdr) + OTX_CPT_UCODE_SIGN_LEN)) { + dev_err(dev, "Ucode %s invalid size", ucode_filename); + ret = -EINVAL; + goto release_fw; + } + + ret = get_ucode_type(ucode_hdr, &ucode->type); + if (ret) { + dev_err(dev, "Microcode %s unknown type 0x%x", ucode->filename, + ucode->type); + goto release_fw; + } + + ret = copy_ucode_to_dma_mem(dev, ucode, fw->data); + if (ret) + goto release_fw; + + print_ucode_dbg_info(ucode); +release_fw: + release_firmware(fw); + return ret; +} + +static int enable_eng_grp(struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + int ret; + + ret = cpt_set_ucode_base(eng_grp, obj); + if (ret) + return ret; + + ret = cpt_attach_and_enable_cores(eng_grp, obj); + return ret; +} + +static int disable_eng_grp(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + int i, ret; + + ret = cpt_detach_and_disable_cores(eng_grp, obj); + if (ret) + return ret; + + /* Unload ucode used by this engine group */ + ucode_unload(dev, 
&eng_grp->ucode[0]); + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!eng_grp->engs[i].type) + continue; + + eng_grp->engs[i].ucode = &eng_grp->ucode[0]; + } + + ret = cpt_set_ucode_base(eng_grp, obj); + + return ret; +} + +static void setup_eng_grp_mirroring(struct otx_cpt_eng_grp_info *dst_grp, + struct otx_cpt_eng_grp_info *src_grp) +{ + /* Setup fields for engine group which is mirrored */ + src_grp->mirror.is_ena = false; + src_grp->mirror.idx = 0; + src_grp->mirror.ref_count++; + + /* Setup fields for mirroring engine group */ + dst_grp->mirror.is_ena = true; + dst_grp->mirror.idx = src_grp->idx; + dst_grp->mirror.ref_count = 0; +} + +static void remove_eng_grp_mirroring(struct otx_cpt_eng_grp_info *dst_grp) +{ + struct otx_cpt_eng_grp_info *src_grp; + + if (!dst_grp->mirror.is_ena) + return; + + src_grp = &dst_grp->g->grp[dst_grp->mirror.idx]; + + src_grp->mirror.ref_count--; + dst_grp->mirror.is_ena = false; + dst_grp->mirror.idx = 0; + dst_grp->mirror.ref_count = 0; +} + +static void update_requested_engs(struct otx_cpt_eng_grp_info *mirrored_eng_grp, + struct otx_cpt_engines *engs, int engs_cnt) +{ + struct otx_cpt_engs_rsvd *mirrored_engs; + int i; + + for (i = 0; i < engs_cnt; i++) { + mirrored_engs = find_engines_by_type(mirrored_eng_grp, + engs[i].type); + if (!mirrored_engs) + continue; + + /* + * If mirrored group has this type of engines attached then + * there are 3 scenarios possible: + * 1) mirrored_engs.count == engs[i].count then all engines + * from mirrored engine group will be shared with this engine + * group + * 2) mirrored_engs.count > engs[i].count then only a subset of + * engines from mirrored engine group will be shared with this + * engine group + * 3) mirrored_engs.count < engs[i].count then all engines + * from mirrored engine group will be shared with this group + * and additional engines will be reserved for exclusively use + * by this engine group + */ + engs[i].count -= mirrored_engs->count; + } +} + +static 
struct otx_cpt_eng_grp_info *find_mirrored_eng_grp( + struct otx_cpt_eng_grp_info *grp) +{ + struct otx_cpt_eng_grps *eng_grps = grp->g; + int i; + + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + if (!eng_grps->grp[i].is_enabled) + continue; + if (eng_grps->grp[i].ucode[0].type) + continue; + if (grp->idx == i) + continue; + if (!strncasecmp(eng_grps->grp[i].ucode[0].ver_str, + grp->ucode[0].ver_str, + OTX_CPT_UCODE_VER_STR_SZ)) + return &eng_grps->grp[i]; + } + + return NULL; +} + +static struct otx_cpt_eng_grp_info *find_unused_eng_grp( + struct otx_cpt_eng_grps *eng_grps) +{ + int i; + + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + if (!eng_grps->grp[i].is_enabled) + return &eng_grps->grp[i]; + } + return NULL; +} + +static int eng_grp_update_masks(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + struct otx_cpt_engs_rsvd *engs, *mirrored_engs; + struct otx_cpt_bitmap tmp_bmap = { {0} }; + int i, j, cnt, max_cnt; + int bit; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + if (engs->count <= 0) + continue; + + switch (engs->type) { + case OTX_CPT_SE_TYPES: + max_cnt = eng_grp->g->avail.max_se_cnt; + break; + + case OTX_CPT_AE_TYPES: + max_cnt = eng_grp->g->avail.max_ae_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d", engs->type); + return -EINVAL; + } + + cnt = engs->count; + WARN_ON(engs->offset + max_cnt > OTX_CPT_MAX_ENGINES); + bitmap_zero(tmp_bmap.bits, eng_grp->g->engs_num); + for (j = engs->offset; j < engs->offset + max_cnt; j++) { + if (!eng_grp->g->eng_ref_cnt[j]) { + bitmap_set(tmp_bmap.bits, j, 1); + cnt--; + if (!cnt) + break; + } + } + + if (cnt) + return -ENOSPC; + + bitmap_copy(engs->bmap, tmp_bmap.bits, eng_grp->g->engs_num); + } + + if (!eng_grp->mirror.is_ena) + return 0; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + + mirrored_engs = find_engines_by_type( + 
&eng_grp->g->grp[eng_grp->mirror.idx], + engs->type); + WARN_ON(!mirrored_engs && engs->count <= 0); + if (!mirrored_engs) + continue; + + bitmap_copy(tmp_bmap.bits, mirrored_engs->bmap, + eng_grp->g->engs_num); + if (engs->count < 0) { + bit = find_first_bit(mirrored_engs->bmap, + eng_grp->g->engs_num); + bitmap_clear(tmp_bmap.bits, bit, -engs->count); + } + bitmap_or(engs->bmap, engs->bmap, tmp_bmap.bits, + eng_grp->g->engs_num); + } + return 0; +} + +static int delete_engine_group(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + int i, ret; + + if (!eng_grp->is_enabled) + return -EINVAL; + + if (eng_grp->mirror.ref_count) { + dev_err(dev, "Can't delete engine_group%d as it is used by:", + eng_grp->idx); + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + if (eng_grp->g->grp[i].mirror.is_ena && + eng_grp->g->grp[i].mirror.idx == eng_grp->idx) + dev_err(dev, "engine_group%d", i); + } + return -EINVAL; + } + + /* Removing engine group mirroring if enabled */ + remove_eng_grp_mirroring(eng_grp); + + /* Disable engine group */ + ret = disable_eng_grp(dev, eng_grp, eng_grp->g->obj); + if (ret) + return ret; + + /* Release all engines held by this engine group */ + ret = release_engines(dev, eng_grp); + if (ret) + return ret; + + device_remove_file(dev, &eng_grp->info_attr); + eng_grp->is_enabled = false; + + return 0; +} + +static int validate_1_ucode_scenario(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp, + struct otx_cpt_engines *engs, int engs_cnt) +{ + int i; + + /* Verify that ucode loaded supports requested engine types */ + for (i = 0; i < engs_cnt; i++) { + if (!otx_cpt_uc_supports_eng_type(&eng_grp->ucode[0], + engs[i].type)) { + dev_err(dev, + "Microcode %s does not support %s engines", + eng_grp->ucode[0].filename, + get_eng_type_str(engs[i].type)); + return -EINVAL; + } + } + return 0; +} + +static void update_ucode_ptrs(struct otx_cpt_eng_grp_info *eng_grp) +{ + struct otx_cpt_ucode *ucode; + + if (eng_grp->mirror.is_ena) + 
ucode = &eng_grp->g->grp[eng_grp->mirror.idx].ucode[0]; + else + ucode = &eng_grp->ucode[0]; + WARN_ON(!eng_grp->engs[0].type); + eng_grp->engs[0].ucode = ucode; +} + +static int create_engine_group(struct device *dev, + struct otx_cpt_eng_grps *eng_grps, + struct otx_cpt_engines *engs, int engs_cnt, + void *ucode_data[], int ucodes_cnt, + bool use_uc_from_tar_arch) +{ + struct otx_cpt_eng_grp_info *mirrored_eng_grp; + struct tar_ucode_info_t *tar_info; + struct otx_cpt_eng_grp_info *eng_grp; + int i, ret = 0; + + if (ucodes_cnt > OTX_CPT_MAX_ETYPES_PER_GRP) + return -EINVAL; + + /* Validate if requested engine types are supported by this device */ + for (i = 0; i < engs_cnt; i++) + if (!dev_supports_eng_type(eng_grps, engs[i].type)) { + dev_err(dev, "Device does not support %s engines", + get_eng_type_str(engs[i].type)); + return -EPERM; + } + + /* Find engine group which is not used */ + eng_grp = find_unused_eng_grp(eng_grps); + if (!eng_grp) { + dev_err(dev, "Error all engine groups are being used"); + return -ENOSPC; + } + + /* Load ucode */ + for (i = 0; i < ucodes_cnt; i++) { + if (use_uc_from_tar_arch) { + tar_info = (struct tar_ucode_info_t *) ucode_data[i]; + eng_grp->ucode[i] = tar_info->ucode; + ret = copy_ucode_to_dma_mem(dev, &eng_grp->ucode[i], + tar_info->ucode_ptr); + } else + ret = ucode_load(dev, &eng_grp->ucode[i], + (char *) ucode_data[i]); + if (ret) + goto err_ucode_unload; + } + + /* Validate scenario where 1 ucode is used */ + ret = validate_1_ucode_scenario(dev, eng_grp, engs, engs_cnt); + if (ret) + goto err_ucode_unload; + + /* Check if this group mirrors another existing engine group */ + mirrored_eng_grp = find_mirrored_eng_grp(eng_grp); + if (mirrored_eng_grp) { + /* Setup mirroring */ + setup_eng_grp_mirroring(eng_grp, mirrored_eng_grp); + + /* + * Update count of requested engines because some + * of them might be shared with mirrored group + */ + update_requested_engs(mirrored_eng_grp, engs, engs_cnt); + } + + /* Reserve engines */ 
+ ret = reserve_engines(dev, eng_grp, engs, engs_cnt); + if (ret) + goto err_ucode_unload; + + /* Update ucode pointers used by engines */ + update_ucode_ptrs(eng_grp); + + /* Update engine masks used by this group */ + ret = eng_grp_update_masks(dev, eng_grp); + if (ret) + goto err_release_engs; + + /* Create sysfs entry for engine group info */ + ret = create_sysfs_eng_grps_info(dev, eng_grp); + if (ret) + goto err_release_engs; + + /* Enable engine group */ + ret = enable_eng_grp(eng_grp, eng_grps->obj); + if (ret) + goto err_release_engs; + + /* + * If this engine group mirrors another engine group + * then we need to unload ucode as we will use ucode + * from mirrored engine group + */ + if (eng_grp->mirror.is_ena) + ucode_unload(dev, &eng_grp->ucode[0]); + + eng_grp->is_enabled = true; + if (eng_grp->mirror.is_ena) + dev_info(dev, + "Engine_group%d: reuse microcode %s from group %d", + eng_grp->idx, mirrored_eng_grp->ucode[0].ver_str, + mirrored_eng_grp->idx); + else + dev_info(dev, "Engine_group%d: microcode loaded %s", + eng_grp->idx, eng_grp->ucode[0].ver_str); + + return 0; + +err_release_engs: + release_engines(dev, eng_grp); +err_ucode_unload: + ucode_unload(dev, &eng_grp->ucode[0]); + return ret; +} + +static ssize_t ucode_load_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cpt_engines engs[OTX_CPT_MAX_ETYPES_PER_GRP] = { {0} }; + char *ucode_filename[OTX_CPT_MAX_ETYPES_PER_GRP]; + char tmp_buf[OTX_CPT_UCODE_NAME_LENGTH] = { 0 }; + char *start, *val, *err_msg, *tmp; + struct otx_cpt_eng_grps *eng_grps; + int grp_idx = 0, ret = -EINVAL; + bool has_se, has_ie, has_ae; + int del_grp_idx = -1; + int ucode_idx = 0; + + if (strlen(buf) > OTX_CPT_UCODE_NAME_LENGTH) + return -EINVAL; + + eng_grps = container_of(attr, struct otx_cpt_eng_grps, ucode_load_attr); + err_msg = "Invalid engine group format"; + strlcpy(tmp_buf, buf, OTX_CPT_UCODE_NAME_LENGTH); + start = tmp_buf; + + has_se = has_ie = has_ae 
= false; + + for (;;) { + val = strsep(&start, ";"); + if (!val) + break; + val = strim(val); + if (!*val) + continue; + + if (!strncasecmp(val, "engine_group", 12)) { + if (del_grp_idx != -1) + goto err_print; + tmp = strim(strsep(&val, ":")); + if (!val) + goto err_print; + if (strlen(tmp) != 13) + goto err_print; + if (kstrtoint((tmp + 12), 10, &del_grp_idx)) + goto err_print; + val = strim(val); + if (strncasecmp(val, "null", 4)) + goto err_print; + if (strlen(val) != 4) + goto err_print; + } else if (!strncasecmp(val, "se", 2) && strchr(val, ':')) { + if (has_se || ucode_idx) + goto err_print; + tmp = strim(strsep(&val, ":")); + if (!val) + goto err_print; + if (strlen(tmp) != 2) + goto err_print; + if (kstrtoint(strim(val), 10, &engs[grp_idx].count)) + goto err_print; + engs[grp_idx++].type = OTX_CPT_SE_TYPES; + has_se = true; + } else if (!strncasecmp(val, "ae", 2) && strchr(val, ':')) { + if (has_ae || ucode_idx) + goto err_print; + tmp = strim(strsep(&val, ":")); + if (!val) + goto err_print; + if (strlen(tmp) != 2) + goto err_print; + if (kstrtoint(strim(val), 10, &engs[grp_idx].count)) + goto err_print; + engs[grp_idx++].type = OTX_CPT_AE_TYPES; + has_ae = true; + } else { + if (ucode_idx > 1) + goto err_print; + if (!strlen(val)) + goto err_print; + if (strnstr(val, " ", strlen(val))) + goto err_print; + ucode_filename[ucode_idx++] = val; + } + } + + /* Validate input parameters */ + if (del_grp_idx == -1) { + if (!(grp_idx && ucode_idx)) + goto err_print; + + if (ucode_idx > 1 && grp_idx < 2) + goto err_print; + + if (grp_idx > OTX_CPT_MAX_ETYPES_PER_GRP) { + err_msg = "Error max 2 engine types can be attached"; + goto err_print; + } + + } else { + if (del_grp_idx < 0 || + del_grp_idx >= OTX_CPT_MAX_ENGINE_GROUPS) { + dev_err(dev, "Invalid engine group index %d", + del_grp_idx); + ret = -EINVAL; + return ret; + } + + if (!eng_grps->grp[del_grp_idx].is_enabled) { + dev_err(dev, "Error engine_group%d is not configured", + del_grp_idx); + ret = -EINVAL; + 
return ret; + } + + if (grp_idx || ucode_idx) + goto err_print; + } + + mutex_lock(&eng_grps->lock); + + if (eng_grps->is_rdonly) { + dev_err(dev, "Disable VFs before modifying engine groups\n"); + ret = -EACCES; + goto err_unlock; + } + + if (del_grp_idx == -1) + /* create engine group */ + ret = create_engine_group(dev, eng_grps, engs, grp_idx, + (void **) ucode_filename, + ucode_idx, false); + else + /* delete engine group */ + ret = delete_engine_group(dev, &eng_grps->grp[del_grp_idx]); + if (ret) + goto err_unlock; + + print_dbg_info(dev, eng_grps); +err_unlock: + mutex_unlock(&eng_grps->lock); + return ret ? ret : count; +err_print: + dev_err(dev, "%s\n", err_msg); + + return ret; +} + +int otx_cpt_try_create_default_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps, + int pf_type) +{ + struct tar_ucode_info_t *tar_info[OTX_CPT_MAX_ETYPES_PER_GRP] = { 0 }; + struct otx_cpt_engines engs[OTX_CPT_MAX_ETYPES_PER_GRP] = { {0} }; + struct tar_arch_info_t *tar_arch = NULL; + char *tar_filename; + int i, ret = 0; + + mutex_lock(&eng_grps->lock); + + /* + * We don't create engine group for kernel crypto if attempt to create + * it was already made (when user enabled VFs for the first time) + */ + if (eng_grps->is_first_try) + goto unlock_mutex; + eng_grps->is_first_try = true; + + /* We create group for kcrypto only if no groups are configured */ + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) + if (eng_grps->grp[i].is_enabled) + goto unlock_mutex; + + switch (pf_type) { + case OTX_CPT_AE: + case OTX_CPT_SE: + tar_filename = OTX_CPT_UCODE_TAR_FILE_NAME; + break; + + default: + dev_err(&pdev->dev, "Unknown PF type %d\n", pf_type); + ret = -EINVAL; + goto unlock_mutex; + } + + tar_arch = load_tar_archive(&pdev->dev, tar_filename); + if (!tar_arch) + goto unlock_mutex; + + /* + * If device supports SE engines and there is SE microcode in tar + * archive try to create engine group with SE engines for kernel + * crypto functionality (symmetric crypto) + 
*/ + tar_info[0] = get_uc_from_tar_archive(tar_arch, OTX_CPT_SE_TYPES); + if (tar_info[0] && + dev_supports_eng_type(eng_grps, OTX_CPT_SE_TYPES)) { + + engs[0].type = OTX_CPT_SE_TYPES; + engs[0].count = eng_grps->avail.max_se_cnt; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 1, + (void **) tar_info, 1, true); + if (ret) + goto release_tar_arch; + } + /* + * If device supports AE engines and there is AE microcode in tar + * archive try to create engine group with AE engines for asymmetric + * crypto functionality. + */ + tar_info[0] = get_uc_from_tar_archive(tar_arch, OTX_CPT_AE_TYPES); + if (tar_info[0] && + dev_supports_eng_type(eng_grps, OTX_CPT_AE_TYPES)) { + + engs[0].type = OTX_CPT_AE_TYPES; + engs[0].count = eng_grps->avail.max_ae_cnt; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 1, + (void **) tar_info, 1, true); + if (ret) + goto release_tar_arch; + } + + print_dbg_info(&pdev->dev, eng_grps); +release_tar_arch: + release_tar_archive(tar_arch); +unlock_mutex: + mutex_unlock(&eng_grps->lock); + return ret; +} + +void otx_cpt_set_eng_grps_is_rdonly(struct otx_cpt_eng_grps *eng_grps, + bool is_rdonly) +{ + mutex_lock(&eng_grps->lock); + + eng_grps->is_rdonly = is_rdonly; + + mutex_unlock(&eng_grps->lock); +} + +/* + * Clear the enable register of every engine group, poll the EXEC_BUSY + * register until it reads zero or the retry budget runs out, then write + * zero to the EXE_CTL register to disable the cores. + */ +void otx_cpt_disable_all_cores(struct otx_cpt_device *cpt) +{ + int grp, timeout = 100; + u64 reg; + + /* Disengage the cores from groups */ + for (grp = 0; grp < OTX_CPT_MAX_ENGINE_GROUPS; grp++) { + writeq(0, cpt->reg_base + OTX_CPT_PF_GX_EN(grp)); + udelay(CSR_DELAY); + } + + reg = readq(cpt->reg_base + OTX_CPT_PF_EXEC_BUSY); + while (reg) { + udelay(CSR_DELAY); + reg = readq(cpt->reg_base + OTX_CPT_PF_EXEC_BUSY); + /* Give up only when still busy and the retry budget is spent */ + if (reg && !timeout--) { + dev_warn(&cpt->pdev->dev, "Cores still busy"); + break; + } + } + + /* Disable the cores */ + writeq(0, cpt->reg_base + OTX_CPT_PF_EXE_CTL); +} + +void otx_cpt_cleanup_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps) +{ + struct otx_cpt_eng_grp_info *grp; + int i, j; + + 
mutex_lock(&eng_grps->lock); + if (eng_grps->is_ucode_load_created) { + device_remove_file(&pdev->dev, + &eng_grps->ucode_load_attr); + eng_grps->is_ucode_load_created = false; + } + + /* First delete all mirroring engine groups */ + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) + if (eng_grps->grp[i].mirror.is_ena) + delete_engine_group(&pdev->dev, &eng_grps->grp[i]); + + /* Delete remaining engine groups */ + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) + delete_engine_group(&pdev->dev, &eng_grps->grp[i]); + + /* Release memory */ + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + for (j = 0; j < OTX_CPT_MAX_ETYPES_PER_GRP; j++) { + kfree(grp->engs[j].bmap); + grp->engs[j].bmap = NULL; + } + } + + mutex_unlock(&eng_grps->lock); +} + +int otx_cpt_init_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps, int pf_type) +{ + struct otx_cpt_eng_grp_info *grp; + int i, j, ret = 0; + + mutex_init(&eng_grps->lock); + eng_grps->obj = pci_get_drvdata(pdev); + eng_grps->avail.se_cnt = eng_grps->avail.max_se_cnt; + eng_grps->avail.ae_cnt = eng_grps->avail.max_ae_cnt; + + eng_grps->engs_num = eng_grps->avail.max_se_cnt + + eng_grps->avail.max_ae_cnt; + if (eng_grps->engs_num > OTX_CPT_MAX_ENGINES) { + dev_err(&pdev->dev, + "Number of engines %d > than max supported %d", + eng_grps->engs_num, OTX_CPT_MAX_ENGINES); + ret = -EINVAL; + goto err; + } + + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + grp->g = eng_grps; + grp->idx = i; + + snprintf(grp->sysfs_info_name, OTX_CPT_UCODE_NAME_LENGTH, + "engine_group%d", i); + for (j = 0; j < OTX_CPT_MAX_ETYPES_PER_GRP; j++) { + grp->engs[j].bmap = + kcalloc(BITS_TO_LONGS(eng_grps->engs_num), + sizeof(long), GFP_KERNEL); + if (!grp->engs[j].bmap) { + ret = -ENOMEM; + goto err; + } + } + } + + switch (pf_type) { + case OTX_CPT_SE: + /* OcteonTX 83XX SE CPT PF has only SE engines attached */ + eng_grps->eng_types_supported = 1 << OTX_CPT_SE_TYPES; + break; + 
+ case OTX_CPT_AE: + /* OcteonTX 83XX AE CPT PF has only AE engines attached */ + eng_grps->eng_types_supported = 1 << OTX_CPT_AE_TYPES; + break; + + default: + dev_err(&pdev->dev, "Unknown PF type %d\n", pf_type); + ret = -EINVAL; + goto err; + } + + eng_grps->ucode_load_attr.show = NULL; + eng_grps->ucode_load_attr.store = ucode_load_store; + eng_grps->ucode_load_attr.attr.name = "ucode_load"; + eng_grps->ucode_load_attr.attr.mode = 0220; + sysfs_attr_init(&eng_grps->ucode_load_attr.attr); + ret = device_create_file(&pdev->dev, + &eng_grps->ucode_load_attr); + if (ret) + goto err; + eng_grps->is_ucode_load_created = true; + + print_dbg_info(&pdev->dev, eng_grps); + return ret; +err: + otx_cpt_cleanup_eng_grps(pdev, eng_grps); + return ret; +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h new file mode 100644 index 000000000000..14f02b60d0c2 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPTPF_UCODE_H +#define __OTX_CPTPF_UCODE_H + +#include <linux/pci.h> +#include <linux/types.h> +#include <linux/module.h> +#include "otx_cpt_hw_types.h" + +/* CPT ucode name maximum length */ +#define OTX_CPT_UCODE_NAME_LENGTH 64 +/* + * On OcteonTX 83xx platform, only one type of engines is allowed to be + * attached to an engine group. 
+ */ +#define OTX_CPT_MAX_ETYPES_PER_GRP 1 + +/* Default tar archive file names */ +#define OTX_CPT_UCODE_TAR_FILE_NAME "cpt8x-mc.tar" + +/* CPT ucode alignment */ +#define OTX_CPT_UCODE_ALIGNMENT 128 + +/* CPT ucode signature size */ +#define OTX_CPT_UCODE_SIGN_LEN 256 + +/* Microcode version string length */ +#define OTX_CPT_UCODE_VER_STR_SZ 44 + +/* Maximum number of supported engines/cores on OcteonTX 83XX platform */ +#define OTX_CPT_MAX_ENGINES 64 + +#define OTX_CPT_ENGS_BITMASK_LEN (OTX_CPT_MAX_ENGINES/(BITS_PER_BYTE * \ + sizeof(unsigned long))) + +/* Microcode types */ +enum otx_cpt_ucode_type { + OTX_CPT_AE_UC_TYPE = 1, /* AE-MAIN */ + OTX_CPT_SE_UC_TYPE1 = 20, /* SE-MAIN - combination of 21 and 22 */ + OTX_CPT_SE_UC_TYPE2 = 21, /* Fast Path IPSec + AirCrypto */ + OTX_CPT_SE_UC_TYPE3 = 22, /* + * Hash + HMAC + FlexiCrypto + RNG + Full + * Feature IPSec + AirCrypto + Kasumi + */ +}; + +struct otx_cpt_bitmap { + unsigned long bits[OTX_CPT_ENGS_BITMASK_LEN]; + int size; +}; + +struct otx_cpt_engines { + int type; + int count; +}; + +/* Microcode version number */ +struct otx_cpt_ucode_ver_num { + u8 nn; + u8 xx; + u8 yy; + u8 zz; +}; + +struct otx_cpt_ucode_hdr { + struct otx_cpt_ucode_ver_num ver_num; + u8 ver_str[OTX_CPT_UCODE_VER_STR_SZ]; + u32 code_length; + u32 padding[3]; +}; + +struct otx_cpt_ucode { + u8 ver_str[OTX_CPT_UCODE_VER_STR_SZ];/* + * ucode version in readable format + */ + struct otx_cpt_ucode_ver_num ver_num;/* ucode version number */ + char filename[OTX_CPT_UCODE_NAME_LENGTH]; /* ucode filename */ + dma_addr_t dma; /* phys address of ucode image */ + dma_addr_t align_dma; /* aligned phys address of ucode image */ + void *va; /* virt address of ucode image */ + void *align_va; /* aligned virt address of ucode image */ + u32 size; /* ucode image size */ + int type; /* ucode image type SE or AE */ +}; + +struct tar_ucode_info_t { + struct list_head list; + struct otx_cpt_ucode ucode;/* microcode information */ + const u8 *ucode_ptr; /* 
pointer to microcode in tar archive */ +}; + +/* Maximum and current number of engines available for all engine groups */ +struct otx_cpt_engs_available { + int max_se_cnt; + int max_ae_cnt; + int se_cnt; + int ae_cnt; +}; + +/* Engines reserved to an engine group */ +struct otx_cpt_engs_rsvd { + int type; /* engine type */ + int count; /* number of engines attached */ + int offset; /* constant offset of engine type in the bitmap */ + unsigned long *bmap; /* attached engines bitmap */ + struct otx_cpt_ucode *ucode; /* ucode used by these engines */ +}; + +struct otx_cpt_mirror_info { + int is_ena; /* + * is mirroring enabled, it is set only for engine + * group which mirrors another engine group + */ + int idx; /* + * index of engine group which is mirrored by this + * group, set only for engine group which mirrors + * another group + */ + int ref_count; /* + * number of times this engine group is mirrored by + * other groups, this is set only for engine group + * which is mirrored by other group(s) + */ +}; + +struct otx_cpt_eng_grp_info { + struct otx_cpt_eng_grps *g; /* pointer to engine_groups structure */ + struct device_attribute info_attr; /* group info entry attr */ + /* engines attached */ + struct otx_cpt_engs_rsvd engs[OTX_CPT_MAX_ETYPES_PER_GRP]; + /* Microcode information */ + struct otx_cpt_ucode ucode[OTX_CPT_MAX_ETYPES_PER_GRP]; + /* sysfs info entry name */ + char sysfs_info_name[OTX_CPT_UCODE_NAME_LENGTH]; + /* engine group mirroring information */ + struct otx_cpt_mirror_info mirror; + int idx; /* engine group index */ + bool is_enabled; /* + * is engine group enabled, engine group is enabled + * when it has engines attached and ucode loaded + */ +}; + +struct otx_cpt_eng_grps { + struct otx_cpt_eng_grp_info grp[OTX_CPT_MAX_ENGINE_GROUPS]; + struct device_attribute ucode_load_attr;/* ucode load attr */ + struct otx_cpt_engs_available avail; + struct mutex lock; + void *obj; + int engs_num; /* total number of engines supported */ + int 
eng_types_supported; /* engine types supported SE, AE */ + u8 eng_ref_cnt[OTX_CPT_MAX_ENGINES];/* engines reference count */ + bool is_ucode_load_created; /* is ucode_load sysfs entry created */ + bool is_first_try; /* is this first try to create kcrypto engine grp */ + bool is_rdonly; /* do engine groups configuration can be modified */ +}; + +int otx_cpt_init_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps, int pf_type); +void otx_cpt_cleanup_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps); +int otx_cpt_try_create_default_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps, + int pf_type); +void otx_cpt_set_eng_grps_is_rdonly(struct otx_cpt_eng_grps *eng_grps, + bool is_rdonly); +int otx_cpt_uc_supports_eng_type(struct otx_cpt_ucode *ucode, int eng_type); +int otx_cpt_eng_grp_has_eng_type(struct otx_cpt_eng_grp_info *eng_grp, + int eng_type); + +#endif /* __OTX_CPTPF_UCODE_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf.h b/drivers/crypto/marvell/octeontx/otx_cptvf.h new file mode 100644 index 000000000000..dd02f21659af --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __OTX_CPTVF_H +#define __OTX_CPTVF_H + +#include <linux/list.h> +#include <linux/interrupt.h> +#include <linux/device.h> +#include "otx_cpt_common.h" +#include "otx_cptvf_reqmgr.h" + +/* Flags to indicate the features supported */ +#define OTX_CPT_FLAG_DEVICE_READY BIT(1) +#define otx_cpt_device_ready(cpt) ((cpt)->flags & OTX_CPT_FLAG_DEVICE_READY) +/* Default command queue length */ +#define OTX_CPT_CMD_QLEN (4*2046) +#define OTX_CPT_CMD_QCHUNK_SIZE 1023 +#define OTX_CPT_NUM_QS_PER_VF 1 + +struct otx_cpt_cmd_chunk { + u8 *head; + dma_addr_t dma_addr; + u32 size; /* Chunk size, max OTX_CPT_INST_CHUNK_MAX_SIZE */ + struct list_head nextchunk; +}; + +struct otx_cpt_cmd_queue { + u32 idx; /* Command queue host write idx */ + u32 num_chunks; /* Number of command chunks */ + struct otx_cpt_cmd_chunk *qhead;/* + * Command queue head, instructions + * are inserted here + */ + struct otx_cpt_cmd_chunk *base; + struct list_head chead; +}; + +struct otx_cpt_cmd_qinfo { + u32 qchunksize; /* Command queue chunk size */ + struct otx_cpt_cmd_queue queue[OTX_CPT_NUM_QS_PER_VF]; +}; + +struct otx_cpt_pending_qinfo { + u32 num_queues; /* Number of queues supported */ + struct otx_cpt_pending_queue queue[OTX_CPT_NUM_QS_PER_VF]; +}; + +#define for_each_pending_queue(qinfo, q, i) \ + for (i = 0, q = &qinfo->queue[i]; i < qinfo->num_queues; i++, \ + q = &qinfo->queue[i]) + +struct otx_cptvf_wqe { + struct tasklet_struct twork; + struct otx_cptvf *cptvf; +}; + +struct otx_cptvf_wqe_info { + struct otx_cptvf_wqe vq_wqe[OTX_CPT_NUM_QS_PER_VF]; +}; + +struct otx_cptvf { + u16 flags; /* Flags to hold device status bits */ + u8 vfid; /* Device Index 0...OTX_CPT_MAX_VF_NUM */ + u8 num_vfs; /* Number of enabled VFs */ + u8 vftype; /* VF type of SE_TYPE(2) or AE_TYPE(1) */ + u8 vfgrp; /* VF group (0 - 8) */ + u8 node; /* Operating node: Bits (46:44) in BAR0 address */ + u8 priority; /* + * VF priority ring: 1-High proirity round + * robin ring;0-Low priority round robin ring; + 
*/ + struct pci_dev *pdev; /* Pci device handle */ + void __iomem *reg_base; /* Register start address */ + void *wqe_info; /* BH worker info */ + /* MSI-X */ + cpumask_var_t affinity_mask[OTX_CPT_VF_MSIX_VECTORS]; + /* Command and Pending queues */ + u32 qsize; + u32 num_queues; + struct otx_cpt_cmd_qinfo cqinfo; /* Command queue information */ + struct otx_cpt_pending_qinfo pqinfo; /* Pending queue information */ + /* VF-PF mailbox communication */ + bool pf_acked; + bool pf_nacked; +}; + +int otx_cptvf_send_vf_up(struct otx_cptvf *cptvf); +int otx_cptvf_send_vf_down(struct otx_cptvf *cptvf); +int otx_cptvf_send_vf_to_grp_msg(struct otx_cptvf *cptvf, int group); +int otx_cptvf_send_vf_priority_msg(struct otx_cptvf *cptvf); +int otx_cptvf_send_vq_size_msg(struct otx_cptvf *cptvf); +int otx_cptvf_check_pf_ready(struct otx_cptvf *cptvf); +void otx_cptvf_handle_mbox_intr(struct otx_cptvf *cptvf); +void otx_cptvf_write_vq_doorbell(struct otx_cptvf *cptvf, u32 val); + +#endif /* __OTX_CPTVF_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c new file mode 100644 index 000000000000..946fb62949b2 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c @@ -0,0 +1,1744 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <crypto/aes.h> +#include <crypto/authenc.h> +#include <crypto/cryptd.h> +#include <crypto/des.h> +#include <crypto/internal/aead.h> +#include <crypto/sha.h> +#include <crypto/xts.h> +#include <crypto/scatterwalk.h> +#include <linux/rtnetlink.h> +#include <linux/sort.h> +#include <linux/module.h> +#include "otx_cptvf.h" +#include "otx_cptvf_algs.h" +#include "otx_cptvf_reqmgr.h" + +#define CPT_MAX_VF_NUM 64 +/* Size of salt in AES GCM mode */ +#define AES_GCM_SALT_SIZE 4 +/* Size of IV in AES GCM mode */ +#define AES_GCM_IV_SIZE 8 +/* Size of ICV (Integrity Check Value) in AES GCM mode */ +#define AES_GCM_ICV_SIZE 16 +/* Offset of IV in AES GCM mode */ +#define AES_GCM_IV_OFFSET 8 +#define CONTROL_WORD_LEN 8 +#define KEY2_OFFSET 48 +#define DMA_MODE_FLAG(dma_mode) \ + (((dma_mode) == OTX_CPT_DMA_GATHER_SCATTER) ? (1 << 7) : 0) + +/* Truncated SHA digest size */ +#define SHA1_TRUNC_DIGEST_SIZE 12 +#define SHA256_TRUNC_DIGEST_SIZE 16 +#define SHA384_TRUNC_DIGEST_SIZE 24 +#define SHA512_TRUNC_DIGEST_SIZE 32 + +static DEFINE_MUTEX(mutex); +static int is_crypto_registered; + +struct cpt_device_desc { + enum otx_cptpf_type pf_type; + struct pci_dev *dev; + int num_queues; +}; + +struct cpt_device_table { + atomic_t count; + struct cpt_device_desc desc[CPT_MAX_VF_NUM]; +}; + +static struct cpt_device_table se_devices = { + .count = ATOMIC_INIT(0) +}; + +static struct cpt_device_table ae_devices = { + .count = ATOMIC_INIT(0) +}; + +static inline int get_se_device(struct pci_dev **pdev, int *cpu_num) +{ + int count, ret = 0; + + count = atomic_read(&se_devices.count); + if (count < 1) + return -ENODEV; + + *cpu_num = get_cpu(); + + if (se_devices.desc[0].pf_type == OTX_CPT_SE) { + /* + * On OcteonTX platform there is one CPT instruction queue bound + * to each VF. We get maximum performance if one CPT queue + * is available for each cpu otherwise CPT queues need to be + * shared between cpus. 
+ */ + if (*cpu_num >= count) + *cpu_num %= count; + *pdev = se_devices.desc[*cpu_num].dev; + } else { + pr_err("Unknown PF type %d\n", se_devices.desc[0].pf_type); + ret = -EINVAL; + } + put_cpu(); + + return ret; +} + +static inline int validate_hmac_cipher_null(struct otx_cpt_req_info *cpt_req) +{ + struct otx_cpt_req_ctx *rctx; + struct aead_request *req; + struct crypto_aead *tfm; + + req = container_of(cpt_req->areq, struct aead_request, base); + tfm = crypto_aead_reqtfm(req); + rctx = aead_request_ctx(req); + if (memcmp(rctx->fctx.hmac.s.hmac_calc, + rctx->fctx.hmac.s.hmac_recv, + crypto_aead_authsize(tfm)) != 0) + return -EBADMSG; + + return 0; +} + +/* + * Completion callback for AEAD requests: @arg1 is the crypto async request + * and @arg2 the CPT info buffer. Either may be NULL; a NULL info buffer + * skips the HMAC verification and cleanup and only completes the request. + */ +static void otx_cpt_aead_callback(int status, void *arg1, void *arg2) +{ + struct otx_cpt_info_buffer *cpt_info = arg2; + struct crypto_async_request *areq = arg1; + struct otx_cpt_req_info *cpt_req; + struct pci_dev *pdev; + + /* Nothing to verify or clean up without an info buffer */ + if (!cpt_info) + goto complete; + + cpt_req = cpt_info->req; + if (!status) { + /* + * When selected cipher is NULL we need to manually + * verify whether calculated hmac value matches + * received hmac value + */ + if (cpt_req->req_type == OTX_CPT_AEAD_ENC_DEC_NULL_REQ && + !cpt_req->is_enc) + status = validate_hmac_cipher_null(cpt_req); + } + pdev = cpt_info->pdev; + do_request_cleanup(pdev, cpt_info); + +complete: + if (areq) + areq->complete(areq, status); +} + +static void output_iv_copyback(struct crypto_async_request *areq) +{ + struct otx_cpt_req_info *req_info; + struct skcipher_request *sreq; + struct crypto_skcipher *stfm; + struct otx_cpt_req_ctx *rctx; + struct otx_cpt_enc_ctx *ctx; + u32 start, ivsize; + + sreq = container_of(areq, struct skcipher_request, base); + stfm = crypto_skcipher_reqtfm(sreq); + ctx = crypto_skcipher_ctx(stfm); + if (ctx->cipher_type == OTX_CPT_AES_CBC || + ctx->cipher_type == OTX_CPT_DES3_CBC) { + rctx = skcipher_request_ctx(sreq); + req_info = &rctx->cpt_req; + ivsize = crypto_skcipher_ivsize(stfm); + start = sreq->cryptlen - ivsize; + + if (req_info->is_enc) { + 
scatterwalk_map_and_copy(sreq->iv, sreq->dst, start, + ivsize, 0); + } else { + if (sreq->src != sreq->dst) { + scatterwalk_map_and_copy(sreq->iv, sreq->src, + start, ivsize, 0); + } else { + memcpy(sreq->iv, req_info->iv_out, ivsize); + kfree(req_info->iv_out); + } + } + } +} + +static void otx_cpt_skcipher_callback(int status, void *arg1, void *arg2) +{ + struct otx_cpt_info_buffer *cpt_info = arg2; + struct crypto_async_request *areq = arg1; + struct pci_dev *pdev; + + if (areq) { + if (!status) + output_iv_copyback(areq); + if (cpt_info) { + pdev = cpt_info->pdev; + do_request_cleanup(pdev, cpt_info); + } + areq->complete(areq, status); + } +} + +static inline void update_input_data(struct otx_cpt_req_info *req_info, + struct scatterlist *inp_sg, + u32 nbytes, u32 *argcnt) +{ + req_info->req.dlen += nbytes; + + while (nbytes) { + u32 len = min(nbytes, inp_sg->length); + u8 *ptr = sg_virt(inp_sg); + + req_info->in[*argcnt].vptr = (void *)ptr; + req_info->in[*argcnt].size = len; + nbytes -= len; + ++(*argcnt); + inp_sg = sg_next(inp_sg); + } +} + +static inline void update_output_data(struct otx_cpt_req_info *req_info, + struct scatterlist *outp_sg, + u32 offset, u32 nbytes, u32 *argcnt) +{ + req_info->rlen += nbytes; + + while (nbytes) { + u32 len = min(nbytes, outp_sg->length - offset); + u8 *ptr = sg_virt(outp_sg); + + req_info->out[*argcnt].vptr = (void *) (ptr + offset); + req_info->out[*argcnt].size = len; + nbytes -= len; + ++(*argcnt); + offset = 0; + outp_sg = sg_next(outp_sg); + } +} + +static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc, + u32 *argcnt) +{ + struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct crypto_tfm *tfm = crypto_skcipher_tfm(stfm); + struct otx_cpt_enc_ctx *ctx = crypto_tfm_ctx(tfm); + struct otx_cpt_fc_ctx *fctx = &rctx->fctx; + int ivsize = crypto_skcipher_ivsize(stfm); + u32 start 
= req->cryptlen - ivsize; + u64 *ctrl_flags = NULL; + gfp_t flags; + + flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; + req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER; + req_info->ctrl.s.se_req = OTX_CPT_SE_CORE_REQ; + + req_info->req.opcode.s.major = OTX_CPT_MAJOR_OP_FC | + DMA_MODE_FLAG(OTX_CPT_DMA_GATHER_SCATTER); + if (enc) { + req_info->req.opcode.s.minor = 2; + } else { + req_info->req.opcode.s.minor = 3; + if ((ctx->cipher_type == OTX_CPT_AES_CBC || + ctx->cipher_type == OTX_CPT_DES3_CBC) && + req->src == req->dst) { + req_info->iv_out = kmalloc(ivsize, flags); + if (!req_info->iv_out) + return -ENOMEM; + + scatterwalk_map_and_copy(req_info->iv_out, req->src, + start, ivsize, 0); + } + } + /* Encryption data length */ + req_info->req.param1 = req->cryptlen; + /* Authentication data length */ + req_info->req.param2 = 0; + + fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type; + fctx->enc.enc_ctrl.e.aes_key = ctx->key_type; + fctx->enc.enc_ctrl.e.iv_source = OTX_CPT_FROM_CPTR; + + if (ctx->cipher_type == OTX_CPT_AES_XTS) + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len * 2); + else + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len); + + memcpy(fctx->enc.encr_iv, req->iv, crypto_skcipher_ivsize(stfm)); + + ctrl_flags = (u64 *)&fctx->enc.enc_ctrl.flags; + *ctrl_flags = cpu_to_be64(*ctrl_flags); + + /* + * Storing Packet Data Information in offset + * Control Word First 8 bytes + */ + req_info->in[*argcnt].vptr = (u8 *)&rctx->ctrl_word; + req_info->in[*argcnt].size = CONTROL_WORD_LEN; + req_info->req.dlen += CONTROL_WORD_LEN; + ++(*argcnt); + + req_info->in[*argcnt].vptr = (u8 *)fctx; + req_info->in[*argcnt].size = sizeof(struct otx_cpt_fc_ctx); + req_info->req.dlen += sizeof(struct otx_cpt_fc_ctx); + + ++(*argcnt); + + return 0; +} + +static inline u32 create_input_list(struct skcipher_request *req, u32 enc, + u32 enc_iv_len) +{ + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct 
otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + int ret; + + ret = create_ctx_hdr(req, enc, &argcnt); + if (ret) + return ret; + + update_input_data(req_info, req->src, req->cryptlen, &argcnt); + req_info->incnt = argcnt; + + return 0; +} + +static inline void create_output_list(struct skcipher_request *req, + u32 enc_iv_len) +{ + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + + /* + * OUTPUT Buffer Processing + * AES encryption/decryption output would be + * received in the following format + * + * ------IV--------|------ENCRYPTED/DECRYPTED DATA-----| + * [ 16 Bytes/ [ Request Enc/Dec/ DATA Len AES CBC ] + */ + update_output_data(req_info, req->dst, 0, req->cryptlen, &argcnt); + req_info->outcnt = argcnt; +} + +static inline int cpt_enc_dec(struct skcipher_request *req, u32 enc) +{ + struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 enc_iv_len = crypto_skcipher_ivsize(stfm); + struct pci_dev *pdev; + int status, cpu_num; + + /* Validate that request doesn't exceed maximum CPT supported size */ + if (req->cryptlen > OTX_CPT_MAX_REQ_SIZE) + return -E2BIG; + + /* Clear control words */ + rctx->ctrl_word.flags = 0; + rctx->fctx.enc.enc_ctrl.flags = 0; + + status = create_input_list(req, enc, enc_iv_len); + if (status) + return status; + create_output_list(req, enc_iv_len); + + status = get_se_device(&pdev, &cpu_num); + if (status) + return status; + + req_info->callback = (void *)otx_cpt_skcipher_callback; + req_info->areq = &req->base; + req_info->req_type = OTX_CPT_ENC_DEC_REQ; + req_info->is_enc = enc; + req_info->is_trunc_hmac = false; + req_info->ctrl.s.grp = 0; + + /* + * We perform an asynchronous send and once + * the request is completed the driver would + * intimate through registered call back functions + */ + status = 
otx_cpt_do_request(pdev, req_info, cpu_num); + + return status; +} + +static int otx_cpt_skcipher_encrypt(struct skcipher_request *req) +{ + return cpt_enc_dec(req, true); +} + +static int otx_cpt_skcipher_decrypt(struct skcipher_request *req) +{ + return cpt_enc_dec(req, false); +} + +static int otx_cpt_skcipher_xts_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + const u8 *key2 = key + (keylen / 2); + const u8 *key1 = key; + int ret; + + ret = xts_check_key(crypto_skcipher_tfm(tfm), key, keylen); + if (ret) + return ret; + ctx->key_len = keylen; + memcpy(ctx->enc_key, key1, keylen / 2); + memcpy(ctx->enc_key + KEY2_OFFSET, key2, keylen / 2); + ctx->cipher_type = OTX_CPT_AES_XTS; + switch (ctx->key_len) { + case 2 * AES_KEYSIZE_128: + ctx->key_type = OTX_CPT_AES_128_BIT; + break; + case 2 * AES_KEYSIZE_256: + ctx->key_type = OTX_CPT_AES_256_BIT; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int cpt_des_setkey(struct crypto_skcipher *tfm, const u8 *key, + u32 keylen, u8 cipher_type) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (keylen != DES3_EDE_KEY_SIZE) + return -EINVAL; + + ctx->key_len = keylen; + ctx->cipher_type = cipher_type; + + memcpy(ctx->enc_key, key, keylen); + + return 0; +} + +static int cpt_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, + u32 keylen, u8 cipher_type) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + switch (keylen) { + case AES_KEYSIZE_128: + ctx->key_type = OTX_CPT_AES_128_BIT; + break; + case AES_KEYSIZE_192: + ctx->key_type = OTX_CPT_AES_192_BIT; + break; + case AES_KEYSIZE_256: + ctx->key_type = OTX_CPT_AES_256_BIT; + break; + default: + return -EINVAL; + } + ctx->key_len = keylen; + ctx->cipher_type = cipher_type; + + memcpy(ctx->enc_key, key, keylen); + + return 0; +} + +static int otx_cpt_skcipher_cbc_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return 
cpt_aes_setkey(tfm, key, keylen, OTX_CPT_AES_CBC); +} + +static int otx_cpt_skcipher_ecb_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_aes_setkey(tfm, key, keylen, OTX_CPT_AES_ECB); +} + +static int otx_cpt_skcipher_cfb_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_aes_setkey(tfm, key, keylen, OTX_CPT_AES_CFB); +} + +static int otx_cpt_skcipher_cbc_des3_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_des_setkey(tfm, key, keylen, OTX_CPT_DES3_CBC); +} + +static int otx_cpt_skcipher_ecb_des3_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_des_setkey(tfm, key, keylen, OTX_CPT_DES3_ECB); +} + +static int otx_cpt_enc_dec_init(struct crypto_skcipher *tfm) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + memset(ctx, 0, sizeof(*ctx)); + /* + * Additional memory for skcipher_request is + * allocated since the cryptd daemon uses + * this memory for request_ctx information + */ + crypto_skcipher_set_reqsize(tfm, sizeof(struct otx_cpt_req_ctx) + + sizeof(struct skcipher_request)); + + return 0; +} + +static int cpt_aead_init(struct crypto_aead *tfm, u8 cipher_type, u8 mac_type) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + + ctx->cipher_type = cipher_type; + ctx->mac_type = mac_type; + + /* + * When selected cipher is NULL we use HMAC opcode instead of + * FLEXICRYPTO opcode therefore we don't need to use HASH algorithms + * for calculating ipad and opad + */ + if (ctx->cipher_type != OTX_CPT_CIPHER_NULL) { + switch (ctx->mac_type) { + case OTX_CPT_SHA1: + ctx->hashalg = crypto_alloc_shash("sha1", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX_CPT_SHA256: + ctx->hashalg = crypto_alloc_shash("sha256", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX_CPT_SHA384: + ctx->hashalg = 
crypto_alloc_shash("sha384", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX_CPT_SHA512: + ctx->hashalg = crypto_alloc_shash("sha512", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + } + } + + crypto_aead_set_reqsize(tfm, sizeof(struct otx_cpt_req_ctx)); + + return 0; +} + +static int otx_cpt_aead_cbc_aes_sha1_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA1); +} + +static int otx_cpt_aead_cbc_aes_sha256_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA256); +} + +static int otx_cpt_aead_cbc_aes_sha384_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA384); +} + +static int otx_cpt_aead_cbc_aes_sha512_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA512); +} + +static int otx_cpt_aead_ecb_null_sha1_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA1); +} + +static int otx_cpt_aead_ecb_null_sha256_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA256); +} + +static int otx_cpt_aead_ecb_null_sha384_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA384); +} + +static int otx_cpt_aead_ecb_null_sha512_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA512); +} + +static int otx_cpt_aead_gcm_aes_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_GCM, OTX_CPT_MAC_NULL); +} + +static void otx_cpt_aead_exit(struct crypto_aead *tfm) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + + kfree(ctx->ipad); + kfree(ctx->opad); + if (ctx->hashalg) + crypto_free_shash(ctx->hashalg); + kfree(ctx->sdesc); +} + +/* + * This is the Integrity Check Value validation (aka the authentication tag + * length) + */ +static int 
otx_cpt_aead_set_authsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + + switch (ctx->mac_type) { + case OTX_CPT_SHA1: + if (authsize != SHA1_DIGEST_SIZE && + authsize != SHA1_TRUNC_DIGEST_SIZE) + return -EINVAL; + + if (authsize == SHA1_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_SHA256: + if (authsize != SHA256_DIGEST_SIZE && + authsize != SHA256_TRUNC_DIGEST_SIZE) + return -EINVAL; + + if (authsize == SHA256_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_SHA384: + if (authsize != SHA384_DIGEST_SIZE && + authsize != SHA384_TRUNC_DIGEST_SIZE) + return -EINVAL; + + if (authsize == SHA384_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_SHA512: + if (authsize != SHA512_DIGEST_SIZE && + authsize != SHA512_TRUNC_DIGEST_SIZE) + return -EINVAL; + + if (authsize == SHA512_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_MAC_NULL: + if (ctx->cipher_type == OTX_CPT_AES_GCM) { + if (authsize != AES_GCM_ICV_SIZE) + return -EINVAL; + } else + return -EINVAL; + break; + + default: + return -EINVAL; + } + + tfm->authsize = authsize; + return 0; +} + +static struct otx_cpt_sdesc *alloc_sdesc(struct crypto_shash *alg) +{ + struct otx_cpt_sdesc *sdesc; + int size; + + size = sizeof(struct shash_desc) + crypto_shash_descsize(alg); + sdesc = kmalloc(size, GFP_KERNEL); + if (!sdesc) + return NULL; + + sdesc->shash.tfm = alg; + + return sdesc; +} + +static inline void swap_data32(void *buf, u32 len) +{ + u32 *store = (u32 *) buf; + int i = 0; + + for (i = 0 ; i < len/sizeof(u32); i++, store++) + *store = cpu_to_be32(*store); +} + +static inline void swap_data64(void *buf, u32 len) +{ + u64 *store = (u64 *) buf; + int i = 0; + + for (i = 0 ; i < len/sizeof(u64); i++, store++) + *store = cpu_to_be64(*store); +} + +static int copy_pad(u8 mac_type, u8 *out_pad, u8 *in_pad) +{ + struct sha512_state *sha512; + struct 
sha256_state *sha256; + struct sha1_state *sha1; + + switch (mac_type) { + case OTX_CPT_SHA1: + sha1 = (struct sha1_state *) in_pad; + swap_data32(sha1->state, SHA1_DIGEST_SIZE); + memcpy(out_pad, &sha1->state, SHA1_DIGEST_SIZE); + break; + + case OTX_CPT_SHA256: + sha256 = (struct sha256_state *) in_pad; + swap_data32(sha256->state, SHA256_DIGEST_SIZE); + memcpy(out_pad, &sha256->state, SHA256_DIGEST_SIZE); + break; + + case OTX_CPT_SHA384: + case OTX_CPT_SHA512: + sha512 = (struct sha512_state *) in_pad; + swap_data64(sha512->state, SHA512_DIGEST_SIZE); + memcpy(out_pad, &sha512->state, SHA512_DIGEST_SIZE); + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int aead_hmac_init(struct crypto_aead *cipher) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + int state_size = crypto_shash_statesize(ctx->hashalg); + int ds = crypto_shash_digestsize(ctx->hashalg); + int bs = crypto_shash_blocksize(ctx->hashalg); + int authkeylen = ctx->auth_key_len; + u8 *ipad = NULL, *opad = NULL; + int ret = 0, icount = 0; + + ctx->sdesc = alloc_sdesc(ctx->hashalg); + if (!ctx->sdesc) + return -ENOMEM; + + ctx->ipad = kzalloc(bs, GFP_KERNEL); + if (!ctx->ipad) { + ret = -ENOMEM; + goto calc_fail; + } + + ctx->opad = kzalloc(bs, GFP_KERNEL); + if (!ctx->opad) { + ret = -ENOMEM; + goto calc_fail; + } + + ipad = kzalloc(state_size, GFP_KERNEL); + if (!ipad) { + ret = -ENOMEM; + goto calc_fail; + } + + opad = kzalloc(state_size, GFP_KERNEL); + if (!opad) { + ret = -ENOMEM; + goto calc_fail; + } + + if (authkeylen > bs) { + ret = crypto_shash_digest(&ctx->sdesc->shash, ctx->key, + authkeylen, ipad); + if (ret) + goto calc_fail; + + authkeylen = ds; + } else { + memcpy(ipad, ctx->key, authkeylen); + } + + memset(ipad + authkeylen, 0, bs - authkeylen); + memcpy(opad, ipad, bs); + + for (icount = 0; icount < bs; icount++) { + ipad[icount] ^= 0x36; + opad[icount] ^= 0x5c; + } + + /* + * Partial Hash calculated from the software + * algorithm is retrieved for IPAD 
& OPAD + */ + + /* IPAD Calculation */ + crypto_shash_init(&ctx->sdesc->shash); + crypto_shash_update(&ctx->sdesc->shash, ipad, bs); + crypto_shash_export(&ctx->sdesc->shash, ipad); + ret = copy_pad(ctx->mac_type, ctx->ipad, ipad); + if (ret) + goto calc_fail; + + /* OPAD Calculation */ + crypto_shash_init(&ctx->sdesc->shash); + crypto_shash_update(&ctx->sdesc->shash, opad, bs); + crypto_shash_export(&ctx->sdesc->shash, opad); + ret = copy_pad(ctx->mac_type, ctx->opad, opad); + if (ret) + goto calc_fail; + + kfree(ipad); + kfree(opad); + + return 0; + +calc_fail: + kfree(ctx->ipad); + ctx->ipad = NULL; + kfree(ctx->opad); + ctx->opad = NULL; + kfree(ipad); + kfree(opad); + kfree(ctx->sdesc); + ctx->sdesc = NULL; + + return ret; +} + +static int otx_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct crypto_authenc_key_param *param; + int enckeylen = 0, authkeylen = 0; + struct rtattr *rta = (void *)key; + int status = -EINVAL; + + if (!RTA_OK(rta, keylen)) + goto badkey; + + if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) + goto badkey; + + if (RTA_PAYLOAD(rta) < sizeof(*param)) + goto badkey; + + param = RTA_DATA(rta); + enckeylen = be32_to_cpu(param->enckeylen); + key += RTA_ALIGN(rta->rta_len); + keylen -= RTA_ALIGN(rta->rta_len); + if (keylen < enckeylen) + goto badkey; + + if (keylen > OTX_CPT_MAX_KEY_SIZE) + goto badkey; + + authkeylen = keylen - enckeylen; + memcpy(ctx->key, key, keylen); + + switch (enckeylen) { + case AES_KEYSIZE_128: + ctx->key_type = OTX_CPT_AES_128_BIT; + break; + case AES_KEYSIZE_192: + ctx->key_type = OTX_CPT_AES_192_BIT; + break; + case AES_KEYSIZE_256: + ctx->key_type = OTX_CPT_AES_256_BIT; + break; + default: + /* Invalid key length */ + goto badkey; + } + + ctx->enc_key_len = enckeylen; + ctx->auth_key_len = authkeylen; + + status = aead_hmac_init(cipher); + if (status) + goto badkey; + + return 0; +badkey: 
+ return status; +} + +static int otx_cpt_aead_ecb_null_sha_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct crypto_authenc_key_param *param; + struct rtattr *rta = (void *)key; + int enckeylen = 0; + + if (!RTA_OK(rta, keylen)) + goto badkey; + + if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) + goto badkey; + + if (RTA_PAYLOAD(rta) < sizeof(*param)) + goto badkey; + + param = RTA_DATA(rta); + enckeylen = be32_to_cpu(param->enckeylen); + key += RTA_ALIGN(rta->rta_len); + keylen -= RTA_ALIGN(rta->rta_len); + if (enckeylen != 0) + goto badkey; + + if (keylen > OTX_CPT_MAX_KEY_SIZE) + goto badkey; + + memcpy(ctx->key, key, keylen); + ctx->enc_key_len = enckeylen; + ctx->auth_key_len = keylen; + return 0; +badkey: + return -EINVAL; +} + +static int otx_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + + /* + * For aes gcm we expect to get encryption key (16, 24, 32 bytes) + * and salt (4 bytes) + */ + switch (keylen) { + case AES_KEYSIZE_128 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX_CPT_AES_128_BIT; + ctx->enc_key_len = AES_KEYSIZE_128; + break; + case AES_KEYSIZE_192 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX_CPT_AES_192_BIT; + ctx->enc_key_len = AES_KEYSIZE_192; + break; + case AES_KEYSIZE_256 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX_CPT_AES_256_BIT; + ctx->enc_key_len = AES_KEYSIZE_256; + break; + default: + /* Invalid key and salt length */ + return -EINVAL; + } + + /* Store encryption key and salt */ + memcpy(ctx->key, key, keylen); + + return 0; +} + +static inline u32 create_aead_ctx_hdr(struct aead_request *req, u32 enc, + u32 *argcnt) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx_cpt_req_info *req_info = 
&rctx->cpt_req; + struct otx_cpt_fc_ctx *fctx = &rctx->fctx; + int mac_len = crypto_aead_authsize(tfm); + int ds; + + rctx->ctrl_word.e.enc_data_offset = req->assoclen; + + switch (ctx->cipher_type) { + case OTX_CPT_AES_CBC: + fctx->enc.enc_ctrl.e.iv_source = OTX_CPT_FROM_CPTR; + /* Copy encryption key to context */ + memcpy(fctx->enc.encr_key, ctx->key + ctx->auth_key_len, + ctx->enc_key_len); + /* Copy IV to context */ + memcpy(fctx->enc.encr_iv, req->iv, crypto_aead_ivsize(tfm)); + + ds = crypto_shash_digestsize(ctx->hashalg); + if (ctx->mac_type == OTX_CPT_SHA384) + ds = SHA512_DIGEST_SIZE; + if (ctx->ipad) + memcpy(fctx->hmac.e.ipad, ctx->ipad, ds); + if (ctx->opad) + memcpy(fctx->hmac.e.opad, ctx->opad, ds); + break; + + case OTX_CPT_AES_GCM: + fctx->enc.enc_ctrl.e.iv_source = OTX_CPT_FROM_DPTR; + /* Copy encryption key to context */ + memcpy(fctx->enc.encr_key, ctx->key, ctx->enc_key_len); + /* Copy salt to context */ + memcpy(fctx->enc.encr_iv, ctx->key + ctx->enc_key_len, + AES_GCM_SALT_SIZE); + + rctx->ctrl_word.e.iv_offset = req->assoclen - AES_GCM_IV_OFFSET; + break; + + default: + /* Unknown cipher type */ + return -EINVAL; + } + rctx->ctrl_word.flags = cpu_to_be64(rctx->ctrl_word.flags); + + req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER; + req_info->ctrl.s.se_req = OTX_CPT_SE_CORE_REQ; + req_info->req.opcode.s.major = OTX_CPT_MAJOR_OP_FC | + DMA_MODE_FLAG(OTX_CPT_DMA_GATHER_SCATTER); + if (enc) { + req_info->req.opcode.s.minor = 2; + req_info->req.param1 = req->cryptlen; + req_info->req.param2 = req->cryptlen + req->assoclen; + } else { + req_info->req.opcode.s.minor = 3; + req_info->req.param1 = req->cryptlen - mac_len; + req_info->req.param2 = req->cryptlen + req->assoclen - mac_len; + } + + fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type; + fctx->enc.enc_ctrl.e.aes_key = ctx->key_type; + fctx->enc.enc_ctrl.e.mac_type = ctx->mac_type; + fctx->enc.enc_ctrl.e.mac_len = mac_len; + fctx->enc.enc_ctrl.flags = 
cpu_to_be64(fctx->enc.enc_ctrl.flags); + + /* + * Storing Packet Data Information in offset + * Control Word First 8 bytes + */ + req_info->in[*argcnt].vptr = (u8 *)&rctx->ctrl_word; + req_info->in[*argcnt].size = CONTROL_WORD_LEN; + req_info->req.dlen += CONTROL_WORD_LEN; + ++(*argcnt); + + req_info->in[*argcnt].vptr = (u8 *)fctx; + req_info->in[*argcnt].size = sizeof(struct otx_cpt_fc_ctx); + req_info->req.dlen += sizeof(struct otx_cpt_fc_ctx); + ++(*argcnt); + + return 0; +} + +static inline u32 create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt, + u32 enc) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + + req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER; + req_info->ctrl.s.se_req = OTX_CPT_SE_CORE_REQ; + req_info->req.opcode.s.major = OTX_CPT_MAJOR_OP_HMAC | + DMA_MODE_FLAG(OTX_CPT_DMA_GATHER_SCATTER); + req_info->is_trunc_hmac = ctx->is_trunc_hmac; + + req_info->req.opcode.s.minor = 0; + req_info->req.param1 = ctx->auth_key_len; + req_info->req.param2 = ctx->mac_type << 8; + + /* Add authentication key */ + req_info->in[*argcnt].vptr = ctx->key; + req_info->in[*argcnt].size = round_up(ctx->auth_key_len, 8); + req_info->req.dlen += round_up(ctx->auth_key_len, 8); + ++(*argcnt); + + return 0; +} + +static inline u32 create_aead_input_list(struct aead_request *req, u32 enc) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 inputlen = req->cryptlen + req->assoclen; + u32 status, argcnt = 0; + + status = create_aead_ctx_hdr(req, enc, &argcnt); + if (status) + return status; + update_input_data(req_info, req->src, inputlen, &argcnt); + req_info->incnt = argcnt; + + return 0; +} + +static inline u32 create_aead_output_list(struct aead_request *req, u32 enc, + u32 mac_len) +{ + struct otx_cpt_req_ctx *rctx = 
aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0, outputlen = 0; + + if (enc) + outputlen = req->cryptlen + req->assoclen + mac_len; + else + outputlen = req->cryptlen + req->assoclen - mac_len; + + update_output_data(req_info, req->dst, 0, outputlen, &argcnt); + req_info->outcnt = argcnt; + + return 0; +} + +static inline u32 create_aead_null_input_list(struct aead_request *req, + u32 enc, u32 mac_len) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 inputlen, argcnt = 0; + + if (enc) + inputlen = req->cryptlen + req->assoclen; + else + inputlen = req->cryptlen + req->assoclen - mac_len; + + create_hmac_ctx_hdr(req, &argcnt, enc); + update_input_data(req_info, req->src, inputlen, &argcnt); + req_info->incnt = argcnt; + + return 0; +} + +static inline u32 create_aead_null_output_list(struct aead_request *req, + u32 enc, u32 mac_len) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct scatterlist *dst; + u8 *ptr = NULL; + int argcnt = 0, status, offset; + u32 inputlen; + + if (enc) + inputlen = req->cryptlen + req->assoclen; + else + inputlen = req->cryptlen + req->assoclen - mac_len; + + /* + * If source and destination are different + * then copy payload to destination + */ + if (req->src != req->dst) { + + ptr = kmalloc(inputlen, (req_info->areq->flags & + CRYPTO_TFM_REQ_MAY_SLEEP) ? 
+ GFP_KERNEL : GFP_ATOMIC); + if (!ptr) { + status = -ENOMEM; + goto error; + } + + status = sg_copy_to_buffer(req->src, sg_nents(req->src), ptr, + inputlen); + if (status != inputlen) { + status = -EINVAL; + goto error; + } + status = sg_copy_from_buffer(req->dst, sg_nents(req->dst), ptr, + inputlen); + if (status != inputlen) { + status = -EINVAL; + goto error; + } + kfree(ptr); + } + + if (enc) { + /* + * In an encryption scenario hmac needs + * to be appended after payload + */ + dst = req->dst; + offset = inputlen; + while (offset >= dst->length) { + offset -= dst->length; + dst = sg_next(dst); + if (!dst) { + status = -ENOENT; + goto error; + } + } + + update_output_data(req_info, dst, offset, mac_len, &argcnt); + } else { + /* + * In a decryption scenario calculated hmac for received + * payload needs to be compare with hmac received + */ + status = sg_copy_buffer(req->src, sg_nents(req->src), + rctx->fctx.hmac.s.hmac_recv, mac_len, + inputlen, true); + if (status != mac_len) { + status = -EINVAL; + goto error; + } + + req_info->out[argcnt].vptr = rctx->fctx.hmac.s.hmac_calc; + req_info->out[argcnt].size = mac_len; + argcnt++; + } + + req_info->outcnt = argcnt; + return 0; +error: + kfree(ptr); + return status; +} + +static u32 cpt_aead_enc_dec(struct aead_request *req, u8 reg_type, u8 enc) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct pci_dev *pdev; + u32 status, cpu_num; + + /* Clear control words */ + rctx->ctrl_word.flags = 0; + rctx->fctx.enc.enc_ctrl.flags = 0; + + req_info->callback = otx_cpt_aead_callback; + req_info->areq = &req->base; + req_info->req_type = reg_type; + req_info->is_enc = enc; + req_info->is_trunc_hmac = false; + + switch (reg_type) { + case OTX_CPT_AEAD_ENC_DEC_REQ: + status = create_aead_input_list(req, enc); + if (status) + return status; + status = create_aead_output_list(req, enc, + 
crypto_aead_authsize(tfm)); + if (status) + return status; + break; + + case OTX_CPT_AEAD_ENC_DEC_NULL_REQ: + status = create_aead_null_input_list(req, enc, + crypto_aead_authsize(tfm)); + if (status) + return status; + status = create_aead_null_output_list(req, enc, + crypto_aead_authsize(tfm)); + if (status) + return status; + break; + + default: + return -EINVAL; + } + + /* Validate that request doesn't exceed maximum CPT supported size */ + if (req_info->req.param1 > OTX_CPT_MAX_REQ_SIZE || + req_info->req.param2 > OTX_CPT_MAX_REQ_SIZE) + return -E2BIG; + + status = get_se_device(&pdev, &cpu_num); + if (status) + return status; + + req_info->ctrl.s.grp = 0; + + status = otx_cpt_do_request(pdev, req_info, cpu_num); + /* + * We perform an asynchronous send and once + * the request is completed the driver would + * intimate through registered call back functions + */ + return status; +} + +static int otx_cpt_aead_encrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_REQ, true); +} + +static int otx_cpt_aead_decrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_REQ, false); +} + +static int otx_cpt_aead_null_encrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_NULL_REQ, true); +} + +static int otx_cpt_aead_null_decrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_NULL_REQ, false); +} + +static struct skcipher_alg otx_cpt_skciphers[] = { { + .base.cra_name = "xts(aes)", + .base.cra_driver_name = "cpt_xts_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_xts_setkey, + .encrypt = 
otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "cbc(aes)", + .base.cra_driver_name = "cpt_cbc_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_cbc_aes_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "ecb(aes)", + .base.cra_driver_name = "cpt_ecb_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = 0, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_ecb_aes_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "cfb(aes)", + .base.cra_driver_name = "cpt_cfb_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_cfb_aes_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "cbc(des3_ede)", + .base.cra_driver_name = "cpt_cbc_des3_ede", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_des3_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, 
+ .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = otx_cpt_skcipher_cbc_des3_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "ecb(des3_ede)", + .base.cra_driver_name = "cpt_ecb_des3_ede", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_des3_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = 0, + .setkey = otx_cpt_skcipher_ecb_des3_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +} }; + +static struct aead_alg otx_cpt_aeads[] = { { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha1_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha1_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha256),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha256_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha256_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = 
otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha384),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha384_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha384_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA384_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha512),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha512_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha512_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA512_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha1),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha1_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha1_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA1_DIGEST_SIZE, +}, { + .base = { + .cra_name = 
"authenc(hmac(sha256),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha256_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha256_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA256_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha384),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha384_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha384_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA384_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha512),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha512_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha512_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA512_DIGEST_SIZE, +}, { + .base = { + .cra_name = "rfc4106(gcm(aes))", + .cra_driver_name = "cpt_rfc4106_gcm_aes", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 
4001,
+		.cra_alignmask = 0,
+		.cra_module = THIS_MODULE,
+	},
+	.init = otx_cpt_aead_gcm_aes_init,
+	.exit = otx_cpt_aead_exit,
+	.setkey = otx_cpt_aead_gcm_aes_setkey,
+	.setauthsize = otx_cpt_aead_set_authsize,
+	.encrypt = otx_cpt_aead_encrypt,
+	.decrypt = otx_cpt_aead_decrypt,
+	.ivsize = AES_GCM_IV_SIZE,
+	.maxauthsize = AES_GCM_ICV_SIZE,
+} };
+
+/*
+ * Report whether any algorithm handled by this driver is still referenced.
+ *
+ * An algorithm counts as "in use" when its cra_refcnt differs from 1
+ * (presumably the idle value of a registered algorithm - confirm against
+ * the crypto core).
+ *
+ * The skcipher table is only registered when CONFIG_DM_CRYPT is disabled
+ * (see cpt_register_algs()), so its refcounts are only inspected in that
+ * configuration; otherwise never-registered entries would always be
+ * reported as in use.
+ *
+ * Returns true (non-zero) if something is still in use, false otherwise.
+ */
+static inline int is_any_alg_used(void)
+{
+	int i;
+
+	if (!IS_ENABLED(CONFIG_DM_CRYPT)) {
+		for (i = 0; i < ARRAY_SIZE(otx_cpt_skciphers); i++)
+			if (refcount_read(&otx_cpt_skciphers[i].base.cra_refcnt) != 1)
+				return true;
+	}
+	for (i = 0; i < ARRAY_SIZE(otx_cpt_aeads); i++)
+		if (refcount_read(&otx_cpt_aeads[i].base.cra_refcnt) != 1)
+			return true;
+	return false;
+}
+
+/*
+ * Register the driver's algorithms with the crypto API.
+ *
+ * skciphers are registered only when CONFIG_DM_CRYPT is disabled; AEADs are
+ * always registered. CRYPTO_ALG_DEAD is cleared on every entry first so the
+ * static tables can be registered again after an earlier unregistration.
+ *
+ * Returns 0 on success or the error reported by the crypto API. On AEAD
+ * failure the skciphers registered by this call (if any) are unregistered
+ * again, so nothing stays registered on error.
+ */
+static inline int cpt_register_algs(void)
+{
+	int i, err = 0;
+
+	if (!IS_ENABLED(CONFIG_DM_CRYPT)) {
+		for (i = 0; i < ARRAY_SIZE(otx_cpt_skciphers); i++)
+			otx_cpt_skciphers[i].base.cra_flags &= ~CRYPTO_ALG_DEAD;
+
+		err = crypto_register_skciphers(otx_cpt_skciphers,
+						ARRAY_SIZE(otx_cpt_skciphers));
+		if (err)
+			return err;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(otx_cpt_aeads); i++)
+		otx_cpt_aeads[i].base.cra_flags &= ~CRYPTO_ALG_DEAD;
+
+	err = crypto_register_aeads(otx_cpt_aeads, ARRAY_SIZE(otx_cpt_aeads));
+	if (err) {
+		/* Only undo the skcipher registration if it was performed */
+		if (!IS_ENABLED(CONFIG_DM_CRYPT))
+			crypto_unregister_skciphers(otx_cpt_skciphers,
+						ARRAY_SIZE(otx_cpt_skciphers));
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Unregister everything cpt_register_algs() registered: the skciphers only
+ * when CONFIG_DM_CRYPT is disabled (the only configuration in which they
+ * were registered), the AEADs always.
+ */
+static inline void cpt_unregister_algs(void)
+{
+	if (!IS_ENABLED(CONFIG_DM_CRYPT))
+		crypto_unregister_skciphers(otx_cpt_skciphers,
+					    ARRAY_SIZE(otx_cpt_skciphers));
+	crypto_unregister_aeads(otx_cpt_aeads, ARRAY_SIZE(otx_cpt_aeads));
+}
+
+/*
+ * sort() comparison callback: orders two struct cpt_device_desc entries by
+ * ascending PCI devfn of their device. Returns -1, 0 or 1.
+ */
+static int compare_func(const void *lptr, const void *rptr)
+{
+	const struct cpt_device_desc *ldesc = lptr;
+	const struct cpt_device_desc *rdesc = rptr;
+
+	if (ldesc->dev->devfn < rdesc->dev->devfn)
+		return -1;
+	if (ldesc->dev->devfn > rdesc->dev->devfn)
+		return 1;
+	return 0;
+}
+
+static void swap_func(void *lptr, void *rptr, int size)
+{
+	struct cpt_device_desc *ldesc = (struct cpt_device_desc *) 
lptr;
+	struct cpt_device_desc *rdesc = (struct cpt_device_desc *) rptr;
+	struct cpt_device_desc desc;
+
+	desc = *ldesc;
+	*ldesc = *rdesc;
+	*rdesc = desc;
+}
+
+/*
+ * Add a CPT VF to the SE or AE device table and, for SE devices, register
+ * the crypto algorithms once the table holds num_devices entries.
+ *
+ * @pdev:        PCI device of the VF being added
+ * @mod:         module to pin while the algorithms are registered
+ * @pf_type:     stored in the new table entry
+ * @engine_type: OTX_CPT_SE_TYPES or OTX_CPT_AE_TYPES, selects the table
+ * @num_queues:  stored in the new table entry
+ * @num_devices: SE device count at which the algorithms get registered
+ *
+ * The table is kept sorted by PCI devfn. All table updates happen under
+ * the file-level mutex.
+ *
+ * Returns 0 on success, -ENOSPC when the table is full, -EINVAL for an
+ * unknown engine type, or the error returned by cpt_register_algs(). On
+ * any error the device is not left counted in the table.
+ */
+int otx_cpt_crypto_init(struct pci_dev *pdev, struct module *mod,
+			enum otx_cptpf_type pf_type,
+			enum otx_cptvf_type engine_type,
+			int num_queues, int num_devices)
+{
+	int ret = 0;
+	int count;
+
+	mutex_lock(&mutex);
+	switch (engine_type) {
+	case OTX_CPT_SE_TYPES:
+		count = atomic_read(&se_devices.count);
+		if (count >= CPT_MAX_VF_NUM) {
+			dev_err(&pdev->dev, "No space to add a new device\n");
+			ret = -ENOSPC;
+			goto err;
+		}
+		se_devices.desc[count].pf_type = pf_type;
+		se_devices.desc[count].num_queues = num_queues;
+		se_devices.desc[count++].dev = pdev;
+		atomic_inc(&se_devices.count);
+
+		if (atomic_read(&se_devices.count) == num_devices &&
+		    is_crypto_registered == false) {
+			ret = cpt_register_algs();
+			if (ret) {
+				dev_err(&pdev->dev,
+					"Error in registering crypto algorithms\n");
+				/*
+				 * The new entry is still the last one (the
+				 * table has not been re-sorted yet), so
+				 * dropping the count removes it again and no
+				 * stale pdev stays visible after the caller
+				 * bails out.
+				 */
+				atomic_dec(&se_devices.count);
+				goto err;
+			}
+			/*
+			 * NOTE(review): the result of try_module_get() is
+			 * not checked here, while otx_cpt_crypto_exit()
+			 * unconditionally calls module_put() - confirm this
+			 * cannot fail in the probe context.
+			 */
+			try_module_get(mod);
+			is_crypto_registered = true;
+		}
+		sort(se_devices.desc, count, sizeof(struct cpt_device_desc),
+		     compare_func, swap_func);
+		break;
+
+	case OTX_CPT_AE_TYPES:
+		count = atomic_read(&ae_devices.count);
+		if (count >= CPT_MAX_VF_NUM) {
+			dev_err(&pdev->dev, "No space to add a new device\n");
+			ret = -ENOSPC;
+			goto err;
+		}
+		ae_devices.desc[count].pf_type = pf_type;
+		ae_devices.desc[count].num_queues = num_queues;
+		ae_devices.desc[count++].dev = pdev;
+		atomic_inc(&ae_devices.count);
+		sort(ae_devices.desc, count, sizeof(struct cpt_device_desc),
+		     compare_func, swap_func);
+		break;
+
+	default:
+		dev_err(&pdev->dev, "Unknown VF type %d\n", engine_type);
+		/*
+		 * Return a proper errno: the caller hands this value back
+		 * from its probe routine. BAD_OTX_CPTVF_TYPE is an engine
+		 * type constant defined outside this file, not an error code.
+		 */
+		ret = -EINVAL;
+	}
+err:
+	mutex_unlock(&mutex);
+	return ret;
+}
+
+void otx_cpt_crypto_exit(struct pci_dev *pdev, struct module *mod,
+			 enum otx_cptvf_type engine_type)
+{
+	struct cpt_device_table *dev_tbl;
+	bool dev_found = false;
+	int i, j, count;
+
+	mutex_lock(&mutex);
+
+	dev_tbl = (engine_type == 
OTX_CPT_AE_TYPES) ? &ae_devices : &se_devices; + count = atomic_read(&dev_tbl->count); + for (i = 0; i < count; i++) + if (pdev == dev_tbl->desc[i].dev) { + for (j = i; j < count-1; j++) + dev_tbl->desc[j] = dev_tbl->desc[j+1]; + dev_found = true; + break; + } + + if (!dev_found) { + dev_err(&pdev->dev, "%s device not found", __func__); + goto exit; + } + + if (engine_type != OTX_CPT_AE_TYPES) { + if (atomic_dec_and_test(&se_devices.count) && + !is_any_alg_used()) { + cpt_unregister_algs(); + module_put(mod); + is_crypto_registered = false; + } + } else + atomic_dec(&ae_devices.count); +exit: + mutex_unlock(&mutex); +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.h b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.h new file mode 100644 index 000000000000..67cc0025f5d5 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.h @@ -0,0 +1,188 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __OTX_CPT_ALGS_H +#define __OTX_CPT_ALGS_H + +#include <crypto/hash.h> +#include "otx_cpt_common.h" + +#define OTX_CPT_MAX_ENC_KEY_SIZE 32 +#define OTX_CPT_MAX_HASH_KEY_SIZE 64 +#define OTX_CPT_MAX_KEY_SIZE (OTX_CPT_MAX_ENC_KEY_SIZE + \ + OTX_CPT_MAX_HASH_KEY_SIZE) +enum otx_cpt_request_type { + OTX_CPT_ENC_DEC_REQ = 0x1, + OTX_CPT_AEAD_ENC_DEC_REQ = 0x2, + OTX_CPT_AEAD_ENC_DEC_NULL_REQ = 0x3, + OTX_CPT_PASSTHROUGH_REQ = 0x4 +}; + +enum otx_cpt_major_opcodes { + OTX_CPT_MAJOR_OP_MISC = 0x01, + OTX_CPT_MAJOR_OP_FC = 0x33, + OTX_CPT_MAJOR_OP_HMAC = 0x35, +}; + +enum otx_cpt_req_type { + OTX_CPT_AE_CORE_REQ, + OTX_CPT_SE_CORE_REQ +}; + +enum otx_cpt_cipher_type { + OTX_CPT_CIPHER_NULL = 0x0, + OTX_CPT_DES3_CBC = 0x1, + OTX_CPT_DES3_ECB = 0x2, + OTX_CPT_AES_CBC = 0x3, + OTX_CPT_AES_ECB = 0x4, + OTX_CPT_AES_CFB = 0x5, + OTX_CPT_AES_CTR = 0x6, + OTX_CPT_AES_GCM = 0x7, + OTX_CPT_AES_XTS = 0x8 +}; + +enum otx_cpt_mac_type { + OTX_CPT_MAC_NULL = 0x0, + OTX_CPT_MD5 = 0x1, + OTX_CPT_SHA1 = 0x2, + OTX_CPT_SHA224 = 0x3, + OTX_CPT_SHA256 = 0x4, + OTX_CPT_SHA384 = 0x5, + OTX_CPT_SHA512 = 0x6, + OTX_CPT_GMAC = 0x7 +}; + +enum otx_cpt_aes_key_len { + OTX_CPT_AES_128_BIT = 0x1, + OTX_CPT_AES_192_BIT = 0x2, + OTX_CPT_AES_256_BIT = 0x3 +}; + +union otx_cpt_encr_ctrl { + u64 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 enc_cipher:4; + u64 reserved1:1; + u64 aes_key:2; + u64 iv_source:1; + u64 mac_type:4; + u64 reserved2:3; + u64 auth_input_type:1; + u64 mac_len:8; + u64 reserved3:8; + u64 encr_offset:16; + u64 iv_offset:8; + u64 auth_offset:8; +#else + u64 auth_offset:8; + u64 iv_offset:8; + u64 encr_offset:16; + u64 reserved3:8; + u64 mac_len:8; + u64 auth_input_type:1; + u64 reserved2:3; + u64 mac_type:4; + u64 iv_source:1; + u64 aes_key:2; + u64 reserved1:1; + u64 enc_cipher:4; +#endif + } e; +}; + +struct otx_cpt_cipher { + const char *name; + u8 value; +}; + +struct otx_cpt_enc_context { + union otx_cpt_encr_ctrl enc_ctrl; + u8 encr_key[32]; + u8 
encr_iv[16]; +}; + +union otx_cpt_fchmac_ctx { + struct { + u8 ipad[64]; + u8 opad[64]; + } e; + struct { + u8 hmac_calc[64]; /* HMAC calculated */ + u8 hmac_recv[64]; /* HMAC received */ + } s; +}; + +struct otx_cpt_fc_ctx { + struct otx_cpt_enc_context enc; + union otx_cpt_fchmac_ctx hmac; +}; + +struct otx_cpt_enc_ctx { + u32 key_len; + u8 enc_key[OTX_CPT_MAX_KEY_SIZE]; + u8 cipher_type; + u8 key_type; +}; + +struct otx_cpt_des3_ctx { + u32 key_len; + u8 des3_key[OTX_CPT_MAX_KEY_SIZE]; +}; + +union otx_cpt_offset_ctrl_word { + u64 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved:32; + u64 enc_data_offset:16; + u64 iv_offset:8; + u64 auth_offset:8; +#else + u64 auth_offset:8; + u64 iv_offset:8; + u64 enc_data_offset:16; + u64 reserved:32; +#endif + } e; +}; + +struct otx_cpt_req_ctx { + struct otx_cpt_req_info cpt_req; + union otx_cpt_offset_ctrl_word ctrl_word; + struct otx_cpt_fc_ctx fctx; +}; + +struct otx_cpt_sdesc { + struct shash_desc shash; +}; + +struct otx_cpt_aead_ctx { + u8 key[OTX_CPT_MAX_KEY_SIZE]; + struct crypto_shash *hashalg; + struct otx_cpt_sdesc *sdesc; + u8 *ipad; + u8 *opad; + u32 enc_key_len; + u32 auth_key_len; + u8 cipher_type; + u8 mac_type; + u8 key_type; + u8 is_trunc_hmac; +}; +int otx_cpt_crypto_init(struct pci_dev *pdev, struct module *mod, + enum otx_cptpf_type pf_type, + enum otx_cptvf_type engine_type, + int num_queues, int num_devices); +void otx_cpt_crypto_exit(struct pci_dev *pdev, struct module *mod, + enum otx_cptvf_type engine_type); +void otx_cpt_callback(int status, void *arg, void *req); + +#endif /* __OTX_CPT_ALGS_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c new file mode 100644 index 000000000000..a91860b5dc77 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c @@ -0,0 +1,985 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/interrupt.h> +#include <linux/module.h> +#include "otx_cptvf.h" +#include "otx_cptvf_algs.h" +#include "otx_cptvf_reqmgr.h" + +#define DRV_NAME "octeontx-cptvf" +#define DRV_VERSION "1.0" + +static void vq_work_handler(unsigned long data) +{ + struct otx_cptvf_wqe_info *cwqe_info = + (struct otx_cptvf_wqe_info *) data; + + otx_cpt_post_process(&cwqe_info->vq_wqe[0]); +} + +static int init_worker_threads(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct otx_cptvf_wqe_info *cwqe_info; + int i; + + cwqe_info = kzalloc(sizeof(*cwqe_info), GFP_KERNEL); + if (!cwqe_info) + return -ENOMEM; + + if (cptvf->num_queues) { + dev_dbg(&pdev->dev, "Creating VQ worker threads (%d)\n", + cptvf->num_queues); + } + + for (i = 0; i < cptvf->num_queues; i++) { + tasklet_init(&cwqe_info->vq_wqe[i].twork, vq_work_handler, + (u64)cwqe_info); + cwqe_info->vq_wqe[i].cptvf = cptvf; + } + cptvf->wqe_info = cwqe_info; + + return 0; +} + +static void cleanup_worker_threads(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct otx_cptvf_wqe_info *cwqe_info; + int i; + + cwqe_info = (struct otx_cptvf_wqe_info *)cptvf->wqe_info; + if (!cwqe_info) + return; + + if (cptvf->num_queues) { + dev_dbg(&pdev->dev, "Cleaning VQ worker threads (%u)\n", + cptvf->num_queues); + } + + for (i = 0; i < cptvf->num_queues; i++) + tasklet_kill(&cwqe_info->vq_wqe[i].twork); + + kzfree(cwqe_info); + cptvf->wqe_info = NULL; +} + +static void free_pending_queues(struct otx_cpt_pending_qinfo *pqinfo) +{ + struct otx_cpt_pending_queue *queue; + int i; + + for_each_pending_queue(pqinfo, queue, i) { + if (!queue->head) + continue; + + /* free single queue */ + kzfree((queue->head)); + queue->front = 0; + queue->rear = 0; + queue->qlen = 0; + } + pqinfo->num_queues 
= 0; +} + +static int alloc_pending_queues(struct otx_cpt_pending_qinfo *pqinfo, u32 qlen, + u32 num_queues) +{ + struct otx_cpt_pending_queue *queue = NULL; + size_t size; + int ret; + u32 i; + + pqinfo->num_queues = num_queues; + size = (qlen * sizeof(struct otx_cpt_pending_entry)); + + for_each_pending_queue(pqinfo, queue, i) { + queue->head = kzalloc((size), GFP_KERNEL); + if (!queue->head) { + ret = -ENOMEM; + goto pending_qfail; + } + + queue->pending_count = 0; + queue->front = 0; + queue->rear = 0; + queue->qlen = qlen; + + /* init queue spin lock */ + spin_lock_init(&queue->lock); + } + return 0; + +pending_qfail: + free_pending_queues(pqinfo); + + return ret; +} + +static int init_pending_queues(struct otx_cptvf *cptvf, u32 qlen, + u32 num_queues) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret; + + if (!num_queues) + return 0; + + ret = alloc_pending_queues(&cptvf->pqinfo, qlen, num_queues); + if (ret) { + dev_err(&pdev->dev, "Failed to setup pending queues (%u)\n", + num_queues); + return ret; + } + return 0; +} + +static void cleanup_pending_queues(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (!cptvf->num_queues) + return; + + dev_dbg(&pdev->dev, "Cleaning VQ pending queue (%u)\n", + cptvf->num_queues); + free_pending_queues(&cptvf->pqinfo); +} + +static void free_command_queues(struct otx_cptvf *cptvf, + struct otx_cpt_cmd_qinfo *cqinfo) +{ + struct otx_cpt_cmd_queue *queue = NULL; + struct otx_cpt_cmd_chunk *chunk = NULL; + struct pci_dev *pdev = cptvf->pdev; + int i; + + /* clean up for each queue */ + for (i = 0; i < cptvf->num_queues; i++) { + queue = &cqinfo->queue[i]; + + while (!list_empty(&cqinfo->queue[i].chead)) { + chunk = list_first_entry(&cqinfo->queue[i].chead, + struct otx_cpt_cmd_chunk, nextchunk); + + dma_free_coherent(&pdev->dev, chunk->size, + chunk->head, + chunk->dma_addr); + chunk->head = NULL; + chunk->dma_addr = 0; + list_del(&chunk->nextchunk); + kzfree(chunk); + } + queue->num_chunks = 0; + 
queue->idx = 0; + + } +} + +static int alloc_command_queues(struct otx_cptvf *cptvf, + struct otx_cpt_cmd_qinfo *cqinfo, + u32 qlen) +{ + struct otx_cpt_cmd_chunk *curr, *first, *last; + struct otx_cpt_cmd_queue *queue = NULL; + struct pci_dev *pdev = cptvf->pdev; + size_t q_size, c_size, rem_q_size; + u32 qcsize_bytes; + int i; + + + /* Qsize in dwords, needed for SADDR config, 1-next chunk pointer */ + cptvf->qsize = min(qlen, cqinfo->qchunksize) * + OTX_CPT_NEXT_CHUNK_PTR_SIZE + 1; + /* Qsize in bytes to create space for alignment */ + q_size = qlen * OTX_CPT_INST_SIZE; + + qcsize_bytes = cqinfo->qchunksize * OTX_CPT_INST_SIZE; + + /* per queue initialization */ + for (i = 0; i < cptvf->num_queues; i++) { + c_size = 0; + rem_q_size = q_size; + first = NULL; + last = NULL; + + queue = &cqinfo->queue[i]; + INIT_LIST_HEAD(&queue->chead); + do { + curr = kzalloc(sizeof(*curr), GFP_KERNEL); + if (!curr) + goto cmd_qfail; + + c_size = (rem_q_size > qcsize_bytes) ? qcsize_bytes : + rem_q_size; + curr->head = dma_alloc_coherent(&pdev->dev, + c_size + OTX_CPT_NEXT_CHUNK_PTR_SIZE, + &curr->dma_addr, GFP_KERNEL); + if (!curr->head) { + dev_err(&pdev->dev, + "Command Q (%d) chunk (%d) allocation failed\n", + i, queue->num_chunks); + goto free_curr; + } + curr->size = c_size; + + if (queue->num_chunks == 0) { + first = curr; + queue->base = first; + } + list_add_tail(&curr->nextchunk, + &cqinfo->queue[i].chead); + + queue->num_chunks++; + rem_q_size -= c_size; + if (last) + *((u64 *)(&last->head[last->size])) = + (u64)curr->dma_addr; + + last = curr; + } while (rem_q_size); + + /* + * Make the queue circular, tie back last chunk entry to head + */ + curr = first; + *((u64 *)(&last->head[last->size])) = (u64)curr->dma_addr; + queue->qhead = curr; + } + return 0; +free_curr: + kfree(curr); +cmd_qfail: + free_command_queues(cptvf, cqinfo); + return -ENOMEM; +} + +static int init_command_queues(struct otx_cptvf *cptvf, u32 qlen) +{ + struct pci_dev *pdev = cptvf->pdev; + int 
ret; + + /* setup command queues */ + ret = alloc_command_queues(cptvf, &cptvf->cqinfo, qlen); + if (ret) { + dev_err(&pdev->dev, "Failed to allocate command queues (%u)\n", + cptvf->num_queues); + return ret; + } + return ret; +} + +static void cleanup_command_queues(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (!cptvf->num_queues) + return; + + dev_dbg(&pdev->dev, "Cleaning VQ command queue (%u)\n", + cptvf->num_queues); + free_command_queues(cptvf, &cptvf->cqinfo); +} + +static void cptvf_sw_cleanup(struct otx_cptvf *cptvf) +{ + cleanup_worker_threads(cptvf); + cleanup_pending_queues(cptvf); + cleanup_command_queues(cptvf); +} + +static int cptvf_sw_init(struct otx_cptvf *cptvf, u32 qlen, u32 num_queues) +{ + struct pci_dev *pdev = cptvf->pdev; + u32 max_dev_queues = 0; + int ret; + + max_dev_queues = OTX_CPT_NUM_QS_PER_VF; + /* possible cpus */ + num_queues = min_t(u32, num_queues, max_dev_queues); + cptvf->num_queues = num_queues; + + ret = init_command_queues(cptvf, qlen); + if (ret) { + dev_err(&pdev->dev, "Failed to setup command queues (%u)\n", + num_queues); + return ret; + } + + ret = init_pending_queues(cptvf, qlen, num_queues); + if (ret) { + dev_err(&pdev->dev, "Failed to setup pending queues (%u)\n", + num_queues); + goto setup_pqfail; + } + + /* Create worker threads for BH processing */ + ret = init_worker_threads(cptvf); + if (ret) { + dev_err(&pdev->dev, "Failed to setup worker threads\n"); + goto init_work_fail; + } + return 0; + +init_work_fail: + cleanup_worker_threads(cptvf); + cleanup_pending_queues(cptvf); + +setup_pqfail: + cleanup_command_queues(cptvf); + + return ret; +} + +static void cptvf_free_irq_affinity(struct otx_cptvf *cptvf, int vec) +{ + irq_set_affinity_hint(pci_irq_vector(cptvf->pdev, vec), NULL); + free_cpumask_var(cptvf->affinity_mask[vec]); +} + +static void cptvf_write_vq_ctl(struct otx_cptvf *cptvf, bool val) +{ + union otx_cptx_vqx_ctl vqx_ctl; + + vqx_ctl.u = readq(cptvf->reg_base + 
OTX_CPT_VQX_CTL(0)); + vqx_ctl.s.ena = val; + writeq(vqx_ctl.u, cptvf->reg_base + OTX_CPT_VQX_CTL(0)); +} + +void otx_cptvf_write_vq_doorbell(struct otx_cptvf *cptvf, u32 val) +{ + union otx_cptx_vqx_doorbell vqx_dbell; + + vqx_dbell.u = readq(cptvf->reg_base + OTX_CPT_VQX_DOORBELL(0)); + vqx_dbell.s.dbell_cnt = val * 8; /* Num of Instructions * 8 words */ + writeq(vqx_dbell.u, cptvf->reg_base + OTX_CPT_VQX_DOORBELL(0)); +} + +static void cptvf_write_vq_inprog(struct otx_cptvf *cptvf, u8 val) +{ + union otx_cptx_vqx_inprog vqx_inprg; + + vqx_inprg.u = readq(cptvf->reg_base + OTX_CPT_VQX_INPROG(0)); + vqx_inprg.s.inflight = val; + writeq(vqx_inprg.u, cptvf->reg_base + OTX_CPT_VQX_INPROG(0)); +} + +static void cptvf_write_vq_done_numwait(struct otx_cptvf *cptvf, u32 val) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + vqx_dwait.s.num_wait = val; + writeq(vqx_dwait.u, cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); +} + +static u32 cptvf_read_vq_done_numwait(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + return vqx_dwait.s.num_wait; +} + +static void cptvf_write_vq_done_timewait(struct otx_cptvf *cptvf, u16 time) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + vqx_dwait.s.time_wait = time; + writeq(vqx_dwait.u, cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); +} + + +static u16 cptvf_read_vq_done_timewait(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + return vqx_dwait.s.time_wait; +} + +static void cptvf_enable_swerr_interrupts(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_ena_w1s vqx_misc_ena; + + vqx_misc_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); + /* Enable SWERR interrupts for the requested VF */ + vqx_misc_ena.s.swerr = 1; + 
writeq(vqx_misc_ena.u, cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); +} + +static void cptvf_enable_mbox_interrupts(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_ena_w1s vqx_misc_ena; + + vqx_misc_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); + /* Enable MBOX interrupt for the requested VF */ + vqx_misc_ena.s.mbox = 1; + writeq(vqx_misc_ena.u, cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); +} + +static void cptvf_enable_done_interrupts(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done_ena_w1s vqx_done_ena; + + vqx_done_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_ENA_W1S(0)); + /* Enable DONE interrupt for the requested VF */ + vqx_done_ena.s.done = 1; + writeq(vqx_done_ena.u, cptvf->reg_base + OTX_CPT_VQX_DONE_ENA_W1S(0)); +} + +static void cptvf_clear_dovf_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.dovf = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_irde_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.irde = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_nwrp_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.nwrp = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_mbox_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.mbox = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void 
cptvf_clear_swerr_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.swerr = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static u64 cptvf_read_vf_misc_intr_status(struct otx_cptvf *cptvf) +{ + return readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static irqreturn_t cptvf_misc_intr_handler(int __always_unused irq, + void *arg) +{ + struct otx_cptvf *cptvf = arg; + struct pci_dev *pdev = cptvf->pdev; + u64 intr; + + intr = cptvf_read_vf_misc_intr_status(cptvf); + /* Check for MISC interrupt types */ + if (likely(intr & OTX_CPT_VF_INTR_MBOX_MASK)) { + dev_dbg(&pdev->dev, "Mailbox interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + otx_cptvf_handle_mbox_intr(cptvf); + cptvf_clear_mbox_intr(cptvf); + } else if (unlikely(intr & OTX_CPT_VF_INTR_DOVF_MASK)) { + cptvf_clear_dovf_intr(cptvf); + /* Clear doorbell count */ + otx_cptvf_write_vq_doorbell(cptvf, 0); + dev_err(&pdev->dev, + "Doorbell overflow error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & OTX_CPT_VF_INTR_IRDE_MASK)) { + cptvf_clear_irde_intr(cptvf); + dev_err(&pdev->dev, + "Instruction NCB read error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & OTX_CPT_VF_INTR_NWRP_MASK)) { + cptvf_clear_nwrp_intr(cptvf); + dev_err(&pdev->dev, + "NCB response write error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & OTX_CPT_VF_INTR_SERR_MASK)) { + cptvf_clear_swerr_intr(cptvf); + dev_err(&pdev->dev, + "Software error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else { + dev_err(&pdev->dev, "Unhandled interrupt in OTX_CPT VF %d\n", + cptvf->vfid); + } + + return IRQ_HANDLED; +} + +static inline struct otx_cptvf_wqe *get_cptvf_vq_wqe(struct otx_cptvf *cptvf, + int qno) +{ + struct otx_cptvf_wqe_info *nwqe_info; + + 
if (unlikely(qno >= cptvf->num_queues)) + return NULL; + nwqe_info = (struct otx_cptvf_wqe_info *)cptvf->wqe_info; + + return &nwqe_info->vq_wqe[qno]; +} + +static inline u32 cptvf_read_vq_done_count(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done vqx_done; + + vqx_done.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE(0)); + return vqx_done.s.done; +} + +static inline void cptvf_write_vq_done_ack(struct otx_cptvf *cptvf, + u32 ackcnt) +{ + union otx_cptx_vqx_done_ack vqx_dack_cnt; + + vqx_dack_cnt.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_ACK(0)); + vqx_dack_cnt.s.done_ack = ackcnt; + writeq(vqx_dack_cnt.u, cptvf->reg_base + OTX_CPT_VQX_DONE_ACK(0)); +} + +static irqreturn_t cptvf_done_intr_handler(int __always_unused irq, + void *cptvf_dev) +{ + struct otx_cptvf *cptvf = (struct otx_cptvf *)cptvf_dev; + struct pci_dev *pdev = cptvf->pdev; + /* Read the number of completions */ + u32 intr = cptvf_read_vq_done_count(cptvf); + + if (intr) { + struct otx_cptvf_wqe *wqe; + + /* + * Acknowledge the number of scheduled completions for + * processing + */ + cptvf_write_vq_done_ack(cptvf, intr); + wqe = get_cptvf_vq_wqe(cptvf, 0); + if (unlikely(!wqe)) { + dev_err(&pdev->dev, "No work to schedule for VF (%d)", + cptvf->vfid); + return IRQ_NONE; + } + tasklet_hi_schedule(&wqe->twork); + } + + return IRQ_HANDLED; +} + +static void cptvf_set_irq_affinity(struct otx_cptvf *cptvf, int vec) +{ + struct pci_dev *pdev = cptvf->pdev; + int cpu; + + if (!zalloc_cpumask_var(&cptvf->affinity_mask[vec], + GFP_KERNEL)) { + dev_err(&pdev->dev, + "Allocation failed for affinity_mask for VF %d", + cptvf->vfid); + return; + } + + cpu = cptvf->vfid % num_online_cpus(); + cpumask_set_cpu(cpumask_local_spread(cpu, cptvf->node), + cptvf->affinity_mask[vec]); + irq_set_affinity_hint(pci_irq_vector(pdev, vec), + cptvf->affinity_mask[vec]); +} + +static void cptvf_write_vq_saddr(struct otx_cptvf *cptvf, u64 val) +{ + union otx_cptx_vqx_saddr vqx_saddr; + + vqx_saddr.u = val; + 
writeq(vqx_saddr.u, cptvf->reg_base + OTX_CPT_VQX_SADDR(0)); +} + +static void cptvf_device_init(struct otx_cptvf *cptvf) +{ + u64 base_addr = 0; + + /* Disable the VQ */ + cptvf_write_vq_ctl(cptvf, 0); + /* Reset the doorbell */ + otx_cptvf_write_vq_doorbell(cptvf, 0); + /* Clear inflight */ + cptvf_write_vq_inprog(cptvf, 0); + /* Write VQ SADDR */ + base_addr = (u64)(cptvf->cqinfo.queue[0].qhead->dma_addr); + cptvf_write_vq_saddr(cptvf, base_addr); + /* Configure timerhold / coalescence */ + cptvf_write_vq_done_timewait(cptvf, OTX_CPT_TIMER_HOLD); + cptvf_write_vq_done_numwait(cptvf, OTX_CPT_COUNT_HOLD); + /* Enable the VQ */ + cptvf_write_vq_ctl(cptvf, 1); + /* Flag the VF ready */ + cptvf->flags |= OTX_CPT_FLAG_DEVICE_READY; +} + +static ssize_t vf_type_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + char *msg; + + switch (cptvf->vftype) { + case OTX_CPT_AE_TYPES: + msg = "AE"; + break; + + case OTX_CPT_SE_TYPES: + msg = "SE"; + break; + + default: + msg = "Invalid"; + } + + return scnprintf(buf, PAGE_SIZE, "%s\n", msg); +} + +static ssize_t vf_engine_group_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", cptvf->vfgrp); +} + +static ssize_t vf_engine_group_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + int val, ret; + + ret = kstrtoint(buf, 10, &val); + if (ret) + return ret; + + if (val < 0) + return -EINVAL; + + if (val >= OTX_CPT_MAX_ENGINE_GROUPS) { + dev_err(dev, "Engine group >= than max available groups %d", + OTX_CPT_MAX_ENGINE_GROUPS); + return -EINVAL; + } + + ret = otx_cptvf_send_vf_to_grp_msg(cptvf, val); + if (ret) + return ret; + + return count; +} + +static ssize_t vf_coalesc_time_wait_show(struct device *dev, + struct device_attribute *attr, + char 
*buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + cptvf_read_vq_done_timewait(cptvf)); +} + +static ssize_t vf_coalesc_num_wait_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + cptvf_read_vq_done_numwait(cptvf)); +} + +static ssize_t vf_coalesc_time_wait_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + long val; + int ret; + + ret = kstrtol(buf, 10, &val); + if (ret != 0) + return ret; + + if (val < OTX_CPT_COALESC_MIN_TIME_WAIT || + val > OTX_CPT_COALESC_MAX_TIME_WAIT) + return -EINVAL; + + cptvf_write_vq_done_timewait(cptvf, val); + return count; +} + +static ssize_t vf_coalesc_num_wait_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + long val; + int ret; + + ret = kstrtol(buf, 10, &val); + if (ret != 0) + return ret; + + if (val < OTX_CPT_COALESC_MIN_NUM_WAIT || + val > OTX_CPT_COALESC_MAX_NUM_WAIT) + return -EINVAL; + + cptvf_write_vq_done_numwait(cptvf, val); + return count; +} + +static DEVICE_ATTR_RO(vf_type); +static DEVICE_ATTR_RW(vf_engine_group); +static DEVICE_ATTR_RW(vf_coalesc_time_wait); +static DEVICE_ATTR_RW(vf_coalesc_num_wait); + +static struct attribute *otx_cptvf_attrs[] = { + &dev_attr_vf_type.attr, + &dev_attr_vf_engine_group.attr, + &dev_attr_vf_coalesc_time_wait.attr, + &dev_attr_vf_coalesc_num_wait.attr, + NULL +}; + +static const struct attribute_group otx_cptvf_sysfs_group = { + .attrs = otx_cptvf_attrs, +}; + +static int otx_cptvf_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct otx_cptvf *cptvf; + int err; + + cptvf = devm_kzalloc(dev, sizeof(*cptvf), GFP_KERNEL); + if (!cptvf) + return -ENOMEM; + + 
pci_set_drvdata(pdev, cptvf); + cptvf->pdev = pdev; + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + goto clear_drvdata; + } + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto disable_device; + } + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto release_regions; + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); + goto release_regions; + } + + /* MAP PF's configuration registers */ + cptvf->reg_base = pci_iomap(pdev, OTX_CPT_VF_PCI_CFG_BAR, 0); + if (!cptvf->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto release_regions; + } + + cptvf->node = dev_to_node(&pdev->dev); + err = pci_alloc_irq_vectors(pdev, OTX_CPT_VF_MSIX_VECTORS, + OTX_CPT_VF_MSIX_VECTORS, PCI_IRQ_MSIX); + if (err < 0) { + dev_err(dev, "Request for #%d msix vectors failed\n", + OTX_CPT_VF_MSIX_VECTORS); + goto unmap_region; + } + + err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), + cptvf_misc_intr_handler, 0, "CPT VF misc intr", + cptvf); + if (err) { + dev_err(dev, "Failed to request misc irq"); + goto free_vectors; + } + + /* Enable mailbox interrupt */ + cptvf_enable_mbox_interrupts(cptvf); + cptvf_enable_swerr_interrupts(cptvf); + + /* Check cpt pf status, gets chip ID / device Id from PF if ready */ + err = otx_cptvf_check_pf_ready(cptvf); + if (err) + goto free_misc_irq; + + /* CPT VF software resources initialization */ + cptvf->cqinfo.qchunksize = OTX_CPT_CMD_QCHUNK_SIZE; + err = cptvf_sw_init(cptvf, OTX_CPT_CMD_QLEN, OTX_CPT_NUM_QS_PER_VF); + if (err) { + dev_err(dev, "cptvf_sw_init() failed"); + goto free_misc_irq; + } + /* Convey VQ LEN to PF */ + err = otx_cptvf_send_vq_size_msg(cptvf); + if (err) + goto sw_cleanup; + + /* 
CPT VF device initialization */ + cptvf_device_init(cptvf); + /* Send msg to PF to assign current Q to required group */ + err = otx_cptvf_send_vf_to_grp_msg(cptvf, cptvf->vfgrp); + if (err) + goto sw_cleanup; + + cptvf->priority = 1; + err = otx_cptvf_send_vf_priority_msg(cptvf); + if (err) + goto sw_cleanup; + + err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), + cptvf_done_intr_handler, 0, "CPT VF done intr", + cptvf); + if (err) { + dev_err(dev, "Failed to request done irq\n"); + /* Done irq was not acquired, so unwind without freeing it */ + goto sw_cleanup; + } + + /* Enable done interrupt */ + cptvf_enable_done_interrupts(cptvf); + + /* Set irq affinity masks */ + cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); + cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + + err = otx_cptvf_send_vf_up(cptvf); + if (err) + goto free_irq_affinity; + + /* Initialize algorithms and set ops */ + err = otx_cpt_crypto_init(pdev, THIS_MODULE, + cptvf->vftype == OTX_CPT_SE_TYPES ? OTX_CPT_SE : OTX_CPT_AE, + cptvf->vftype, 1, cptvf->num_vfs); + if (err) { + dev_err(dev, "Failed to register crypto algs\n"); + goto free_irq_affinity; + } + + err = sysfs_create_group(&dev->kobj, &otx_cptvf_sysfs_group); + if (err) { + dev_err(dev, "Creating sysfs entries failed\n"); + goto crypto_exit; + } + + return 0; + +crypto_exit: + otx_cpt_crypto_exit(pdev, THIS_MODULE, cptvf->vftype); +free_irq_affinity: + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), cptvf); +sw_cleanup: + cptvf_sw_cleanup(cptvf); +free_misc_irq: + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf); +free_vectors: + pci_free_irq_vectors(cptvf->pdev); +unmap_region: + pci_iounmap(pdev, cptvf->reg_base); +release_regions: + pci_release_regions(pdev); +disable_device: + pci_disable_device(pdev); +clear_drvdata: + pci_set_drvdata(pdev, NULL); + + return err; +} + +static void otx_cptvf_remove(struct pci_dev *pdev) +{ 
+ struct otx_cptvf *cptvf = pci_get_drvdata(pdev); + + if (!cptvf) { + dev_err(&pdev->dev, "Invalid CPT-VF device\n"); + return; + } + + /* Convey DOWN to PF */ + if (otx_cptvf_send_vf_down(cptvf)) { + dev_err(&pdev->dev, "PF not responding to DOWN msg"); + } else { + sysfs_remove_group(&pdev->dev.kobj, &otx_cptvf_sysfs_group); + otx_cpt_crypto_exit(pdev, THIS_MODULE, cptvf->vftype); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), cptvf); + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf); + cptvf_sw_cleanup(cptvf); + pci_free_irq_vectors(cptvf->pdev); + pci_iounmap(pdev, cptvf->reg_base); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + } +} + +/* Supported devices */ +static const struct pci_device_id otx_cptvf_id_table[] = { + {PCI_VDEVICE(CAVIUM, OTX_CPT_PCI_VF_DEVICE_ID), 0}, + { 0, } /* end of table */ +}; + +static struct pci_driver otx_cptvf_pci_driver = { + .name = DRV_NAME, + .id_table = otx_cptvf_id_table, + .probe = otx_cptvf_probe, + .remove = otx_cptvf_remove, +}; + +module_pci_driver(otx_cptvf_pci_driver); + +MODULE_AUTHOR("Marvell International Ltd."); +MODULE_DESCRIPTION("Marvell OcteonTX CPT Virtual Function Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, otx_cptvf_id_table); diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c b/drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c new file mode 100644 index 000000000000..5663787c7a62 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/delay.h> +#include "otx_cptvf.h" + +#define CPT_MBOX_MSG_TIMEOUT 2000 + +static char *get_mbox_opcode_str(int msg_opcode) +{ + char *str = "Unknown"; + + switch (msg_opcode) { + case OTX_CPT_MSG_VF_UP: + str = "UP"; + break; + + case OTX_CPT_MSG_VF_DOWN: + str = "DOWN"; + break; + + case OTX_CPT_MSG_READY: + str = "READY"; + break; + + case OTX_CPT_MSG_QLEN: + str = "QLEN"; + break; + + case OTX_CPT_MSG_QBIND_GRP: + str = "QBIND_GRP"; + break; + + case OTX_CPT_MSG_VQ_PRIORITY: + str = "VQ_PRIORITY"; + break; + + case OTX_CPT_MSG_PF_TYPE: + str = "PF_TYPE"; + break; + + case OTX_CPT_MSG_ACK: + str = "ACK"; + break; + + case OTX_CPT_MSG_NACK: + str = "NACK"; + break; + } + return str; +} + +static void dump_mbox_msg(struct otx_cpt_mbox *mbox_msg, int vf_id) +{ + char raw_data_str[OTX_CPT_MAX_MBOX_DATA_STR_SIZE]; + + hex_dump_to_buffer(mbox_msg, sizeof(struct otx_cpt_mbox), 16, 8, + raw_data_str, OTX_CPT_MAX_MBOX_DATA_STR_SIZE, false); + if (vf_id >= 0) + pr_debug("MBOX msg %s received from VF%d raw_data %s", + get_mbox_opcode_str(mbox_msg->msg), vf_id, + raw_data_str); + else + pr_debug("MBOX msg %s received from PF raw_data %s", + get_mbox_opcode_str(mbox_msg->msg), raw_data_str); +} + +static void cptvf_send_msg_to_pf(struct otx_cptvf *cptvf, + struct otx_cpt_mbox *mbx) +{ + /* Writing mbox(1) causes interrupt */ + writeq(mbx->msg, cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 0)); + writeq(mbx->data, cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 1)); +} + +/* Interrupt handler to handle mailbox messages from VFs */ +void otx_cptvf_handle_mbox_intr(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + + /* + * MBOX[0] contains msg + * MBOX[1] contains data + */ + mbx.msg = readq(cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 0)); + mbx.data = 
readq(cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 1)); + + dump_mbox_msg(&mbx, -1); + + switch (mbx.msg) { + case OTX_CPT_MSG_VF_UP: + cptvf->pf_acked = true; + cptvf->num_vfs = mbx.data; + break; + case OTX_CPT_MSG_READY: + cptvf->pf_acked = true; + cptvf->vfid = mbx.data; + dev_dbg(&cptvf->pdev->dev, "Received VFID %d\n", cptvf->vfid); + break; + case OTX_CPT_MSG_QBIND_GRP: + cptvf->pf_acked = true; + cptvf->vftype = mbx.data; + dev_dbg(&cptvf->pdev->dev, "VF %d type %s group %d\n", + cptvf->vfid, + ((mbx.data == OTX_CPT_SE_TYPES) ? "SE" : "AE"), + cptvf->vfgrp); + break; + case OTX_CPT_MSG_ACK: + cptvf->pf_acked = true; + break; + case OTX_CPT_MSG_NACK: + cptvf->pf_nacked = true; + break; + default: + dev_err(&cptvf->pdev->dev, "Invalid msg from PF, msg 0x%llx\n", + mbx.msg); + break; + } +} + +static int cptvf_send_msg_to_pf_timeout(struct otx_cptvf *cptvf, + struct otx_cpt_mbox *mbx) +{ + int timeout = CPT_MBOX_MSG_TIMEOUT; + int sleep = 10; + + cptvf->pf_acked = false; + cptvf->pf_nacked = false; + cptvf_send_msg_to_pf(cptvf, mbx); + /* Wait for the message to be acked or nacked, timeout 2sec */ + while (!cptvf->pf_acked) { + if (cptvf->pf_nacked) + return -EINVAL; + msleep(sleep); + if (cptvf->pf_acked) + break; + timeout -= sleep; + if (!timeout) { + dev_err(&cptvf->pdev->dev, + "PF didn't ack to mbox msg %llx from VF%u\n", + mbx->msg, cptvf->vfid); + return -EBUSY; + } + } + return 0; +} + +/* + * Checks if VF is able to communicate with PF + * and also gets the id of this VF from the PF's READY reply. + */ +int otx_cptvf_check_pf_ready(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + int ret; + + mbx.msg = OTX_CPT_MSG_READY; + ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx); + + return ret; +} + +/* + * Communicate VQs size to PF to program CPT(0)_PF_Q(0-15)_CTL of the VF. + * Must be ACKed. 
+ */ +int otx_cptvf_send_vq_size_msg(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + int ret; + + mbx.msg = OTX_CPT_MSG_QLEN; + mbx.data = cptvf->qsize; + ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx); + + return ret; +} + +/* + * Communicate VF group required to PF and get the VQ bound to that group + */ +int otx_cptvf_send_vf_to_grp_msg(struct otx_cptvf *cptvf, int group) +{ + struct otx_cpt_mbox mbx = {}; + int ret; + + mbx.msg = OTX_CPT_MSG_QBIND_GRP; + /* Convey group of the VF */ + mbx.data = group; + ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx); + if (ret) + return ret; + cptvf->vfgrp = group; + + return 0; +} + +/* + * Communicate the VQ priority of this VF (cptvf->priority) to PF + */ +int otx_cptvf_send_vf_priority_msg(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + int ret; + + mbx.msg = OTX_CPT_MSG_VQ_PRIORITY; + /* Convey priority of the VF */ + mbx.data = cptvf->priority; + ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx); + + return ret; +} + +/* + * Communicate to PF that VF is UP and running + */ +int otx_cptvf_send_vf_up(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + int ret; + + mbx.msg = OTX_CPT_MSG_VF_UP; + ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx); + + return ret; +} + +/* + * Communicate to PF that VF is DOWN + */ +int otx_cptvf_send_vf_down(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + int ret; + + mbx.msg = OTX_CPT_MSG_VF_DOWN; + ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx); + + return ret; +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c new file mode 100644 index 000000000000..df839b880354 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c @@ -0,0 +1,612 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "otx_cptvf.h" +#include "otx_cptvf_algs.h" + +/* Completion code size and initial value */ +#define COMPLETION_CODE_SIZE 8 +#define COMPLETION_CODE_INIT 0 + +/* SG list header size in bytes */ +#define SG_LIST_HDR_SIZE 8 + +/* Default timeout when waiting for free pending entry in us */ +#define CPT_PENTRY_TIMEOUT 1000 +#define CPT_PENTRY_STEP 50 + +/* Default threshold for stopping and resuming sender requests */ +#define CPT_IQ_STOP_MARGIN 128 +#define CPT_IQ_RESUME_MARGIN 512 + +#define CPT_DMA_ALIGN 128 + +void otx_cpt_dump_sg_list(struct pci_dev *pdev, struct otx_cpt_req_info *req) +{ + int i; + + pr_debug("Gather list size %d\n", req->incnt); + for (i = 0; i < req->incnt; i++) { + pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i, + req->in[i].size, req->in[i].vptr, + (void *) req->in[i].dma_addr); + pr_debug("Buffer hexdump (%d bytes)\n", + req->in[i].size); + print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, + req->in[i].vptr, req->in[i].size, false); + } + + pr_debug("Scatter list size %d\n", req->outcnt); + for (i = 0; i < req->outcnt; i++) { + pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i, + req->out[i].size, req->out[i].vptr, + (void *) req->out[i].dma_addr); + pr_debug("Buffer hexdump (%d bytes)\n", req->out[i].size); + print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, + req->out[i].vptr, req->out[i].size, false); + } +} + +static inline struct otx_cpt_pending_entry *get_free_pending_entry( + struct otx_cpt_pending_queue *q, + int qlen) +{ + struct otx_cpt_pending_entry *ent = NULL; + + ent = &q->head[q->rear]; + if (unlikely(ent->busy)) + return NULL; + + q->rear++; + if (unlikely(q->rear == qlen)) + q->rear = 0; + + return ent; +} + +static inline u32 modulo_inc(u32 index, u32 length, u32 inc) +{ + if (WARN_ON(inc > 
length)) + inc = length; + + index += inc; + if (unlikely(index >= length)) + index -= length; + + return index; +} + +static inline void free_pentry(struct otx_cpt_pending_entry *pentry) +{ + pentry->completion_addr = NULL; + pentry->info = NULL; + pentry->callback = NULL; + pentry->areq = NULL; + pentry->resume_sender = false; + pentry->busy = false; +} + +static inline int setup_sgio_components(struct pci_dev *pdev, + struct otx_cpt_buf_ptr *list, + int buf_count, u8 *buffer) +{ + struct otx_cpt_sglist_component *sg_ptr = NULL; + int ret = 0, i, j; + int components; + + if (unlikely(!list)) { + dev_err(&pdev->dev, "Input list pointer is NULL\n"); + return -EFAULT; + } + + for (i = 0; i < buf_count; i++) { + if (likely(list[i].vptr)) { + list[i].dma_addr = dma_map_single(&pdev->dev, + list[i].vptr, + list[i].size, + DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(&pdev->dev, + list[i].dma_addr))) { + dev_err(&pdev->dev, "Dma mapping failed\n"); + ret = -EIO; + goto sg_cleanup; + } + } + } + + components = buf_count / 4; + sg_ptr = (struct otx_cpt_sglist_component *)buffer; + for (i = 0; i < components; i++) { + sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size); + sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size); + sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size); + sg_ptr->u.s.len3 = cpu_to_be16(list[i * 4 + 3].size); + sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr); + sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr); + sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr); + sg_ptr->ptr3 = cpu_to_be64(list[i * 4 + 3].dma_addr); + sg_ptr++; + } + components = buf_count % 4; + + switch (components) { + case 3: + sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size); + sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr); + /* Fall through */ + case 2: + sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size); + sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr); + /* Fall through */ + case 1: + sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 
0].size); + sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr); + break; + default: + break; + } + return ret; + +sg_cleanup: + for (j = 0; j < i; j++) { + if (list[j].dma_addr) { + dma_unmap_single(&pdev->dev, list[i].dma_addr, + list[i].size, DMA_BIDIRECTIONAL); + } + + list[j].dma_addr = 0; + } + return ret; +} + +static inline int setup_sgio_list(struct pci_dev *pdev, + struct otx_cpt_info_buffer **pinfo, + struct otx_cpt_req_info *req, gfp_t gfp) +{ + u32 dlen, align_dlen, info_len, rlen; + struct otx_cpt_info_buffer *info; + u16 g_sz_bytes, s_sz_bytes; + int align = CPT_DMA_ALIGN; + u32 total_mem_len; + + if (unlikely(req->incnt > OTX_CPT_MAX_SG_IN_CNT || + req->outcnt > OTX_CPT_MAX_SG_OUT_CNT)) { + dev_err(&pdev->dev, "Error too many sg components\n"); + return -EINVAL; + } + + g_sz_bytes = ((req->incnt + 3) / 4) * + sizeof(struct otx_cpt_sglist_component); + s_sz_bytes = ((req->outcnt + 3) / 4) * + sizeof(struct otx_cpt_sglist_component); + + dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE; + align_dlen = ALIGN(dlen, align); + info_len = ALIGN(sizeof(*info), align); + rlen = ALIGN(sizeof(union otx_cpt_res_s), align); + total_mem_len = align_dlen + info_len + rlen + COMPLETION_CODE_SIZE; + + info = kzalloc(total_mem_len, gfp); + if (unlikely(!info)) { + dev_err(&pdev->dev, "Memory allocation failed\n"); + return -ENOMEM; + } + *pinfo = info; + info->dlen = dlen; + info->in_buffer = (u8 *)info + info_len; + + ((u16 *)info->in_buffer)[0] = req->outcnt; + ((u16 *)info->in_buffer)[1] = req->incnt; + ((u16 *)info->in_buffer)[2] = 0; + ((u16 *)info->in_buffer)[3] = 0; + *(u64 *)info->in_buffer = cpu_to_be64p((u64 *)info->in_buffer); + + /* Setup gather (input) components */ + if (setup_sgio_components(pdev, req->in, req->incnt, + &info->in_buffer[8])) { + dev_err(&pdev->dev, "Failed to setup gather list\n"); + return -EFAULT; + } + + if (setup_sgio_components(pdev, req->out, req->outcnt, + &info->in_buffer[8 + g_sz_bytes])) { + dev_err(&pdev->dev, "Failed to 
setup scatter list\n"); + return -EFAULT; + } + + info->dma_len = total_mem_len - info_len; + info->dptr_baddr = dma_map_single(&pdev->dev, (void *)info->in_buffer, + info->dma_len, DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) { + dev_err(&pdev->dev, "DMA Mapping failed for cpt req\n"); + return -EIO; + } + /* + * Get buffer for union otx_cpt_res_s response + * structure and its physical address + */ + info->completion_addr = (u64 *)(info->in_buffer + align_dlen); + info->comp_baddr = info->dptr_baddr + align_dlen; + + /* Create and initialize RPTR */ + info->out_buffer = (u8 *)info->completion_addr + rlen; + info->rptr_baddr = info->comp_baddr + rlen; + + *((u64 *) info->out_buffer) = ~((u64) COMPLETION_CODE_INIT); + + return 0; +} + + +static void cpt_fill_inst(union otx_cpt_inst_s *inst, + struct otx_cpt_info_buffer *info, + struct otx_cpt_iq_cmd *cmd) +{ + inst->u[0] = 0x0; + inst->s.doneint = true; + inst->s.res_addr = (u64)info->comp_baddr; + inst->u[2] = 0x0; + inst->s.wq_ptr = 0; + inst->s.ei0 = cmd->cmd.u64; + inst->s.ei1 = cmd->dptr; + inst->s.ei2 = cmd->rptr; + inst->s.ei3 = cmd->cptr.u64; +} + +/* + * On OcteonTX platform the parameter db_count is used as a count for ringing + * door bell. 
The valid values for db_count are: + * 0 - 1 CPT instruction will be enqueued however CPT will not be informed + * 1 - 1 CPT instruction will be enqueued and CPT will be informed + */ +static void cpt_send_cmd(union otx_cpt_inst_s *cptinst, struct otx_cptvf *cptvf) +{ + struct otx_cpt_cmd_qinfo *qinfo = &cptvf->cqinfo; + struct otx_cpt_cmd_queue *queue; + struct otx_cpt_cmd_chunk *curr; + u8 *ent; + + queue = &qinfo->queue[0]; + /* + * cpt_send_cmd is currently called only from critical section + * therefore no locking is required for accessing instruction queue + */ + ent = &queue->qhead->head[queue->idx * OTX_CPT_INST_SIZE]; + memcpy(ent, (void *) cptinst, OTX_CPT_INST_SIZE); + + if (++queue->idx >= queue->qhead->size / 64) { + curr = queue->qhead; + + if (list_is_last(&curr->nextchunk, &queue->chead)) + queue->qhead = queue->base; + else + queue->qhead = list_next_entry(queue->qhead, nextchunk); + queue->idx = 0; + } + /* make sure all memory stores are done before ringing doorbell */ + smp_wmb(); + otx_cptvf_write_vq_doorbell(cptvf, 1); +} + +static int process_request(struct pci_dev *pdev, struct otx_cpt_req_info *req, + struct otx_cpt_pending_queue *pqueue, + struct otx_cptvf *cptvf) +{ + struct otx_cptvf_request *cpt_req = &req->req; + struct otx_cpt_pending_entry *pentry = NULL; + union otx_cpt_ctrl_info *ctrl = &req->ctrl; + struct otx_cpt_info_buffer *info = NULL; + union otx_cpt_res_s *result = NULL; + struct otx_cpt_iq_cmd iq_cmd; + union otx_cpt_inst_s cptinst; + int retry, ret = 0; + u8 resume_sender; + gfp_t gfp; + + gfp = (req->areq->flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
GFP_KERNEL : + GFP_ATOMIC; + ret = setup_sgio_list(pdev, &info, req, gfp); + if (unlikely(ret)) { + dev_err(&pdev->dev, "Setting up SG list failed"); + goto request_cleanup; + } + cpt_req->dlen = info->dlen; + + result = (union otx_cpt_res_s *) info->completion_addr; + result->s.compcode = COMPLETION_CODE_INIT; + + spin_lock_bh(&pqueue->lock); + pentry = get_free_pending_entry(pqueue, pqueue->qlen); + retry = CPT_PENTRY_TIMEOUT / CPT_PENTRY_STEP; + while (unlikely(!pentry) && retry--) { + spin_unlock_bh(&pqueue->lock); + udelay(CPT_PENTRY_STEP); + spin_lock_bh(&pqueue->lock); + pentry = get_free_pending_entry(pqueue, pqueue->qlen); + } + + if (unlikely(!pentry)) { + ret = -ENOSPC; + spin_unlock_bh(&pqueue->lock); + goto request_cleanup; + } + + /* + * Check if we are close to filling in entire pending queue, + * if so then tell the sender to stop/sleep by returning -EBUSY + * We do it only for context which can sleep (GFP_KERNEL) + */ + if (gfp == GFP_KERNEL && + pqueue->pending_count > (pqueue->qlen - CPT_IQ_STOP_MARGIN)) { + pentry->resume_sender = true; + } else + pentry->resume_sender = false; + resume_sender = pentry->resume_sender; + pqueue->pending_count++; + + pentry->completion_addr = info->completion_addr; + pentry->info = info; + pentry->callback = req->callback; + pentry->areq = req->areq; + pentry->busy = true; + info->pentry = pentry; + info->time_in = jiffies; + info->req = req; + + /* Fill in the command */ + iq_cmd.cmd.u64 = 0; + iq_cmd.cmd.s.opcode = cpu_to_be16(cpt_req->opcode.flags); + iq_cmd.cmd.s.param1 = cpu_to_be16(cpt_req->param1); + iq_cmd.cmd.s.param2 = cpu_to_be16(cpt_req->param2); + iq_cmd.cmd.s.dlen = cpu_to_be16(cpt_req->dlen); + + /* 64-bit swap for microcode data reads, not needed for addresses*/ + iq_cmd.cmd.u64 = cpu_to_be64(iq_cmd.cmd.u64); + iq_cmd.dptr = info->dptr_baddr; + iq_cmd.rptr = info->rptr_baddr; + iq_cmd.cptr.u64 = 0; + iq_cmd.cptr.s.grp = ctrl->s.grp; + + /* Fill in the CPT_INST_S type command for HW interpretation 
*/ + cpt_fill_inst(&cptinst, info, &iq_cmd); + + /* Print debug info if enabled */ + otx_cpt_dump_sg_list(pdev, req); + pr_debug("Cpt_inst_s hexdump (%d bytes)\n", OTX_CPT_INST_SIZE); + print_hex_dump_debug("", 0, 16, 1, &cptinst, OTX_CPT_INST_SIZE, false); + pr_debug("Dptr hexdump (%d bytes)\n", cpt_req->dlen); + print_hex_dump_debug("", 0, 16, 1, info->in_buffer, + cpt_req->dlen, false); + + /* Send CPT command */ + cpt_send_cmd(&cptinst, cptvf); + + /* + * We allocate and prepare pending queue entry in critical section + * together with submitting CPT instruction to CPT instruction queue + * to make sure that order of CPT requests is the same in both + * pending and instruction queues + */ + spin_unlock_bh(&pqueue->lock); + + ret = resume_sender ? -EBUSY : -EINPROGRESS; + return ret; + +request_cleanup: + do_request_cleanup(pdev, info); + return ret; +} + +int otx_cpt_do_request(struct pci_dev *pdev, struct otx_cpt_req_info *req, + int cpu_num) +{ + struct otx_cptvf *cptvf = pci_get_drvdata(pdev); + + if (!otx_cpt_device_ready(cptvf)) { + dev_err(&pdev->dev, "CPT Device is not ready"); + return -ENODEV; + } + + if ((cptvf->vftype == OTX_CPT_SE_TYPES) && (!req->ctrl.s.se_req)) { + dev_err(&pdev->dev, "CPTVF-%d of SE TYPE got AE request", + cptvf->vfid); + return -EINVAL; + } else if ((cptvf->vftype == OTX_CPT_AE_TYPES) && + (req->ctrl.s.se_req)) { + dev_err(&pdev->dev, "CPTVF-%d of AE TYPE got SE request", + cptvf->vfid); + return -EINVAL; + } + + return process_request(pdev, req, &cptvf->pqinfo.queue[0], cptvf); +} + +static int cpt_process_ccode(struct pci_dev *pdev, + union otx_cpt_res_s *cpt_status, + struct otx_cpt_info_buffer *cpt_info, + struct otx_cpt_req_info *req, u32 *res_code) +{ + u8 ccode = cpt_status->s.compcode; + union otx_cpt_error_code ecode; + + ecode.u = be64_to_cpu(*((u64 *) cpt_info->out_buffer)); + switch (ccode) { + case CPT_COMP_E_FAULT: + dev_err(&pdev->dev, + "Request failed with DMA fault\n"); + otx_cpt_dump_sg_list(pdev, req); + 
break; + + case CPT_COMP_E_SWERR: + dev_err(&pdev->dev, + "Request failed with software error code %d\n", + ecode.s.ccode); + otx_cpt_dump_sg_list(pdev, req); + break; + + case CPT_COMP_E_HWERR: + dev_err(&pdev->dev, + "Request failed with hardware error\n"); + otx_cpt_dump_sg_list(pdev, req); + break; + + case COMPLETION_CODE_INIT: + /* check for timeout */ + if (time_after_eq(jiffies, cpt_info->time_in + + OTX_CPT_COMMAND_TIMEOUT * HZ)) + dev_warn(&pdev->dev, "Request timed out 0x%p", req); + else if (cpt_info->extra_time < OTX_CPT_TIME_IN_RESET_COUNT) { + cpt_info->time_in = jiffies; + cpt_info->extra_time++; + } + return 1; + + case CPT_COMP_E_GOOD: + /* Check microcode completion code */ + if (ecode.s.ccode) { + /* + * If requested hmac is truncated and ucode returns + * s/g write length error then we report success + * because ucode writes as many bytes of calculated + * hmac as available in gather buffer and reports + * s/g write length error if number of bytes in gather + * buffer is less than full hmac size. 
+ */ + if (req->is_trunc_hmac && + ecode.s.ccode == ERR_SCATTER_GATHER_WRITE_LENGTH) { + *res_code = 0; + break; + } + + dev_err(&pdev->dev, + "Request failed with software error code 0x%x\n", + ecode.s.ccode); + otx_cpt_dump_sg_list(pdev, req); + break; + } + + /* Request has been processed with success */ + *res_code = 0; + break; + + default: + dev_err(&pdev->dev, "Request returned invalid status\n"); + break; + } + + return 0; +} + +static inline void process_pending_queue(struct pci_dev *pdev, + struct otx_cpt_pending_queue *pqueue) +{ + void (*callback)(int status, void *arg1, void *arg2); + struct otx_cpt_pending_entry *resume_pentry = NULL; + struct otx_cpt_pending_entry *pentry = NULL; + struct otx_cpt_info_buffer *cpt_info = NULL; + union otx_cpt_res_s *cpt_status = NULL; + struct otx_cpt_req_info *req = NULL; + struct crypto_async_request *areq; + u32 res_code, resume_index; + + while (1) { + spin_lock_bh(&pqueue->lock); + pentry = &pqueue->head[pqueue->front]; + + if (WARN_ON(!pentry)) { + spin_unlock_bh(&pqueue->lock); + break; + } + + res_code = -EINVAL; + if (unlikely(!pentry->busy)) { + spin_unlock_bh(&pqueue->lock); + break; + } + + if (unlikely(!pentry->callback)) { + dev_err(&pdev->dev, "Callback NULL\n"); + goto process_pentry; + } + + cpt_info = pentry->info; + if (unlikely(!cpt_info)) { + dev_err(&pdev->dev, "Pending entry post arg NULL\n"); + goto process_pentry; + } + + req = cpt_info->req; + if (unlikely(!req)) { + dev_err(&pdev->dev, "Request NULL\n"); + goto process_pentry; + } + + cpt_status = (union otx_cpt_res_s *) pentry->completion_addr; + if (unlikely(!cpt_status)) { + dev_err(&pdev->dev, "Completion address NULL\n"); + goto process_pentry; + } + + if (cpt_process_ccode(pdev, cpt_status, cpt_info, req, + &res_code)) { + spin_unlock_bh(&pqueue->lock); + return; + } + cpt_info->pdev = pdev; + +process_pentry: + /* + * Check if we should inform sending side to resume + * We do it CPT_IQ_RESUME_MARGIN elements in advance before + * 
pending queue becomes empty + */ + resume_index = modulo_inc(pqueue->front, pqueue->qlen, + CPT_IQ_RESUME_MARGIN); + resume_pentry = &pqueue->head[resume_index]; + if (resume_pentry && + resume_pentry->resume_sender) { + resume_pentry->resume_sender = false; + callback = resume_pentry->callback; + areq = resume_pentry->areq; + + if (callback) { + spin_unlock_bh(&pqueue->lock); + + /* + * EINPROGRESS is an indication for sending + * side that it can resume sending requests + */ + callback(-EINPROGRESS, areq, cpt_info); + spin_lock_bh(&pqueue->lock); + } + } + + callback = pentry->callback; + areq = pentry->areq; + free_pentry(pentry); + + pqueue->pending_count--; + pqueue->front = modulo_inc(pqueue->front, pqueue->qlen, 1); + spin_unlock_bh(&pqueue->lock); + + /* + * Call callback after current pending entry has been + * processed, we don't do it if the callback pointer is + * invalid. + */ + if (callback) + callback(res_code, areq, cpt_info); + } +} + +void otx_cpt_post_process(struct otx_cptvf_wqe *wqe) +{ + process_pending_queue(wqe->cptvf->pdev, &wqe->cptvf->pqinfo.queue[0]); +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h new file mode 100644 index 000000000000..a4c9ff730b13 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h @@ -0,0 +1,227 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __OTX_CPTVF_REQUEST_MANAGER_H +#define __OTX_CPTVF_REQUEST_MANAGER_H + +#include <linux/types.h> +#include <linux/crypto.h> +#include <linux/pci.h> +#include "otx_cpt_hw_types.h" + +/* + * Maximum total number of SG buffers is 100, we divide it equally + * between input and output + */ +#define OTX_CPT_MAX_SG_IN_CNT 50 +#define OTX_CPT_MAX_SG_OUT_CNT 50 + +/* DMA mode direct or SG */ +#define OTX_CPT_DMA_DIRECT_DIRECT 0 +#define OTX_CPT_DMA_GATHER_SCATTER 1 + +/* Context source CPTR or DPTR */ +#define OTX_CPT_FROM_CPTR 0 +#define OTX_CPT_FROM_DPTR 1 + +/* CPT instruction queue alignment */ +#define OTX_CPT_INST_Q_ALIGNMENT 128 +#define OTX_CPT_MAX_REQ_SIZE 65535 + +/* Default command timeout in seconds */ +#define OTX_CPT_COMMAND_TIMEOUT 4 +#define OTX_CPT_TIMER_HOLD 0x03F +#define OTX_CPT_COUNT_HOLD 32 +#define OTX_CPT_TIME_IN_RESET_COUNT 5 + +/* Minimum and maximum values for interrupt coalescing */ +#define OTX_CPT_COALESC_MIN_TIME_WAIT 0x0 +#define OTX_CPT_COALESC_MAX_TIME_WAIT ((1<<16)-1) +#define OTX_CPT_COALESC_MIN_NUM_WAIT 0x0 +#define OTX_CPT_COALESC_MAX_NUM_WAIT ((1<<20)-1) + +union otx_cpt_opcode_info { + u16 flags; + struct { + u8 major; + u8 minor; + } s; +}; + +struct otx_cptvf_request { + u32 param1; + u32 param2; + u16 dlen; + union otx_cpt_opcode_info opcode; +}; + +struct otx_cpt_buf_ptr { + u8 *vptr; + dma_addr_t dma_addr; + u16 size; +}; + +union otx_cpt_ctrl_info { + u32 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u32 reserved0:26; + u32 grp:3; /* Group bits */ + u32 dma_mode:2; /* DMA mode */ + u32 se_req:1; /* To SE core */ +#else + u32 se_req:1; /* To SE core */ + u32 dma_mode:2; /* DMA mode */ + u32 grp:3; /* Group bits */ + u32 reserved0:26; +#endif + } s; +}; + +/* + * CPT_INST_S software command definitions + * Words EI (0-3) + */ +union otx_cpt_iq_cmd_word0 { + u64 u64; + struct { + u16 opcode; + u16 param1; + u16 param2; + u16 dlen; + } s; +}; + +union otx_cpt_iq_cmd_word3 { + u64 u64; + struct { +#if 
defined(__BIG_ENDIAN_BITFIELD) + u64 grp:3; + u64 cptr:61; +#else + u64 cptr:61; + u64 grp:3; +#endif + } s; +}; + +struct otx_cpt_iq_cmd { + union otx_cpt_iq_cmd_word0 cmd; + u64 dptr; + u64 rptr; + union otx_cpt_iq_cmd_word3 cptr; +}; + +struct otx_cpt_sglist_component { + union { + u64 len; + struct { + u16 len0; + u16 len1; + u16 len2; + u16 len3; + } s; + } u; + u64 ptr0; + u64 ptr1; + u64 ptr2; + u64 ptr3; +}; + +struct otx_cpt_pending_entry { + u64 *completion_addr; /* Completion address */ + struct otx_cpt_info_buffer *info; + /* Kernel async request callback */ + void (*callback)(int status, void *arg1, void *arg2); + struct crypto_async_request *areq; /* Async request callback arg */ + u8 resume_sender; /* Notify sender to resume sending requests */ + u8 busy; /* Entry status (free/busy) */ +}; + +struct otx_cpt_pending_queue { + struct otx_cpt_pending_entry *head; /* Head of the queue */ + u32 front; /* Process work from here */ + u32 rear; /* Append new work here */ + u32 pending_count; /* Pending requests count */ + u32 qlen; /* Queue length */ + spinlock_t lock; /* Queue lock */ +}; + +struct otx_cpt_req_info { + /* Kernel async request callback */ + void (*callback)(int status, void *arg1, void *arg2); + struct crypto_async_request *areq; /* Async request callback arg */ + struct otx_cptvf_request req;/* Request information (core specific) */ + union otx_cpt_ctrl_info ctrl;/* User control information */ + struct otx_cpt_buf_ptr in[OTX_CPT_MAX_SG_IN_CNT]; + struct otx_cpt_buf_ptr out[OTX_CPT_MAX_SG_OUT_CNT]; + u8 *iv_out; /* IV to send back */ + u16 rlen; /* Output length */ + u8 incnt; /* Number of input buffers */ + u8 outcnt; /* Number of output buffers */ + u8 req_type; /* Type of request */ + u8 is_enc; /* Is a request an encryption request */ + u8 is_trunc_hmac;/* Is truncated hmac used */ +}; + +struct otx_cpt_info_buffer { + struct otx_cpt_pending_entry *pentry; + struct otx_cpt_req_info *req; + struct pci_dev *pdev; + u64 *completion_addr; + 
u8 *out_buffer; + u8 *in_buffer; + dma_addr_t dptr_baddr; + dma_addr_t rptr_baddr; + dma_addr_t comp_baddr; + unsigned long time_in; + u32 dlen; + u32 dma_len; + u8 extra_time; +}; + +static inline void do_request_cleanup(struct pci_dev *pdev, + struct otx_cpt_info_buffer *info) +{ + struct otx_cpt_req_info *req; + int i; + + if (info->dptr_baddr) + dma_unmap_single(&pdev->dev, info->dptr_baddr, + info->dma_len, DMA_BIDIRECTIONAL); + + if (info->req) { + req = info->req; + for (i = 0; i < req->outcnt; i++) { + if (req->out[i].dma_addr) + dma_unmap_single(&pdev->dev, + req->out[i].dma_addr, + req->out[i].size, + DMA_BIDIRECTIONAL); + } + + for (i = 0; i < req->incnt; i++) { + if (req->in[i].dma_addr) + dma_unmap_single(&pdev->dev, + req->in[i].dma_addr, + req->in[i].size, + DMA_BIDIRECTIONAL); + } + } + kzfree(info); +} + +struct otx_cptvf_wqe; +void otx_cpt_dump_sg_list(struct pci_dev *pdev, struct otx_cpt_req_info *req); +void otx_cpt_post_process(struct otx_cptvf_wqe *wqe); +int otx_cpt_do_request(struct pci_dev *pdev, struct otx_cpt_req_info *req, + int cpu_num); + +#endif /* __OTX_CPTVF_REQUEST_MANAGER_H */ diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c index 9e9f48bb7f85..bd6309e57ab8 100644 --- a/drivers/crypto/mediatek/mtk-sha.c +++ b/drivers/crypto/mediatek/mtk-sha.c @@ -107,7 +107,7 @@ struct mtk_sha_ctx { u8 id; u8 buf[SHA_BUF_SIZE] __aligned(sizeof(u32)); - struct mtk_sha_hmac_ctx base[0]; + struct mtk_sha_hmac_ctx base[]; }; struct mtk_sha_drv { diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 435ac1c83df9..d84530293036 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -20,6 +20,7 @@ #include <crypto/sha.h> #include <crypto/internal/hash.h> #include <crypto/internal/skcipher.h> +#include <crypto/scatterwalk.h> #define DCP_MAX_CHANS 4 #define DCP_BUF_SZ PAGE_SIZE @@ -611,49 +612,46 @@ static int dcp_sha_req_to_buf(struct crypto_async_request *arq) struct dcp_async_ctx *actx = 
crypto_ahash_ctx(tfm); struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req); struct hash_alg_common *halg = crypto_hash_alg_common(tfm); - const int nents = sg_nents(req->src); uint8_t *in_buf = sdcp->coh->sha_in_buf; uint8_t *out_buf = sdcp->coh->sha_out_buf; - uint8_t *src_buf; - struct scatterlist *src; - unsigned int i, len, clen; + unsigned int i, len, clen, oft = 0; int ret; int fin = rctx->fini; if (fin) rctx->fini = 0; - for_each_sg(req->src, src, nents, i) { - src_buf = sg_virt(src); - len = sg_dma_len(src); - - do { - if (actx->fill + len > DCP_BUF_SZ) - clen = DCP_BUF_SZ - actx->fill; - else - clen = len; - - memcpy(in_buf + actx->fill, src_buf, clen); - len -= clen; - src_buf += clen; - actx->fill += clen; + src = req->src; + len = req->nbytes; - /* - * If we filled the buffer and still have some - * more data, submit the buffer. - */ - if (len && actx->fill == DCP_BUF_SZ) { - ret = mxs_dcp_run_sha(req); - if (ret) - return ret; - actx->fill = 0; - rctx->init = 0; - } - } while (len); + while (len) { + if (actx->fill + len > DCP_BUF_SZ) + clen = DCP_BUF_SZ - actx->fill; + else + clen = len; + + scatterwalk_map_and_copy(in_buf + actx->fill, src, oft, clen, + 0); + + len -= clen; + oft += clen; + actx->fill += clen; + + /* + * If we filled the buffer and still have some + * more data, submit the buffer. 
+ */ + if (len && actx->fill == DCP_BUF_SZ) { + ret = mxs_dcp_run_sha(req); + if (ret) + return ret; + actx->fill = 0; + rctx->init = 0; + } } if (fin) { diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h index 91c54289124a..c6233173c612 100644 --- a/drivers/crypto/nx/nx.h +++ b/drivers/crypto/nx/nx.h @@ -37,7 +37,7 @@ struct max_sync_cop { u32 fc; u32 mode; u32 triplets; - struct msc_triplet trip[0]; + struct msc_triplet trip[]; } __packed; struct alg_props { diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 4f915a4ef5b0..e4072cd38585 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -159,7 +159,7 @@ struct omap_sham_reqctx { int sg_len; unsigned int total; /* total request */ - u8 buffer[0] OMAP_ALIGNED; + u8 buffer[] OMAP_ALIGNED; }; struct omap_sham_hmac_ctx { @@ -176,7 +176,7 @@ struct omap_sham_ctx { /* fallback stuff */ struct crypto_shash *fallback; - struct omap_sham_hmac_ctx base[0]; + struct omap_sham_hmac_ctx base[]; }; #define OMAP_SHAM_QUEUE_LENGTH 10 diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 833bb1d3a11b..e14d3dd291f0 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -97,7 +97,7 @@ struct qat_alg_cd { struct icp_qat_hw_cipher_algo_blk cipher; struct icp_qat_hw_auth_algo_blk hash; } qat_enc_cd; - struct qat_dec { /* Decrytp content desc */ + struct qat_dec { /* Decrypt content desc */ struct icp_qat_hw_auth_algo_blk hash; struct icp_qat_hw_cipher_algo_blk cipher; } qat_dec_cd; diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c index 3852d31ce0a4..fb504cee0305 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.c +++ b/drivers/crypto/qat/qat_common/qat_crypto.c @@ -250,8 +250,7 @@ static int qat_crypto_create_instances(struct adf_accel_dev *accel_dev) char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES]; 
INIT_LIST_HEAD(&accel_dev->crypto_list); - strlcpy(key, ADF_NUM_CY, sizeof(key)); - if (adf_cfg_get_param_value(accel_dev, SEC, key, val)) + if (adf_cfg_get_param_value(accel_dev, SEC, ADF_NUM_CY, val)) return -EFAULT; if (kstrtoul(val, 0, &num_inst)) diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c index 629e7f34dc09..5006e74c40cd 100644 --- a/drivers/crypto/qce/common.c +++ b/drivers/crypto/qce/common.c @@ -15,8 +15,6 @@ #include "regs-v5.h" #include "sha.h" -#define QCE_SECTOR_SIZE 512 - static inline u32 qce_read(struct qce_device *qce, u32 offset) { return readl(qce->base + offset); diff --git a/drivers/crypto/qce/common.h b/drivers/crypto/qce/common.h index 282d4317470d..9f989cba0f1b 100644 --- a/drivers/crypto/qce/common.h +++ b/drivers/crypto/qce/common.h @@ -12,6 +12,9 @@ #include <crypto/hash.h> #include <crypto/internal/skcipher.h> +/* xts du size */ +#define QCE_SECTOR_SIZE 512 + /* key size in bytes */ #define QCE_SHA_HMAC_KEY_SIZE 64 #define QCE_MAX_CIPHER_KEY_SIZE AES_KEYSIZE_256 diff --git a/drivers/crypto/qce/dma.c b/drivers/crypto/qce/dma.c index 7da893dc00e7..46db5bf366b4 100644 --- a/drivers/crypto/qce/dma.c +++ b/drivers/crypto/qce/dma.c @@ -48,9 +48,10 @@ void qce_dma_release(struct qce_dma_data *dma) struct scatterlist * qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl, - int max_ents) + unsigned int max_len) { struct scatterlist *sg = sgt->sgl, *sg_last = NULL; + unsigned int new_len; while (sg) { if (!sg_page(sg)) @@ -61,13 +62,13 @@ qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl, if (!sg) return ERR_PTR(-EINVAL); - while (new_sgl && sg && max_ents) { - sg_set_page(sg, sg_page(new_sgl), new_sgl->length, - new_sgl->offset); + while (new_sgl && sg && max_len) { + new_len = new_sgl->length > max_len ? 
max_len : new_sgl->length; + sg_set_page(sg, sg_page(new_sgl), new_len, new_sgl->offset); sg_last = sg; sg = sg_next(sg); new_sgl = sg_next(new_sgl); - max_ents--; + max_len -= new_len; } return sg_last; diff --git a/drivers/crypto/qce/dma.h b/drivers/crypto/qce/dma.h index ed25a0d9829e..786402169360 100644 --- a/drivers/crypto/qce/dma.h +++ b/drivers/crypto/qce/dma.h @@ -43,6 +43,6 @@ void qce_dma_issue_pending(struct qce_dma_data *dma); int qce_dma_terminate_all(struct qce_dma_data *dma); struct scatterlist * qce_sgtable_add(struct sg_table *sgt, struct scatterlist *sg_add, - int max_ents); + unsigned int max_len); #endif /* _DMA_H_ */ diff --git a/drivers/crypto/qce/skcipher.c b/drivers/crypto/qce/skcipher.c index 4217b745f124..9412433f3b21 100644 --- a/drivers/crypto/qce/skcipher.c +++ b/drivers/crypto/qce/skcipher.c @@ -5,6 +5,7 @@ #include <linux/device.h> #include <linux/interrupt.h> +#include <linux/moduleparam.h> #include <linux/types.h> #include <crypto/aes.h> #include <crypto/internal/des.h> @@ -12,6 +13,13 @@ #include "cipher.h" +static unsigned int aes_sw_max_len = CONFIG_CRYPTO_DEV_QCE_SW_MAX_LEN; +module_param(aes_sw_max_len, uint, 0644); +MODULE_PARM_DESC(aes_sw_max_len, + "Only use hardware for AES requests larger than this " + "[0=always use hardware; anything <16 breaks AES-GCM; default=" + __stringify(CONFIG_CRYPTO_DEV_QCE_SW_MAX_LEN)"]"); + static LIST_HEAD(skcipher_algs); static void qce_skcipher_done(void *data) @@ -97,13 +105,14 @@ qce_skcipher_async_req_handle(struct crypto_async_request *async_req) sg_init_one(&rctx->result_sg, qce->dma.result_buf, QCE_RESULT_BUF_SZ); - sg = qce_sgtable_add(&rctx->dst_tbl, req->dst, rctx->dst_nents - 1); + sg = qce_sgtable_add(&rctx->dst_tbl, req->dst, req->cryptlen); if (IS_ERR(sg)) { ret = PTR_ERR(sg); goto error_free; } - sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg, 1); + sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg, + QCE_RESULT_BUF_SZ); if (IS_ERR(sg)) { ret = PTR_ERR(sg); goto 
error_free; @@ -165,15 +174,10 @@ static int qce_skcipher_setkey(struct crypto_skcipher *ablk, const u8 *key, switch (IS_XTS(flags) ? keylen >> 1 : keylen) { case AES_KEYSIZE_128: case AES_KEYSIZE_256: + memcpy(ctx->enc_key, key, keylen); break; - default: - goto fallback; } - ctx->enc_keylen = keylen; - memcpy(ctx->enc_key, key, keylen); - return 0; -fallback: ret = crypto_sync_skcipher_setkey(ctx->fallback, key, keylen); if (!ret) ctx->enc_keylen = keylen; @@ -223,8 +227,14 @@ static int qce_skcipher_crypt(struct skcipher_request *req, int encrypt) rctx->flags |= encrypt ? QCE_ENCRYPT : QCE_DECRYPT; keylen = IS_XTS(rctx->flags) ? ctx->enc_keylen >> 1 : ctx->enc_keylen; - if (IS_AES(rctx->flags) && keylen != AES_KEYSIZE_128 && - keylen != AES_KEYSIZE_256) { + /* qce is hanging when AES-XTS request len > QCE_SECTOR_SIZE and + * is not a multiple of it; pass such requests to the fallback + */ + if (IS_AES(rctx->flags) && + (((keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256) || + req->cryptlen <= aes_sw_max_len) || + (IS_XTS(rctx->flags) && req->cryptlen > QCE_SECTOR_SIZE && + req->cryptlen % QCE_SECTOR_SIZE))) { SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); skcipher_request_set_sync_tfm(subreq, ctx->fallback); diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index d66e20a2f54c..2a16800d2579 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -369,7 +369,7 @@ struct s5p_hash_reqctx { bool error; u32 bufcnt; - u8 buffer[0]; + u8 buffer[]; }; /** diff --git a/drivers/crypto/xilinx/Makefile b/drivers/crypto/xilinx/Makefile new file mode 100644 index 000000000000..534e32daf76a --- /dev/null +++ b/drivers/crypto/xilinx/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_AES) += zynqmp-aes-gcm.o diff --git a/drivers/crypto/xilinx/zynqmp-aes-gcm.c b/drivers/crypto/xilinx/zynqmp-aes-gcm.c new file mode 100644 index 000000000000..09f7f468eef8 --- /dev/null +++ 
b/drivers/crypto/xilinx/zynqmp-aes-gcm.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Xilinx ZynqMP AES Driver. + * Copyright (c) 2020 Xilinx Inc. + */ + +#include <crypto/aes.h> +#include <crypto/engine.h> +#include <crypto/gcm.h> +#include <crypto/internal/aead.h> +#include <crypto/scatterwalk.h> + +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> + +#include <linux/firmware/xlnx-zynqmp.h> + +#define ZYNQMP_DMA_BIT_MASK 32U + +#define ZYNQMP_AES_KEY_SIZE AES_KEYSIZE_256 +#define ZYNQMP_AES_AUTH_SIZE 16U +#define ZYNQMP_KEY_SRC_SEL_KEY_LEN 1U +#define ZYNQMP_AES_BLK_SIZE 1U +#define ZYNQMP_AES_MIN_INPUT_BLK_SIZE 4U +#define ZYNQMP_AES_WORD_LEN 4U + +#define ZYNQMP_AES_GCM_TAG_MISMATCH_ERR 0x01 +#define ZYNQMP_AES_WRONG_KEY_SRC_ERR 0x13 +#define ZYNQMP_AES_PUF_NOT_PROGRAMMED 0xE300 + +enum zynqmp_aead_op { + ZYNQMP_AES_DECRYPT = 0, + ZYNQMP_AES_ENCRYPT +}; + +enum zynqmp_aead_keysrc { + ZYNQMP_AES_KUP_KEY = 0, + ZYNQMP_AES_DEV_KEY, + ZYNQMP_AES_PUF_KEY +}; + +struct zynqmp_aead_drv_ctx { + union { + struct aead_alg aead; + } alg; + struct device *dev; + struct crypto_engine *engine; + const struct zynqmp_eemi_ops *eemi_ops; +}; + +struct zynqmp_aead_hw_req { + u64 src; + u64 iv; + u64 key; + u64 dst; + u64 size; + u64 op; + u64 keysrc; +}; + +struct zynqmp_aead_tfm_ctx { + struct crypto_engine_ctx engine_ctx; + struct device *dev; + u8 key[ZYNQMP_AES_KEY_SIZE]; + u8 *iv; + u32 keylen; + u32 authsize; + enum zynqmp_aead_keysrc keysrc; + struct crypto_aead *fbk_cipher; +}; + +struct zynqmp_aead_req_ctx { + enum zynqmp_aead_op op; +}; + +static int zynqmp_aes_aead_cipher(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct zynqmp_aead_tfm_ctx *tfm_ctx = crypto_aead_ctx(aead); + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req); + struct device *dev = tfm_ctx->dev; + struct aead_alg *alg = crypto_aead_alg(aead); + struct zynqmp_aead_drv_ctx *drv_ctx; + struct 
zynqmp_aead_hw_req *hwreq; + dma_addr_t dma_addr_data, dma_addr_hw_req; + unsigned int data_size; + unsigned int status; + size_t dma_size; + char *kbuf; + int err; + + drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead); + + if (!drv_ctx->eemi_ops->aes) + return -ENOTSUPP; + + if (tfm_ctx->keysrc == ZYNQMP_AES_KUP_KEY) + dma_size = req->cryptlen + ZYNQMP_AES_KEY_SIZE + + GCM_AES_IV_SIZE; + else + dma_size = req->cryptlen + GCM_AES_IV_SIZE; + + kbuf = dma_alloc_coherent(dev, dma_size, &dma_addr_data, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + hwreq = dma_alloc_coherent(dev, sizeof(struct zynqmp_aead_hw_req), + &dma_addr_hw_req, GFP_KERNEL); + if (!hwreq) { + dma_free_coherent(dev, dma_size, kbuf, dma_addr_data); + return -ENOMEM; + } + + data_size = req->cryptlen; + scatterwalk_map_and_copy(kbuf, req->src, 0, req->cryptlen, 0); + memcpy(kbuf + data_size, req->iv, GCM_AES_IV_SIZE); + + hwreq->src = dma_addr_data; + hwreq->dst = dma_addr_data; + hwreq->iv = hwreq->src + data_size; + hwreq->keysrc = tfm_ctx->keysrc; + hwreq->op = rq_ctx->op; + + if (hwreq->op == ZYNQMP_AES_ENCRYPT) + hwreq->size = data_size; + else + hwreq->size = data_size - ZYNQMP_AES_AUTH_SIZE; + + if (hwreq->keysrc == ZYNQMP_AES_KUP_KEY) { + memcpy(kbuf + data_size + GCM_AES_IV_SIZE, + tfm_ctx->key, ZYNQMP_AES_KEY_SIZE); + + hwreq->key = hwreq->src + data_size + GCM_AES_IV_SIZE; + } else { + hwreq->key = 0; + } + + drv_ctx->eemi_ops->aes(dma_addr_hw_req, &status); + + if (status) { + switch (status) { + case ZYNQMP_AES_GCM_TAG_MISMATCH_ERR: + dev_err(dev, "ERROR: Gcm Tag mismatch\n"); + break; + case ZYNQMP_AES_WRONG_KEY_SRC_ERR: + dev_err(dev, "ERROR: Wrong KeySrc, enable secure mode\n"); + break; + case ZYNQMP_AES_PUF_NOT_PROGRAMMED: + dev_err(dev, "ERROR: PUF is not registered\n"); + break; + default: + dev_err(dev, "ERROR: Unknown error\n"); + break; + } + err = -status; + } else { + if (hwreq->op == ZYNQMP_AES_ENCRYPT) + data_size = data_size + ZYNQMP_AES_AUTH_SIZE; + else 
+ data_size = data_size - ZYNQMP_AES_AUTH_SIZE; + + sg_copy_from_buffer(req->dst, sg_nents(req->dst), + kbuf, data_size); + err = 0; + } + + if (kbuf) { + memzero_explicit(kbuf, dma_size); + dma_free_coherent(dev, dma_size, kbuf, dma_addr_data); + } + if (hwreq) { + memzero_explicit(hwreq, sizeof(struct zynqmp_aead_hw_req)); + dma_free_coherent(dev, sizeof(struct zynqmp_aead_hw_req), + hwreq, dma_addr_hw_req); + } + return err; +} + +static int zynqmp_fallback_check(struct zynqmp_aead_tfm_ctx *tfm_ctx, + struct aead_request *req) +{ + int need_fallback = 0; + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req); + + if (tfm_ctx->authsize != ZYNQMP_AES_AUTH_SIZE) + need_fallback = 1; + + if (tfm_ctx->keysrc == ZYNQMP_AES_KUP_KEY && + tfm_ctx->keylen != ZYNQMP_AES_KEY_SIZE) { + need_fallback = 1; + } + if (req->assoclen != 0 || + req->cryptlen < ZYNQMP_AES_MIN_INPUT_BLK_SIZE) { + need_fallback = 1; + } + if ((req->cryptlen % ZYNQMP_AES_WORD_LEN) != 0) + need_fallback = 1; + + if (rq_ctx->op == ZYNQMP_AES_DECRYPT && + req->cryptlen <= ZYNQMP_AES_AUTH_SIZE) { + need_fallback = 1; + } + return need_fallback; +} + +static int zynqmp_handle_aes_req(struct crypto_engine *engine, + void *req) +{ + struct aead_request *areq = + container_of(req, struct aead_request, base); + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct zynqmp_aead_tfm_ctx *tfm_ctx = crypto_aead_ctx(aead); + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(areq); + struct aead_request *subreq = aead_request_ctx(req); + int need_fallback; + int err; + + need_fallback = zynqmp_fallback_check(tfm_ctx, areq); + + if (need_fallback) { + aead_request_set_tfm(subreq, tfm_ctx->fbk_cipher); + + aead_request_set_callback(subreq, areq->base.flags, + NULL, NULL); + aead_request_set_crypt(subreq, areq->src, areq->dst, + areq->cryptlen, areq->iv); + aead_request_set_ad(subreq, areq->assoclen); + if (rq_ctx->op == ZYNQMP_AES_ENCRYPT) + err = crypto_aead_encrypt(subreq); + else + err = 
crypto_aead_decrypt(subreq); + } else { + err = zynqmp_aes_aead_cipher(areq); + } + + crypto_finalize_aead_request(engine, areq, err); + return 0; +} + +static int zynqmp_aes_aead_setkey(struct crypto_aead *aead, const u8 *key, + unsigned int keylen) +{ + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct zynqmp_aead_tfm_ctx *tfm_ctx = + (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm); + unsigned char keysrc; + + if (keylen == ZYNQMP_KEY_SRC_SEL_KEY_LEN) { + keysrc = *key; + if (keysrc == ZYNQMP_AES_KUP_KEY || + keysrc == ZYNQMP_AES_DEV_KEY || + keysrc == ZYNQMP_AES_PUF_KEY) { + tfm_ctx->keysrc = (enum zynqmp_aead_keysrc)keysrc; + } else { + tfm_ctx->keylen = keylen; + } + } else { + tfm_ctx->keylen = keylen; + if (keylen == ZYNQMP_AES_KEY_SIZE) { + tfm_ctx->keysrc = ZYNQMP_AES_KUP_KEY; + memcpy(tfm_ctx->key, key, keylen); + } + } + + tfm_ctx->fbk_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + tfm_ctx->fbk_cipher->base.crt_flags |= (aead->base.crt_flags & + CRYPTO_TFM_REQ_MASK); + + return crypto_aead_setkey(tfm_ctx->fbk_cipher, key, keylen); +} + +static int zynqmp_aes_aead_setauthsize(struct crypto_aead *aead, + unsigned int authsize) +{ + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct zynqmp_aead_tfm_ctx *tfm_ctx = + (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm); + + tfm_ctx->authsize = authsize; + return crypto_aead_setauthsize(tfm_ctx->fbk_cipher, authsize); +} + +static int zynqmp_aes_aead_encrypt(struct aead_request *req) +{ + struct zynqmp_aead_drv_ctx *drv_ctx; + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct aead_alg *alg = crypto_aead_alg(aead); + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req); + + rq_ctx->op = ZYNQMP_AES_ENCRYPT; + drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead); + + return crypto_transfer_aead_request_to_engine(drv_ctx->engine, req); +} + +static int zynqmp_aes_aead_decrypt(struct aead_request *req) +{ + struct zynqmp_aead_drv_ctx *drv_ctx; + struct crypto_aead *aead 
= crypto_aead_reqtfm(req); + struct aead_alg *alg = crypto_aead_alg(aead); + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req); + + rq_ctx->op = ZYNQMP_AES_DECRYPT; + drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead); + + return crypto_transfer_aead_request_to_engine(drv_ctx->engine, req); +} + +static int zynqmp_aes_aead_init(struct crypto_aead *aead) +{ + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct zynqmp_aead_tfm_ctx *tfm_ctx = + (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm); + struct zynqmp_aead_drv_ctx *drv_ctx; + struct aead_alg *alg = crypto_aead_alg(aead); + + drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead); + tfm_ctx->dev = drv_ctx->dev; + + tfm_ctx->engine_ctx.op.do_one_request = zynqmp_handle_aes_req; + tfm_ctx->engine_ctx.op.prepare_request = NULL; + tfm_ctx->engine_ctx.op.unprepare_request = NULL; + + tfm_ctx->fbk_cipher = crypto_alloc_aead(drv_ctx->alg.aead.base.cra_name, + 0, + CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(tfm_ctx->fbk_cipher)) { + pr_err("%s() Error: failed to allocate fallback for %s\n", + __func__, drv_ctx->alg.aead.base.cra_name); + return PTR_ERR(tfm_ctx->fbk_cipher); + } + + crypto_aead_set_reqsize(aead, + max(sizeof(struct zynqmp_aead_req_ctx), + sizeof(struct aead_request) + + crypto_aead_reqsize(tfm_ctx->fbk_cipher))); + return 0; +} + +static void zynqmp_aes_aead_exit(struct crypto_aead *aead) +{ + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct zynqmp_aead_tfm_ctx *tfm_ctx = + (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm); + + if (tfm_ctx->fbk_cipher) { + crypto_free_aead(tfm_ctx->fbk_cipher); + tfm_ctx->fbk_cipher = NULL; + } + memzero_explicit(tfm_ctx, sizeof(struct zynqmp_aead_tfm_ctx)); +} + +static struct zynqmp_aead_drv_ctx aes_drv_ctx = { + .alg.aead = { + .setkey = zynqmp_aes_aead_setkey, + .setauthsize = zynqmp_aes_aead_setauthsize, + .encrypt = zynqmp_aes_aead_encrypt, + .decrypt = zynqmp_aes_aead_decrypt, + .init = zynqmp_aes_aead_init, + .exit 
= zynqmp_aes_aead_exit, + .ivsize = GCM_AES_IV_SIZE, + .maxauthsize = ZYNQMP_AES_AUTH_SIZE, + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "xilinx-zynqmp-aes-gcm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_AEAD | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = ZYNQMP_AES_BLK_SIZE, + .cra_ctxsize = sizeof(struct zynqmp_aead_tfm_ctx), + .cra_module = THIS_MODULE, + } + } +}; + +static int zynqmp_aes_aead_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + int err; + + /* ZynqMP AES driver supports only one instance */ + if (!aes_drv_ctx.dev) + aes_drv_ctx.dev = dev; + else + return -ENODEV; + + aes_drv_ctx.eemi_ops = zynqmp_pm_get_eemi_ops(); + if (IS_ERR(aes_drv_ctx.eemi_ops)) { + dev_err(dev, "Failed to get ZynqMP EEMI interface\n"); + return PTR_ERR(aes_drv_ctx.eemi_ops); + } + + err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(ZYNQMP_DMA_BIT_MASK)); + if (err < 0) { + dev_err(dev, "No usable DMA configuration\n"); + return err; + } + + aes_drv_ctx.engine = crypto_engine_alloc_init(dev, 1); + if (!aes_drv_ctx.engine) { + dev_err(dev, "Cannot alloc AES engine\n"); + err = -ENOMEM; + goto err_engine; + } + + err = crypto_engine_start(aes_drv_ctx.engine); + if (err) { + dev_err(dev, "Cannot start AES engine\n"); + goto err_engine; + } + + err = crypto_register_aead(&aes_drv_ctx.alg.aead); + if (err < 0) { + dev_err(dev, "Failed to register AEAD alg.\n"); + goto err_aead; + } + return 0; + +err_aead: + crypto_unregister_aead(&aes_drv_ctx.alg.aead); + +err_engine: + if (aes_drv_ctx.engine) + crypto_engine_exit(aes_drv_ctx.engine); + + return err; +} + +static int zynqmp_aes_aead_remove(struct platform_device *pdev) +{ + crypto_engine_exit(aes_drv_ctx.engine); + crypto_unregister_aead(&aes_drv_ctx.alg.aead); + + return 0; +} + +static const struct of_device_id zynqmp_aes_dt_ids[] = { + { .compatible = "xlnx,zynqmp-aes" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, 
zynqmp_aes_dt_ids); + +static struct platform_driver zynqmp_aes_driver = { + .probe = zynqmp_aes_aead_probe, + .remove = zynqmp_aes_aead_remove, + .driver = { + .name = "zynqmp-aes", + .of_match_table = zynqmp_aes_dt_ids, + }, +}; + +module_platform_driver(zynqmp_aes_driver); +MODULE_LICENSE("GPL"); diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c index f8b03dcfd2f7..41b65164a367 100644 --- a/drivers/firmware/xilinx/zynqmp.c +++ b/drivers/firmware/xilinx/zynqmp.c @@ -709,6 +709,30 @@ static int zynqmp_pm_set_requirement(const u32 node, const u32 capabilities, qos, ack, NULL); } +/** + * zynqmp_pm_aes - Access AES hardware to encrypt/decrypt the data using + * AES-GCM core. + * @address: Address of the AesParams structure. + * @out: Returned output value + * + * Return: Returns status, either success or error code. + */ +static int zynqmp_pm_aes_engine(const u64 address, u32 *out) +{ + u32 ret_payload[PAYLOAD_ARG_CNT]; + int ret; + + if (!out) + return -EINVAL; + + ret = zynqmp_pm_invoke_fn(PM_SECURE_AES, upper_32_bits(address), + lower_32_bits(address), + 0, 0, ret_payload); + *out = ret_payload[1]; + + return ret; +} + static const struct zynqmp_eemi_ops eemi_ops = { .get_api_version = zynqmp_pm_get_api_version, .get_chipid = zynqmp_pm_get_chipid, @@ -732,6 +756,7 @@ static const struct zynqmp_eemi_ops eemi_ops = { .set_requirement = zynqmp_pm_set_requirement, .fpga_load = zynqmp_pm_fpga_load, .fpga_get_status = zynqmp_pm_fpga_get_status, + .aes = zynqmp_pm_aes_engine, }; /** diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 7f0d48f406e3..99e151475d8f 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -480,4 +480,5 @@ source "drivers/misc/cxl/Kconfig" source "drivers/misc/ocxl/Kconfig" source "drivers/misc/cardreader/Kconfig" source "drivers/misc/habanalabs/Kconfig" +source "drivers/misc/uacce/Kconfig" endmenu diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index c1860d35dc7e..9abf2923d831 100644 
--- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -56,4 +56,5 @@ obj-$(CONFIG_OCXL) += ocxl/ obj-y += cardreader/ obj-$(CONFIG_PVPANIC) += pvpanic.o obj-$(CONFIG_HABANA_AI) += habanalabs/ +obj-$(CONFIG_UACCE) += uacce/ obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o diff --git a/drivers/misc/uacce/Kconfig b/drivers/misc/uacce/Kconfig new file mode 100644 index 000000000000..5e39b6083b0f --- /dev/null +++ b/drivers/misc/uacce/Kconfig @@ -0,0 +1,13 @@ +config UACCE + tristate "Accelerator Framework for User Land" + depends on IOMMU_API + help + UACCE provides interface for the user process to access the hardware + without interaction with the kernel space in data path. + + The user-space interface is described in + include/uapi/misc/uacce/uacce.h + + See Documentation/misc-devices/uacce.rst for more details. + + If you don't know what to do here, say N. diff --git a/drivers/misc/uacce/Makefile b/drivers/misc/uacce/Makefile new file mode 100644 index 000000000000..5b4374e8b5f2 --- /dev/null +++ b/drivers/misc/uacce/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +obj-$(CONFIG_UACCE) += uacce.o diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c new file mode 100644 index 000000000000..d39307f060bd --- /dev/null +++ b/drivers/misc/uacce/uacce.c @@ -0,0 +1,633 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/compat.h> +#include <linux/dma-mapping.h> +#include <linux/iommu.h> +#include <linux/module.h> +#include <linux/poll.h> +#include <linux/uacce.h> + +static struct class *uacce_class; +static dev_t uacce_devt; +static DEFINE_MUTEX(uacce_mutex); +static DEFINE_XARRAY_ALLOC(uacce_xa); + +static int uacce_start_queue(struct uacce_queue *q) +{ + int ret = 0; + + mutex_lock(&uacce_mutex); + + if (q->state != UACCE_Q_INIT) { + ret = -EINVAL; + goto out_with_lock; + } + + if (q->uacce->ops->start_queue) { + ret = q->uacce->ops->start_queue(q); + if (ret < 0) + goto out_with_lock; + } + + q->state = 
UACCE_Q_STARTED; + +out_with_lock: + mutex_unlock(&uacce_mutex); + + return ret; +} + +static int uacce_put_queue(struct uacce_queue *q) +{ + struct uacce_device *uacce = q->uacce; + + mutex_lock(&uacce_mutex); + + if (q->state == UACCE_Q_ZOMBIE) + goto out; + + if ((q->state == UACCE_Q_STARTED) && uacce->ops->stop_queue) + uacce->ops->stop_queue(q); + + if ((q->state == UACCE_Q_INIT || q->state == UACCE_Q_STARTED) && + uacce->ops->put_queue) + uacce->ops->put_queue(q); + + q->state = UACCE_Q_ZOMBIE; +out: + mutex_unlock(&uacce_mutex); + + return 0; +} + +static long uacce_fops_unl_ioctl(struct file *filep, + unsigned int cmd, unsigned long arg) +{ + struct uacce_queue *q = filep->private_data; + struct uacce_device *uacce = q->uacce; + + switch (cmd) { + case UACCE_CMD_START_Q: + return uacce_start_queue(q); + + case UACCE_CMD_PUT_Q: + return uacce_put_queue(q); + + default: + if (!uacce->ops->ioctl) + return -EINVAL; + + return uacce->ops->ioctl(q, cmd, arg); + } +} + +#ifdef CONFIG_COMPAT +static long uacce_fops_compat_ioctl(struct file *filep, + unsigned int cmd, unsigned long arg) +{ + arg = (unsigned long)compat_ptr(arg); + + return uacce_fops_unl_ioctl(filep, cmd, arg); +} +#endif + +static int uacce_sva_exit(struct device *dev, struct iommu_sva *handle, + void *data) +{ + struct uacce_mm *uacce_mm = data; + struct uacce_queue *q; + + /* + * No new queue can be added concurrently because no caller can have a + * reference to this mm. But there may be concurrent calls to + * uacce_mm_put(), so we need the lock. 
+ */ + mutex_lock(&uacce_mm->lock); + list_for_each_entry(q, &uacce_mm->queues, list) + uacce_put_queue(q); + uacce_mm->mm = NULL; + mutex_unlock(&uacce_mm->lock); + + return 0; +} + +static struct iommu_sva_ops uacce_sva_ops = { + .mm_exit = uacce_sva_exit, +}; + +static struct uacce_mm *uacce_mm_get(struct uacce_device *uacce, + struct uacce_queue *q, + struct mm_struct *mm) +{ + struct uacce_mm *uacce_mm = NULL; + struct iommu_sva *handle = NULL; + int ret; + + lockdep_assert_held(&uacce->mm_lock); + + list_for_each_entry(uacce_mm, &uacce->mm_list, list) { + if (uacce_mm->mm == mm) { + mutex_lock(&uacce_mm->lock); + list_add(&q->list, &uacce_mm->queues); + mutex_unlock(&uacce_mm->lock); + return uacce_mm; + } + } + + uacce_mm = kzalloc(sizeof(*uacce_mm), GFP_KERNEL); + if (!uacce_mm) + return NULL; + + if (uacce->flags & UACCE_DEV_SVA) { + /* + * Safe to pass an incomplete uacce_mm, since mm_exit cannot + * fire while we hold a reference to the mm. + */ + handle = iommu_sva_bind_device(uacce->parent, mm, uacce_mm); + if (IS_ERR(handle)) + goto err_free; + + ret = iommu_sva_set_ops(handle, &uacce_sva_ops); + if (ret) + goto err_unbind; + + uacce_mm->pasid = iommu_sva_get_pasid(handle); + if (uacce_mm->pasid == IOMMU_PASID_INVALID) + goto err_unbind; + } + + uacce_mm->mm = mm; + uacce_mm->handle = handle; + INIT_LIST_HEAD(&uacce_mm->queues); + mutex_init(&uacce_mm->lock); + list_add(&q->list, &uacce_mm->queues); + list_add(&uacce_mm->list, &uacce->mm_list); + + return uacce_mm; + +err_unbind: + if (handle) + iommu_sva_unbind_device(handle); +err_free: + kfree(uacce_mm); + return NULL; +} + +static void uacce_mm_put(struct uacce_queue *q) +{ + struct uacce_mm *uacce_mm = q->uacce_mm; + + lockdep_assert_held(&q->uacce->mm_lock); + + mutex_lock(&uacce_mm->lock); + list_del(&q->list); + mutex_unlock(&uacce_mm->lock); + + if (list_empty(&uacce_mm->queues)) { + if (uacce_mm->handle) + iommu_sva_unbind_device(uacce_mm->handle); + list_del(&uacce_mm->list); + 
kfree(uacce_mm); + } +} + +static int uacce_fops_open(struct inode *inode, struct file *filep) +{ + struct uacce_mm *uacce_mm = NULL; + struct uacce_device *uacce; + struct uacce_queue *q; + int ret = 0; + + uacce = xa_load(&uacce_xa, iminor(inode)); + if (!uacce) + return -ENODEV; + + q = kzalloc(sizeof(struct uacce_queue), GFP_KERNEL); + if (!q) + return -ENOMEM; + + mutex_lock(&uacce->mm_lock); + uacce_mm = uacce_mm_get(uacce, q, current->mm); + mutex_unlock(&uacce->mm_lock); + if (!uacce_mm) { + ret = -ENOMEM; + goto out_with_mem; + } + + q->uacce = uacce; + q->uacce_mm = uacce_mm; + + if (uacce->ops->get_queue) { + ret = uacce->ops->get_queue(uacce, uacce_mm->pasid, q); + if (ret < 0) + goto out_with_mm; + } + + init_waitqueue_head(&q->wait); + filep->private_data = q; + uacce->inode = inode; + q->state = UACCE_Q_INIT; + + return 0; + +out_with_mm: + mutex_lock(&uacce->mm_lock); + uacce_mm_put(q); + mutex_unlock(&uacce->mm_lock); +out_with_mem: + kfree(q); + return ret; +} + +static int uacce_fops_release(struct inode *inode, struct file *filep) +{ + struct uacce_queue *q = filep->private_data; + struct uacce_device *uacce = q->uacce; + + uacce_put_queue(q); + + mutex_lock(&uacce->mm_lock); + uacce_mm_put(q); + mutex_unlock(&uacce->mm_lock); + + kfree(q); + + return 0; +} + +static vm_fault_t uacce_vma_fault(struct vm_fault *vmf) +{ + if (vmf->flags & (FAULT_FLAG_MKWRITE | FAULT_FLAG_WRITE)) + return VM_FAULT_SIGBUS; + + return 0; +} + +static void uacce_vma_close(struct vm_area_struct *vma) +{ + struct uacce_queue *q = vma->vm_private_data; + struct uacce_qfile_region *qfr = NULL; + + if (vma->vm_pgoff < UACCE_MAX_REGION) + qfr = q->qfrs[vma->vm_pgoff]; + + kfree(qfr); +} + +static const struct vm_operations_struct uacce_vm_ops = { + .fault = uacce_vma_fault, + .close = uacce_vma_close, +}; + +static int uacce_fops_mmap(struct file *filep, struct vm_area_struct *vma) +{ + struct uacce_queue *q = filep->private_data; + struct uacce_device *uacce = q->uacce; + 
struct uacce_qfile_region *qfr; + enum uacce_qfrt type = UACCE_MAX_REGION; + int ret = 0; + + if (vma->vm_pgoff < UACCE_MAX_REGION) + type = vma->vm_pgoff; + else + return -EINVAL; + + qfr = kzalloc(sizeof(*qfr), GFP_KERNEL); + if (!qfr) + return -ENOMEM; + + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_WIPEONFORK; + vma->vm_ops = &uacce_vm_ops; + vma->vm_private_data = q; + qfr->type = type; + + mutex_lock(&uacce_mutex); + + if (q->state != UACCE_Q_INIT && q->state != UACCE_Q_STARTED) { + ret = -EINVAL; + goto out_with_lock; + } + + if (q->qfrs[type]) { + ret = -EEXIST; + goto out_with_lock; + } + + switch (type) { + case UACCE_QFRT_MMIO: + if (!uacce->ops->mmap) { + ret = -EINVAL; + goto out_with_lock; + } + + ret = uacce->ops->mmap(q, vma, qfr); + if (ret) + goto out_with_lock; + + break; + + case UACCE_QFRT_DUS: + if (!uacce->ops->mmap) { + ret = -EINVAL; + goto out_with_lock; + } + + ret = uacce->ops->mmap(q, vma, qfr); + if (ret) + goto out_with_lock; + break; + + default: + ret = -EINVAL; + goto out_with_lock; + } + + q->qfrs[type] = qfr; + mutex_unlock(&uacce_mutex); + + return ret; + +out_with_lock: + mutex_unlock(&uacce_mutex); + kfree(qfr); + return ret; +} + +static __poll_t uacce_fops_poll(struct file *file, poll_table *wait) +{ + struct uacce_queue *q = file->private_data; + struct uacce_device *uacce = q->uacce; + + poll_wait(file, &q->wait, wait); + if (uacce->ops->is_q_updated && uacce->ops->is_q_updated(q)) + return EPOLLIN | EPOLLRDNORM; + + return 0; +} + +static const struct file_operations uacce_fops = { + .owner = THIS_MODULE, + .open = uacce_fops_open, + .release = uacce_fops_release, + .unlocked_ioctl = uacce_fops_unl_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = uacce_fops_compat_ioctl, +#endif + .mmap = uacce_fops_mmap, + .poll = uacce_fops_poll, +}; + +#define to_uacce_device(dev) container_of(dev, struct uacce_device, dev) + +static ssize_t api_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct 
uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%s\n", uacce->api_ver); +} + +static ssize_t flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%u\n", uacce->flags); +} + +static ssize_t available_instances_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + if (!uacce->ops->get_available_instances) + return -ENODEV; + + return sprintf(buf, "%d\n", + uacce->ops->get_available_instances(uacce)); +} + +static ssize_t algorithms_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%s\n", uacce->algs); +} + +static ssize_t region_mmio_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%lu\n", + uacce->qf_pg_num[UACCE_QFRT_MMIO] << PAGE_SHIFT); +} + +static ssize_t region_dus_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%lu\n", + uacce->qf_pg_num[UACCE_QFRT_DUS] << PAGE_SHIFT); +} + +static DEVICE_ATTR_RO(api); +static DEVICE_ATTR_RO(flags); +static DEVICE_ATTR_RO(available_instances); +static DEVICE_ATTR_RO(algorithms); +static DEVICE_ATTR_RO(region_mmio_size); +static DEVICE_ATTR_RO(region_dus_size); + +static struct attribute *uacce_dev_attrs[] = { + &dev_attr_api.attr, + &dev_attr_flags.attr, + &dev_attr_available_instances.attr, + &dev_attr_algorithms.attr, + &dev_attr_region_mmio_size.attr, + &dev_attr_region_dus_size.attr, + NULL, +}; + +static umode_t uacce_dev_is_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct uacce_device *uacce = to_uacce_device(dev); + + if 
(((attr == &dev_attr_region_mmio_size.attr) && + (!uacce->qf_pg_num[UACCE_QFRT_MMIO])) || + ((attr == &dev_attr_region_dus_size.attr) && + (!uacce->qf_pg_num[UACCE_QFRT_DUS]))) + return 0; + + return attr->mode; +} + +static struct attribute_group uacce_dev_group = { + .is_visible = uacce_dev_is_visible, + .attrs = uacce_dev_attrs, +}; + +__ATTRIBUTE_GROUPS(uacce_dev); + +static void uacce_release(struct device *dev) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + kfree(uacce); +} + +/** + * uacce_alloc() - alloc an accelerator + * @parent: pointer of uacce parent device + * @interface: pointer of uacce_interface for register + * + * Returns uacce pointer if success and ERR_PTR if not + * Need check returned negotiated uacce->flags + */ +struct uacce_device *uacce_alloc(struct device *parent, + struct uacce_interface *interface) +{ + unsigned int flags = interface->flags; + struct uacce_device *uacce; + int ret; + + uacce = kzalloc(sizeof(struct uacce_device), GFP_KERNEL); + if (!uacce) + return ERR_PTR(-ENOMEM); + + if (flags & UACCE_DEV_SVA) { + ret = iommu_dev_enable_feature(parent, IOMMU_DEV_FEAT_SVA); + if (ret) + flags &= ~UACCE_DEV_SVA; + } + + uacce->parent = parent; + uacce->flags = flags; + uacce->ops = interface->ops; + + ret = xa_alloc(&uacce_xa, &uacce->dev_id, uacce, xa_limit_32b, + GFP_KERNEL); + if (ret < 0) + goto err_with_uacce; + + INIT_LIST_HEAD(&uacce->mm_list); + mutex_init(&uacce->mm_lock); + device_initialize(&uacce->dev); + uacce->dev.devt = MKDEV(MAJOR(uacce_devt), uacce->dev_id); + uacce->dev.class = uacce_class; + uacce->dev.groups = uacce_dev_groups; + uacce->dev.parent = uacce->parent; + uacce->dev.release = uacce_release; + dev_set_name(&uacce->dev, "%s-%d", interface->name, uacce->dev_id); + + return uacce; + +err_with_uacce: + if (flags & UACCE_DEV_SVA) + iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_SVA); + kfree(uacce); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(uacce_alloc); + +/** + * uacce_register() - 
add the accelerator to cdev and export to user space + * @uacce: The initialized uacce device + * + * Return 0 if register succeeded, or an error. + */ +int uacce_register(struct uacce_device *uacce) +{ + if (!uacce) + return -ENODEV; + + uacce->cdev = cdev_alloc(); + if (!uacce->cdev) + return -ENOMEM; + + uacce->cdev->ops = &uacce_fops; + uacce->cdev->owner = THIS_MODULE; + + return cdev_device_add(uacce->cdev, &uacce->dev); +} +EXPORT_SYMBOL_GPL(uacce_register); + +/** + * uacce_remove() - remove the accelerator + * @uacce: the accelerator to remove + */ +void uacce_remove(struct uacce_device *uacce) +{ + struct uacce_mm *uacce_mm; + struct uacce_queue *q; + + if (!uacce) + return; + /* + * unmap remaining mapping from user space, preventing user still + * access the mmaped area while parent device is already removed + */ + if (uacce->inode) + unmap_mapping_range(uacce->inode->i_mapping, 0, 0, 1); + + /* ensure no open queue remains */ + mutex_lock(&uacce->mm_lock); + list_for_each_entry(uacce_mm, &uacce->mm_list, list) { + /* + * We don't take the uacce_mm->lock here. Since we hold the + * device's mm_lock, no queue can be added to or removed from + * this uacce_mm. We may run concurrently with mm_exit, but + * uacce_put_queue() is serialized and iommu_sva_unbind_device() + * waits for the lock that mm_exit is holding. 
+ */ + list_for_each_entry(q, &uacce_mm->queues, list) + uacce_put_queue(q); + + if (uacce->flags & UACCE_DEV_SVA) { + iommu_sva_unbind_device(uacce_mm->handle); + uacce_mm->handle = NULL; + } + } + mutex_unlock(&uacce->mm_lock); + + /* disable sva now since no opened queues */ + if (uacce->flags & UACCE_DEV_SVA) + iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_SVA); + + if (uacce->cdev) + cdev_device_del(uacce->cdev, &uacce->dev); + xa_erase(&uacce_xa, uacce->dev_id); + put_device(&uacce->dev); +} +EXPORT_SYMBOL_GPL(uacce_remove); + +static int __init uacce_init(void) +{ + int ret; + + uacce_class = class_create(THIS_MODULE, UACCE_NAME); + if (IS_ERR(uacce_class)) + return PTR_ERR(uacce_class); + + ret = alloc_chrdev_region(&uacce_devt, 0, MINORMASK, UACCE_NAME); + if (ret) + class_destroy(uacce_class); + + return ret; +} + +static __exit void uacce_exit(void) +{ + unregister_chrdev_region(uacce_devt, MINORMASK); + class_destroy(uacce_class); +} + +subsys_initcall(uacce_init); +module_exit(uacce_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Hisilicon Tech. 
Co., Ltd."); +MODULE_DESCRIPTION("Accelerator interface for Userland applications"); diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index f34757e8f25f..2d357680094c 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -18,6 +18,7 @@ #include <linux/slab.h> #include <linux/atomic.h> #include <linux/device.h> +#include <linux/pm_runtime.h> #include <linux/poll.h> #include <linux/security.h> @@ -1070,7 +1071,14 @@ static int debugfs_show_regset32(struct seq_file *s, void *data) { struct debugfs_regset32 *regset = s->private; + if (regset->dev) + pm_runtime_get_sync(regset->dev); + debugfs_print_regs32(s, regset->regs, regset->nregs, regset->base, ""); + + if (regset->dev) + pm_runtime_put(regset->dev); + return 0; } diff --git a/include/crypto/aead.h b/include/crypto/aead.h index 1b3ebe8593c0..62c68550aab6 100644 --- a/include/crypto/aead.h +++ b/include/crypto/aead.h @@ -43,27 +43,33 @@ * * Memory Structure: * - * To support the needs of the most prominent user of AEAD ciphers, namely - * IPSEC, the AEAD ciphers have a special memory layout the caller must adhere - * to. - * - * The scatter list pointing to the input data must contain: - * - * * for RFC4106 ciphers, the concatenation of - * associated authentication data || IV || plaintext or ciphertext. Note, the - * same IV (buffer) is also set with the aead_request_set_crypt call. Note, - * the API call of aead_request_set_ad must provide the length of the AAD and - * the IV. The API call of aead_request_set_crypt only points to the size of - * the input plaintext or ciphertext. - * - * * for "normal" AEAD ciphers, the concatenation of - * associated authentication data || plaintext or ciphertext. - * - * It is important to note that if multiple scatter gather list entries form - * the input data mentioned above, the first entry must not point to a NULL - * buffer. 
If there is any potential where the AAD buffer can be NULL, the - * calling code must contain a precaution to ensure that this does not result - * in the first scatter gather list entry pointing to a NULL buffer. + * The source scatterlist must contain the concatenation of + * associated data || plaintext or ciphertext. + * + * The destination scatterlist has the same layout, except that the plaintext + * (resp. ciphertext) will grow (resp. shrink) by the authentication tag size + * during encryption (resp. decryption). + * + * In-place encryption/decryption is enabled by using the same scatterlist + * pointer for both the source and destination. + * + * Even in the out-of-place case, space must be reserved in the destination for + * the associated data, even though it won't be written to. This makes the + * in-place and out-of-place cases more consistent. It is permissible for the + * "destination" associated data to alias the "source" associated data. + * + * As with the other scatterlist crypto APIs, zero-length scatterlist elements + * are not allowed in the used part of the scatterlist. Thus, if there is no + * associated data, the first element must point to the plaintext/ciphertext. + * + * To meet the needs of IPsec, a special quirk applies to rfc4106, rfc4309, + * rfc4543, and rfc7539esp ciphers. For these ciphers, the final 'ivsize' bytes + * of the associated data buffer must contain a second copy of the IV. This is + * in addition to the copy passed to aead_request_set_crypt(). These two IV + * copies must not differ; different implementations of the same algorithm may + * behave differently in that case. Note that the algorithm might not actually + * treat the IV as associated data; nevertheless the length passed to + * aead_request_set_ad() must include it. 
*/ struct crypto_aead; diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index 24cfa96f98ea..56527c85d122 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -66,7 +66,7 @@ struct af_alg_sgl { struct af_alg_tsgl { struct list_head list; unsigned int cur; /* Last processed SG entry */ - struct scatterlist sg[0]; /* Array of SGs forming the SGL */ + struct scatterlist sg[]; /* Array of SGs forming the SGL */ }; #define MAX_SGL_ENTS ((4096 - sizeof(struct af_alg_tsgl)) / \ diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index d672b7db0cfc..a274d95fa66e 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -35,6 +35,7 @@ struct debugfs_regset32 { const struct debugfs_reg32 *regs; int nregs; void __iomem *base; + struct device *dev; /* Optional device for Runtime PM */ }; extern struct dentry *arch_debugfs_dir; diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index c8c42214e7fb..a7a2391d6f96 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -82,6 +82,7 @@ enum pm_api_id { PM_CLOCK_GETRATE, PM_CLOCK_SETPARENT, PM_CLOCK_GETPARENT, + PM_SECURE_AES = 47, PM_FEATURE_CHECK = 63, PM_API_MAX, }; @@ -322,6 +323,7 @@ struct zynqmp_eemi_ops { const u32 capabilities, const u32 qos, const enum zynqmp_pm_request_ack ack); + int (*aes)(const u64 address, u32 *out); }; int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 arg0, u32 arg1, diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 54d9436600c7..2b5f8366dbe1 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -381,6 +381,22 @@ int __must_check __fsl_mc_driver_register(struct fsl_mc_driver *fsl_mc_driver, void fsl_mc_driver_unregister(struct fsl_mc_driver *driver); +/** + * struct fsl_mc_version + * @major: Major version number: incremented on API compatibility changes + * @minor: Minor version number: incremented on API additions (that are + * backward 
compatible); reset when major version is incremented + * @revision: Internal revision number: incremented on implementation changes + * and/or bug fixes that have no impact on API + */ +struct fsl_mc_version { + u32 major; + u32 minor; + u32 revision; +}; + +struct fsl_mc_version *fsl_mc_get_version(void); + int __must_check fsl_mc_portal_allocate(struct fsl_mc_device *mc_dev, u16 mc_io_flags, struct fsl_mc_io **new_mc_io); diff --git a/include/linux/uacce.h b/include/linux/uacce.h new file mode 100644 index 000000000000..0e215e6d0534 --- /dev/null +++ b/include/linux/uacce.h @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LINUX_UACCE_H +#define _LINUX_UACCE_H + +#include <linux/cdev.h> +#include <uapi/misc/uacce/uacce.h> + +#define UACCE_NAME "uacce" +#define UACCE_MAX_REGION 2 +#define UACCE_MAX_NAME_SIZE 64 + +struct uacce_queue; +struct uacce_device; + +/** + * struct uacce_qfile_region - structure of queue file region + * @type: type of the region + */ +struct uacce_qfile_region { + enum uacce_qfrt type; +}; + +/** + * struct uacce_ops - uacce device operations + * @get_available_instances: get available instances left of the device + * @get_queue: get a queue from the device + * @put_queue: free a queue to the device + * @start_queue: make the queue start work after get_queue + * @stop_queue: make the queue stop work before put_queue + * @is_q_updated: check whether the task is finished + * @mmap: mmap addresses of queue to user space + * @ioctl: ioctl for user space users of the queue + */ +struct uacce_ops { + int (*get_available_instances)(struct uacce_device *uacce); + int (*get_queue)(struct uacce_device *uacce, unsigned long arg, + struct uacce_queue *q); + void (*put_queue)(struct uacce_queue *q); + int (*start_queue)(struct uacce_queue *q); + void (*stop_queue)(struct uacce_queue *q); + int (*is_q_updated)(struct uacce_queue *q); + int (*mmap)(struct uacce_queue *q, struct vm_area_struct *vma, + struct uacce_qfile_region 
*qfr); + long (*ioctl)(struct uacce_queue *q, unsigned int cmd, + unsigned long arg); +}; + +/** + * struct uacce_interface - interface required for uacce_register() + * @name: the uacce device name. Will show up in sysfs + * @flags: uacce device attributes + * @ops: pointer to the struct uacce_ops + */ +struct uacce_interface { + char name[UACCE_MAX_NAME_SIZE]; + unsigned int flags; + const struct uacce_ops *ops; +}; + +enum uacce_q_state { + UACCE_Q_ZOMBIE = 0, + UACCE_Q_INIT, + UACCE_Q_STARTED, +}; + +/** + * struct uacce_queue + * @uacce: pointer to uacce + * @priv: private pointer + * @wait: wait queue head + * @list: index into uacce_mm + * @uacce_mm: the corresponding mm + * @qfrs: pointer of qfr regions + * @state: queue state machine + */ +struct uacce_queue { + struct uacce_device *uacce; + void *priv; + wait_queue_head_t wait; + struct list_head list; + struct uacce_mm *uacce_mm; + struct uacce_qfile_region *qfrs[UACCE_MAX_REGION]; + enum uacce_q_state state; +}; + +/** + * struct uacce_device + * @algs: supported algorithms + * @api_ver: api version + * @ops: pointer to the struct uacce_ops + * @qf_pg_num: page numbers of the queue file regions + * @parent: pointer to the parent device + * @is_vf: whether virtual function + * @flags: uacce attributes + * @dev_id: id of the uacce device + * @cdev: cdev of the uacce + * @dev: dev of the uacce + * @priv: private pointer of the uacce + * @mm_list: list head of uacce_mm->list + * @mm_lock: lock for mm_list + * @inode: core vfs + */ +struct uacce_device { + const char *algs; + const char *api_ver; + const struct uacce_ops *ops; + unsigned long qf_pg_num[UACCE_MAX_REGION]; + struct device *parent; + bool is_vf; + u32 flags; + u32 dev_id; + struct cdev *cdev; + struct device dev; + void *priv; + struct list_head mm_list; + struct mutex mm_lock; + struct inode *inode; +}; + +/** + * struct uacce_mm - keep track of queues bound to a process + * @list: index into uacce_device + * @queues: list of queues + * @mm: 
the mm struct + * @lock: protects the list of queues + * @pasid: pasid of the uacce_mm + * @handle: iommu_sva handle return from iommu_sva_bind_device + */ +struct uacce_mm { + struct list_head list; + struct list_head queues; + struct mm_struct *mm; + struct mutex lock; + int pasid; + struct iommu_sva *handle; +}; + +#if IS_ENABLED(CONFIG_UACCE) + +struct uacce_device *uacce_alloc(struct device *parent, + struct uacce_interface *interface); +int uacce_register(struct uacce_device *uacce); +void uacce_remove(struct uacce_device *uacce); + +#else /* CONFIG_UACCE */ + +static inline +struct uacce_device *uacce_alloc(struct device *parent, + struct uacce_interface *interface) +{ + return ERR_PTR(-ENODEV); +} + +static inline int uacce_register(struct uacce_device *uacce) +{ + return -EINVAL; +} + +static inline void uacce_remove(struct uacce_device *uacce) {} + +#endif /* CONFIG_UACCE */ + +#endif /* _LINUX_UACCE_H */ diff --git a/include/uapi/misc/uacce/hisi_qm.h b/include/uapi/misc/uacce/hisi_qm.h new file mode 100644 index 000000000000..6435f0bcb556 --- /dev/null +++ b/include/uapi/misc/uacce/hisi_qm.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +#ifndef _UAPI_HISI_QM_H +#define _UAPI_HISI_QM_H + +#include <linux/types.h> + +/** + * struct hisi_qp_ctx - User data for hisi qp. 
+ * @id: qp_index return to user space + * @qc_type: Accelerator algorithm type + */ +struct hisi_qp_ctx { + __u16 id; + __u16 qc_type; +}; + +#define HISI_QM_API_VER_BASE "hisi_qm_v1" +#define HISI_QM_API_VER2_BASE "hisi_qm_v2" + +/* UACCE_CMD_QM_SET_QP_CTX: Set qp algorithm type */ +#define UACCE_CMD_QM_SET_QP_CTX _IOWR('H', 10, struct hisi_qp_ctx) + +#endif diff --git a/include/uapi/misc/uacce/uacce.h b/include/uapi/misc/uacce/uacce.h new file mode 100644 index 000000000000..cc7185678f47 --- /dev/null +++ b/include/uapi/misc/uacce/uacce.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +#ifndef _UAPIUUACCE_H +#define _UAPIUUACCE_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +/* + * UACCE_CMD_START_Q: Start queue + */ +#define UACCE_CMD_START_Q _IO('W', 0) + +/* + * UACCE_CMD_PUT_Q: + * User actively stop queue and free queue resource immediately + * Optimization method since close fd may delay + */ +#define UACCE_CMD_PUT_Q _IO('W', 1) + +/* + * UACCE Device flags: + * UACCE_DEV_SVA: Shared Virtual Addresses + * Support PASID + * Support device page faults (PCI PRI or SMMU Stall) + */ +#define UACCE_DEV_SVA BIT(0) + +/** + * enum uacce_qfrt: queue file region type + * @UACCE_QFRT_MMIO: device mmio region + * @UACCE_QFRT_DUS: device user share region + */ +enum uacce_qfrt { + UACCE_QFRT_MMIO = 0, + UACCE_QFRT_DUS = 1, +}; + +#endif diff --git a/kernel/padata.c b/kernel/padata.c index 72777c10bb9c..a6afa12fb75e 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -512,7 +512,7 @@ static int padata_replace_one(struct padata_shell *ps) static int padata_replace(struct padata_instance *pinst) { struct padata_shell *ps; - int err; + int err = 0; pinst->flags |= PADATA_RESET; @@ -1038,12 +1038,13 @@ EXPORT_SYMBOL(padata_alloc_shell); */ void padata_free_shell(struct padata_shell *ps) { - struct padata_instance *pinst = ps->pinst; + if (!ps) + return; - mutex_lock(&pinst->lock); + mutex_lock(&ps->pinst->lock); 
list_del(&ps->list); padata_free_pd(rcu_dereference_protected(ps->pd, 1)); - mutex_unlock(&pinst->lock); + mutex_unlock(&ps->pinst->lock); kfree(ps); } |