summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-04-01 14:47:40 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-04-01 14:47:40 -0700
commit72f35423e8a6a2451c202f52cb8adb92b08592ec (patch)
tree2cc5c715631a59d51b6445143e03a187e8e394f6
parent890f0b0d27dc400679b9a91d04ca44f5ee4c19c0 (diff)
parentfcb90d51c375d09a034993cda262b68499e233a4 (diff)
downloadlinux-stable-72f35423e8a6a2451c202f52cb8adb92b08592ec.tar.gz
linux-stable-72f35423e8a6a2451c202f52cb8adb92b08592ec.tar.bz2
linux-stable-72f35423e8a6a2451c202f52cb8adb92b08592ec.zip
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu: "API: - Fix out-of-sync IVs in self-test for IPsec AEAD algorithms Algorithms: - Use formally verified implementation of x86/curve25519 Drivers: - Enhance hwrng support in caam - Use crypto_engine for skcipher/aead/rsa/hash in caam - Add Xilinx AES driver - Add uacce driver - Register zip engine to uacce in hisilicon - Add support for OCTEON TX CPT engine in marvell" * 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (162 commits) crypto: af_alg - bool type cosmetics crypto: arm[64]/poly1305 - add artifact to .gitignore files crypto: caam - limit single JD RNG output to maximum of 16 bytes crypto: caam - enable prediction resistance in HRWNG bus: fsl-mc: add api to retrieve mc version crypto: caam - invalidate entropy register during RNG initialization crypto: caam - check if RNG job failed crypto: caam - simplify RNG implementation crypto: caam - drop global context pointer and init_done crypto: caam - use struct hwrng's .init for initialization crypto: caam - allocate RNG instantiation descriptor with GFP_DMA crypto: ccree - remove duplicated include from cc_aead.c crypto: chelsio - remove set but not used variable 'adap' crypto: marvell - enable OcteonTX cpt options for build crypto: marvell - add the Virtual Function driver for CPT crypto: marvell - add support for OCTEON TX CPT engine crypto: marvell - create common Kconfig and Makefile for Marvell crypto: arm/neon - memzero_explicit aes-cbc key crypto: bcm - Use scnprintf() for avoiding potential buffer overflow crypto: atmel-i2c - Fix wakeup fail ...
-rw-r--r--Documentation/ABI/testing/sysfs-driver-uacce39
-rw-r--r--Documentation/devicetree/bindings/crypto/xlnx,zynqmp-aes.yaml37
-rw-r--r--Documentation/misc-devices/uacce.rst176
-rw-r--r--MAINTAINERS17
-rw-r--r--arch/arm/crypto/.gitignore1
-rw-r--r--arch/arm/crypto/aes-neonbs-glue.c1
-rw-r--r--arch/arm/crypto/ghash-ce-core.S5
-rw-r--r--arch/arm64/crypto/.gitignore1
-rw-r--r--arch/arm64/crypto/aes-neonbs-glue.c1
-rw-r--r--arch/arm64/crypto/sha1-ce-glue.c20
-rw-r--r--arch/arm64/crypto/sha2-ce-glue.c23
-rw-r--r--arch/x86/crypto/curve25519-x86_64.c3546
-rw-r--r--crypto/af_alg.c10
-rw-r--r--crypto/algif_hash.c6
-rw-r--r--crypto/authencesn.c2
-rw-r--r--crypto/ccm.c29
-rw-r--r--crypto/cryptd.c37
-rw-r--r--crypto/ctr.c29
-rw-r--r--crypto/cts.c27
-rw-r--r--crypto/gcm.c66
-rw-r--r--crypto/geniv.c17
-rw-r--r--crypto/lrw.c28
-rw-r--r--crypto/md5.c3
-rw-r--r--crypto/pcrypt.c33
-rw-r--r--crypto/proc.c2
-rw-r--r--crypto/rng.c8
-rw-r--r--crypto/rsa-pkcs1pad.c59
-rw-r--r--crypto/tcrypt.c4
-rw-r--r--crypto/testmgr.c28
-rw-r--r--crypto/xts.c28
-rw-r--r--drivers/bus/fsl-mc/fsl-mc-bus.c33
-rw-r--r--drivers/char/hw_random/Kconfig17
-rw-r--r--drivers/char/hw_random/imx-rngc.c85
-rw-r--r--drivers/char/hw_random/omap3-rom-rng.c1
-rw-r--r--drivers/crypto/Kconfig50
-rw-r--r--drivers/crypto/Makefile3
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c4
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h2
-rw-r--r--drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h2
-rw-r--r--drivers/crypto/atmel-i2c.c3
-rw-r--r--drivers/crypto/bcm/util.c40
-rw-r--r--drivers/crypto/caam/Kconfig2
-rw-r--r--drivers/crypto/caam/caamalg.c415
-rw-r--r--drivers/crypto/caam/caamalg_desc.c30
-rw-r--r--drivers/crypto/caam/caamalg_qi.c4
-rw-r--r--drivers/crypto/caam/caamalg_qi2.h6
-rw-r--r--drivers/crypto/caam/caamhash.c340
-rw-r--r--drivers/crypto/caam/caampkc.c185
-rw-r--r--drivers/crypto/caam/caampkc.h10
-rw-r--r--drivers/crypto/caam/caamrng.c405
-rw-r--r--drivers/crypto/caam/ctrl.c88
-rw-r--r--drivers/crypto/caam/desc.h2
-rw-r--r--drivers/crypto/caam/intern.h9
-rw-r--r--drivers/crypto/caam/jr.c49
-rw-r--r--drivers/crypto/caam/key_gen.c2
-rw-r--r--drivers/crypto/caam/qi.c60
-rw-r--r--drivers/crypto/caam/qi.h4
-rw-r--r--drivers/crypto/caam/regs.h7
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_main.c2
-rw-r--r--drivers/crypto/ccp/psp-dev.c3
-rw-r--r--drivers/crypto/ccp/sev-dev.c39
-rw-r--r--drivers/crypto/ccp/sp-dev.h1
-rw-r--r--drivers/crypto/ccp/sp-pci.c9
-rw-r--r--drivers/crypto/ccree/cc_aead.c176
-rw-r--r--drivers/crypto/ccree/cc_aead.h3
-rw-r--r--drivers/crypto/ccree/cc_buffer_mgr.c229
-rw-r--r--drivers/crypto/ccree/cc_buffer_mgr.h5
-rw-r--r--drivers/crypto/ccree/cc_cipher.c78
-rw-r--r--drivers/crypto/ccree/cc_debugfs.c29
-rw-r--r--drivers/crypto/ccree/cc_driver.c127
-rw-r--r--drivers/crypto/ccree/cc_driver.h18
-rw-r--r--drivers/crypto/ccree/cc_hash.c228
-rw-r--r--drivers/crypto/ccree/cc_hash.h31
-rw-r--r--drivers/crypto/ccree/cc_hw_queue_defs.h332
-rw-r--r--drivers/crypto/ccree/cc_pm.c60
-rw-r--r--drivers/crypto/ccree/cc_pm.h21
-rw-r--r--drivers/crypto/ccree/cc_request_mgr.c48
-rw-r--r--drivers/crypto/ccree/cc_request_mgr.h19
-rw-r--r--drivers/crypto/ccree/cc_sram_mgr.c78
-rw-r--r--drivers/crypto/ccree/cc_sram_mgr.h45
-rw-r--r--drivers/crypto/chelsio/chcr_algo.c358
-rw-r--r--drivers/crypto/chelsio/chcr_core.c3
-rw-r--r--drivers/crypto/chelsio/chcr_core.h6
-rw-r--r--drivers/crypto/chelsio/chcr_crypto.h16
-rw-r--r--drivers/crypto/chelsio/chtls/chtls_io.c7
-rw-r--r--drivers/crypto/chelsio/chtls/chtls_main.c10
-rw-r--r--drivers/crypto/hisilicon/Kconfig2
-rw-r--r--drivers/crypto/hisilicon/hpre/hpre.h3
-rw-r--r--drivers/crypto/hisilicon/hpre/hpre_crypto.c20
-rw-r--r--drivers/crypto/hisilicon/hpre/hpre_main.c160
-rw-r--r--drivers/crypto/hisilicon/qm.c619
-rw-r--r--drivers/crypto/hisilicon/qm.h72
-rw-r--r--drivers/crypto/hisilicon/sec2/sec.h12
-rw-r--r--drivers/crypto/hisilicon/sec2/sec_crypto.c260
-rw-r--r--drivers/crypto/hisilicon/sec2/sec_main.c294
-rw-r--r--drivers/crypto/hisilicon/zip/zip.h2
-rw-r--r--drivers/crypto/hisilicon/zip/zip_crypto.c54
-rw-r--r--drivers/crypto/hisilicon/zip/zip_main.c324
-rw-r--r--drivers/crypto/img-hash.c2
-rw-r--r--drivers/crypto/marvell/Kconfig37
-rw-r--r--drivers/crypto/marvell/Makefile7
-rw-r--r--drivers/crypto/marvell/cesa/Makefile3
-rw-r--r--drivers/crypto/marvell/cesa/cesa.c (renamed from drivers/crypto/marvell/cesa.c)0
-rw-r--r--drivers/crypto/marvell/cesa/cesa.h (renamed from drivers/crypto/marvell/cesa.h)5
-rw-r--r--drivers/crypto/marvell/cesa/cipher.c (renamed from drivers/crypto/marvell/cipher.c)15
-rw-r--r--drivers/crypto/marvell/cesa/hash.c (renamed from drivers/crypto/marvell/hash.c)38
-rw-r--r--drivers/crypto/marvell/cesa/tdma.c (renamed from drivers/crypto/marvell/tdma.c)10
-rw-r--r--drivers/crypto/marvell/octeontx/Makefile6
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cpt_common.h51
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h824
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cptpf.h34
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cptpf_main.c307
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c253
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c1686
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h180
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cptvf.h104
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cptvf_algs.c1744
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cptvf_algs.h188
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cptvf_main.c985
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c247
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c612
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h227
-rw-r--r--drivers/crypto/mediatek/mtk-sha.c2
-rw-r--r--drivers/crypto/mxs-dcp.c58
-rw-r--r--drivers/crypto/nx/nx.h2
-rw-r--r--drivers/crypto/omap-sham.c4
-rw-r--r--drivers/crypto/qat/qat_common/qat_algs.c2
-rw-r--r--drivers/crypto/qat/qat_common/qat_crypto.c3
-rw-r--r--drivers/crypto/qce/common.c2
-rw-r--r--drivers/crypto/qce/common.h3
-rw-r--r--drivers/crypto/qce/dma.c11
-rw-r--r--drivers/crypto/qce/dma.h2
-rw-r--r--drivers/crypto/qce/skcipher.c30
-rw-r--r--drivers/crypto/s5p-sss.c2
-rw-r--r--drivers/crypto/xilinx/Makefile2
-rw-r--r--drivers/crypto/xilinx/zynqmp-aes-gcm.c457
-rw-r--r--drivers/firmware/xilinx/zynqmp.c25
-rw-r--r--drivers/misc/Kconfig1
-rw-r--r--drivers/misc/Makefile1
-rw-r--r--drivers/misc/uacce/Kconfig13
-rw-r--r--drivers/misc/uacce/Makefile2
-rw-r--r--drivers/misc/uacce/uacce.c633
-rw-r--r--fs/debugfs/file.c8
-rw-r--r--include/crypto/aead.h48
-rw-r--r--include/crypto/if_alg.h2
-rw-r--r--include/linux/debugfs.h1
-rw-r--r--include/linux/firmware/xlnx-zynqmp.h2
-rw-r--r--include/linux/fsl/mc.h16
-rw-r--r--include/linux/uacce.h163
-rw-r--r--include/uapi/misc/uacce/hisi_qm.h23
-rw-r--r--include/uapi/misc/uacce/uacce.h38
-rw-r--r--kernel/padata.c9
152 files changed, 13777 insertions, 5297 deletions
diff --git a/Documentation/ABI/testing/sysfs-driver-uacce b/Documentation/ABI/testing/sysfs-driver-uacce
new file mode 100644
index 000000000000..08f2591138af
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-uacce
@@ -0,0 +1,39 @@
+What: /sys/class/uacce/<dev_name>/api
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: linux-accelerators@lists.ozlabs.org
+Description: Api of the device
+ Can be any string and up to userspace to parse.
+ Application use the api to match the correct driver
+
+What: /sys/class/uacce/<dev_name>/flags
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: linux-accelerators@lists.ozlabs.org
+Description: Attributes of the device, see UACCE_DEV_xxx flag defined in uacce.h
+
+What: /sys/class/uacce/<dev_name>/available_instances
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: linux-accelerators@lists.ozlabs.org
+Description: Available instances left of the device
+ Return -ENODEV if uacce_ops get_available_instances is not provided
+
+What: /sys/class/uacce/<dev_name>/algorithms
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: linux-accelerators@lists.ozlabs.org
+Description: Algorithms supported by this accelerator, separated by new line.
+ Can be any string and up to userspace to parse.
+
+What: /sys/class/uacce/<dev_name>/region_mmio_size
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: linux-accelerators@lists.ozlabs.org
+Description: Size (bytes) of mmio region queue file
+
+What: /sys/class/uacce/<dev_name>/region_dus_size
+Date: Feb 2020
+KernelVersion: 5.7
+Contact: linux-accelerators@lists.ozlabs.org
+Description: Size (bytes) of dus region queue file
diff --git a/Documentation/devicetree/bindings/crypto/xlnx,zynqmp-aes.yaml b/Documentation/devicetree/bindings/crypto/xlnx,zynqmp-aes.yaml
new file mode 100644
index 000000000000..55dd6e3d270d
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/xlnx,zynqmp-aes.yaml
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/xlnx,zynqmp-aes.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Xilinx ZynqMP AES-GCM Hardware Accelerator Device Tree Bindings
+
+maintainers:
+ - Kalyani Akula <kalyani.akula@xilinx.com>
+ - Michal Simek <michal.simek@xilinx.com>
+
+description: |
+ The ZynqMP AES-GCM hardened cryptographic accelerator is used to
+ encrypt or decrypt the data with provided key and initialization vector.
+
+properties:
+ compatible:
+ const: xlnx,zynqmp-aes
+
+required:
+ - compatible
+
+additionalProperties: false
+
+examples:
+ - |
+ firmware {
+ zynqmp_firmware: zynqmp-firmware {
+ compatible = "xlnx,zynqmp-firmware";
+ method = "smc";
+ xlnx_aes: zynqmp-aes {
+ compatible = "xlnx,zynqmp-aes";
+ };
+ };
+ };
+...
diff --git a/Documentation/misc-devices/uacce.rst b/Documentation/misc-devices/uacce.rst
new file mode 100644
index 000000000000..1db412e9b1a3
--- /dev/null
+++ b/Documentation/misc-devices/uacce.rst
@@ -0,0 +1,176 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Introduction of Uacce
+---------------------
+
+Uacce (Unified/User-space-access-intended Accelerator Framework) targets to
+provide Shared Virtual Addressing (SVA) between accelerators and processes.
+So accelerator can access any data structure of the main cpu.
+This differs from the data sharing between cpu and io device, which share
+only data content rather than address.
+Because of the unified address, hardware and user space of process can
+share the same virtual address in the communication.
+Uacce takes the hardware accelerator as a heterogeneous processor, while
+IOMMU share the same CPU page tables and as a result the same translation
+from va to pa.
+
+::
+
+ __________________________ __________________________
+ | | | |
+ | User application (CPU) | | Hardware Accelerator |
+ |__________________________| |__________________________|
+
+ | |
+ | va | va
+ V V
+ __________ __________
+ | | | |
+ | MMU | | IOMMU |
+ |__________| |__________|
+ | |
+ | |
+ V pa V pa
+ _______________________________________
+ | |
+ | Memory |
+ |_______________________________________|
+
+
+
+Architecture
+------------
+
+Uacce is the kernel module, taking charge of iommu and address sharing.
+The user drivers and libraries are called WarpDrive.
+
+The uacce device, built around the IOMMU SVA API, can access multiple
+address spaces, including the one without PASID.
+
+A virtual concept, queue, is used for the communication. It provides a
+FIFO-like interface. And it maintains a unified address space between the
+application and all involved hardware.
+
+::
+
+ ___________________ ________________
+ | | user API | |
+ | WarpDrive library | ------------> | user driver |
+ |___________________| |________________|
+ | |
+ | |
+ | queue fd |
+ | |
+ | |
+ v |
+ ___________________ _________ |
+ | | | | | mmap memory
+ | Other framework | | uacce | | r/w interface
+ | crypto/nic/others | |_________| |
+ |___________________| |
+ | | |
+ | register | register |
+ | | |
+ | | |
+ | _________________ __________ |
+ | | | | | |
+ ------------- | Device Driver | | IOMMU | |
+ |_________________| |__________| |
+ | |
+ | V
+ | ___________________
+ | | |
+ -------------------------- | Device(Hardware) |
+ |___________________|
+
+
+How does it work
+----------------
+
+Uacce uses mmap and IOMMU to play the trick.
+
+Uacce creates a chrdev for every device registered to it. New queue is
+created when user application open the chrdev. The file descriptor is used
+as the user handle of the queue.
+The accelerator device present itself as an Uacce object, which exports as
+a chrdev to the user space. The user application communicates with the
+hardware by ioctl (as control path) or share memory (as data path).
+
+The control path to the hardware is via file operation, while data path is
+via mmap space of the queue fd.
+
+The queue file address space:
+
+::
+
+ /**
+ * enum uacce_qfrt: qfrt type
+ * @UACCE_QFRT_MMIO: device mmio region
+ * @UACCE_QFRT_DUS: device user share region
+ */
+ enum uacce_qfrt {
+ UACCE_QFRT_MMIO = 0,
+ UACCE_QFRT_DUS = 1,
+ };
+
+All regions are optional and differ from device type to type.
+Each region can be mmapped only once, otherwise -EEXIST returns.
+
+The device mmio region is mapped to the hardware mmio space. It is generally
+used for doorbell or other notification to the hardware. It is not fast enough
+as data channel.
+
+The device user share region is used for share data buffer between user process
+and device.
+
+
+The Uacce register API
+----------------------
+
+The register API is defined in uacce.h.
+
+::
+
+ struct uacce_interface {
+ char name[UACCE_MAX_NAME_SIZE];
+ unsigned int flags;
+ const struct uacce_ops *ops;
+ };
+
+According to the IOMMU capability, uacce_interface flags can be:
+
+::
+
+ /**
+ * UACCE Device flags:
+ * UACCE_DEV_SVA: Shared Virtual Addresses
+ * Support PASID
+ * Support device page faults (PCI PRI or SMMU Stall)
+ */
+ #define UACCE_DEV_SVA BIT(0)
+
+ struct uacce_device *uacce_alloc(struct device *parent,
+ struct uacce_interface *interface);
+ int uacce_register(struct uacce_device *uacce);
+ void uacce_remove(struct uacce_device *uacce);
+
+uacce_register results can be:
+
+a. If uacce module is not compiled, ERR_PTR(-ENODEV)
+
+b. Succeed with the desired flags
+
+c. Succeed with the negotiated flags, for example
+
+ uacce_interface.flags = UACCE_DEV_SVA but uacce->flags = ~UACCE_DEV_SVA
+
+ So user driver need check return value as well as the negotiated uacce->flags.
+
+
+The user driver
+---------------
+
+The queue file mmap space will need a user driver to wrap the communication
+protocol. Uacce provides some attributes in sysfs for the user driver to
+match the right accelerator accordingly.
+More details in Documentation/ABI/testing/sysfs-driver-uacce.
diff --git a/MAINTAINERS b/MAINTAINERS
index 195052943574..b76e11384c3b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4577,7 +4577,9 @@ S: Supported
F: drivers/scsi/cxgbi/cxgb3i
CXGB4 CRYPTO DRIVER (chcr)
-M: Atul Gupta <atul.gupta@chelsio.com>
+M: Ayush Sawal <ayush.sawal@chelsio.com>
+M: Vinay Kumar Yadav <vinay.yadav@chelsio.com>
+M: Rohit Maheshwari <rohitm@chelsio.com>
L: linux-crypto@vger.kernel.org
W: http://www.chelsio.com
S: Supported
@@ -10066,6 +10068,7 @@ F: Documentation/devicetree/bindings/phy/phy-mvebu-utmi.txt
MARVELL CRYPTO DRIVER
M: Boris Brezillon <bbrezillon@kernel.org>
M: Arnaud Ebalard <arno@natisbad.org>
+M: Srujana Challa <schalla@marvell.com>
F: drivers/crypto/marvell/
S: Maintained
L: linux-crypto@vger.kernel.org
@@ -17139,6 +17142,18 @@ W: http://linuxtv.org
S: Maintained
F: drivers/media/pci/tw686x/
+UACCE ACCELERATOR FRAMEWORK
+M: Zhangfei Gao <zhangfei.gao@linaro.org>
+M: Zhou Wang <wangzhou1@hisilicon.com>
+L: linux-accelerators@lists.ozlabs.org
+L: linux-kernel@vger.kernel.org
+S: Maintained
+F: Documentation/ABI/testing/sysfs-driver-uacce
+F: Documentation/misc-devices/uacce.rst
+F: drivers/misc/uacce/
+F: include/linux/uacce.h
+F: include/uapi/misc/uacce/
+
UBI FILE SYSTEM (UBIFS)
M: Richard Weinberger <richard@nod.at>
L: linux-mtd@lists.infradead.org
diff --git a/arch/arm/crypto/.gitignore b/arch/arm/crypto/.gitignore
index 31e1f538df7d..a3c7ad52a469 100644
--- a/arch/arm/crypto/.gitignore
+++ b/arch/arm/crypto/.gitignore
@@ -1,3 +1,4 @@
aesbs-core.S
sha256-core.S
sha512-core.S
+poly1305-core.S
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
index e85839a8aaeb..e6fd32919c81 100644
--- a/arch/arm/crypto/aes-neonbs-glue.c
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -138,6 +138,7 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
kernel_neon_begin();
aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
kernel_neon_end();
+ memzero_explicit(&rk, sizeof(rk));
return crypto_cipher_setkey(ctx->enc_tfm, in_key, key_len);
}
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S
index 534c9647726d..9f51e3fa4526 100644
--- a/arch/arm/crypto/ghash-ce-core.S
+++ b/arch/arm/crypto/ghash-ce-core.S
@@ -8,6 +8,9 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
+ .arch armv8-a
+ .fpu crypto-neon-fp-armv8
+
SHASH .req q0
T1 .req q1
XL .req q2
@@ -88,8 +91,6 @@
T3_H .req d17
.text
- .arch armv8-a
- .fpu crypto-neon-fp-armv8
.macro __pmull_p64, rd, rn, rm, b1, b2, b3, b4
vmull.p64 \rd, \rn, \rm
diff --git a/arch/arm64/crypto/.gitignore b/arch/arm64/crypto/.gitignore
index 879df8781ed5..e403b1343328 100644
--- a/arch/arm64/crypto/.gitignore
+++ b/arch/arm64/crypto/.gitignore
@@ -1,2 +1,3 @@
sha256-core.S
sha512-core.S
+poly1305-core.S
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index e3e27349a9fe..fb507d569922 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -151,6 +151,7 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
kernel_neon_begin();
aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
kernel_neon_end();
+ memzero_explicit(&rk, sizeof(rk));
return 0;
}
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index 63c875d3314b..565ef604ca04 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -91,12 +91,32 @@ static int sha1_ce_final(struct shash_desc *desc, u8 *out)
return sha1_base_finish(desc, out);
}
+static int sha1_ce_export(struct shash_desc *desc, void *out)
+{
+ struct sha1_ce_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(out, &sctx->sst, sizeof(struct sha1_state));
+ return 0;
+}
+
+static int sha1_ce_import(struct shash_desc *desc, const void *in)
+{
+ struct sha1_ce_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(&sctx->sst, in, sizeof(struct sha1_state));
+ sctx->finalize = 0;
+ return 0;
+}
+
static struct shash_alg alg = {
.init = sha1_base_init,
.update = sha1_ce_update,
.final = sha1_ce_final,
.finup = sha1_ce_finup,
+ .import = sha1_ce_import,
+ .export = sha1_ce_export,
.descsize = sizeof(struct sha1_ce_state),
+ .statesize = sizeof(struct sha1_state),
.digestsize = SHA1_DIGEST_SIZE,
.base = {
.cra_name = "sha1",
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index a8e67bafba3d..9450d19b9e6e 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -109,12 +109,32 @@ static int sha256_ce_final(struct shash_desc *desc, u8 *out)
return sha256_base_finish(desc, out);
}
+static int sha256_ce_export(struct shash_desc *desc, void *out)
+{
+ struct sha256_ce_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(out, &sctx->sst, sizeof(struct sha256_state));
+ return 0;
+}
+
+static int sha256_ce_import(struct shash_desc *desc, const void *in)
+{
+ struct sha256_ce_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(&sctx->sst, in, sizeof(struct sha256_state));
+ sctx->finalize = 0;
+ return 0;
+}
+
static struct shash_alg algs[] = { {
.init = sha224_base_init,
.update = sha256_ce_update,
.final = sha256_ce_final,
.finup = sha256_ce_finup,
+ .export = sha256_ce_export,
+ .import = sha256_ce_import,
.descsize = sizeof(struct sha256_ce_state),
+ .statesize = sizeof(struct sha256_state),
.digestsize = SHA224_DIGEST_SIZE,
.base = {
.cra_name = "sha224",
@@ -128,7 +148,10 @@ static struct shash_alg algs[] = { {
.update = sha256_ce_update,
.final = sha256_ce_final,
.finup = sha256_ce_finup,
+ .export = sha256_ce_export,
+ .import = sha256_ce_import,
.descsize = sizeof(struct sha256_ce_state),
+ .statesize = sizeof(struct sha256_state),
.digestsize = SHA256_DIGEST_SIZE,
.base = {
.cra_name = "sha256",
diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
index eec7d2d24239..8a17621f7d3a 100644
--- a/arch/x86/crypto/curve25519-x86_64.c
+++ b/arch/x86/crypto/curve25519-x86_64.c
@@ -1,8 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright (c) 2017 Armando Faz <armfazh@ic.unicamp.br>. All Rights Reserved.
- * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
- * Copyright (C) 2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
+ * Copyright (C) 2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
*/
#include <crypto/curve25519.h>
@@ -16,2337 +15,1378 @@
#include <asm/cpufeature.h>
#include <asm/processor.h>
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2);
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_adx);
-
-enum { NUM_WORDS_ELTFP25519 = 4 };
-typedef __aligned(32) u64 eltfp25519_1w[NUM_WORDS_ELTFP25519];
-typedef __aligned(32) u64 eltfp25519_1w_buffer[2 * NUM_WORDS_ELTFP25519];
-
-#define mul_eltfp25519_1w_adx(c, a, b) do { \
- mul_256x256_integer_adx(m.buffer, a, b); \
- red_eltfp25519_1w_adx(c, m.buffer); \
-} while (0)
-
-#define mul_eltfp25519_1w_bmi2(c, a, b) do { \
- mul_256x256_integer_bmi2(m.buffer, a, b); \
- red_eltfp25519_1w_bmi2(c, m.buffer); \
-} while (0)
-
-#define sqr_eltfp25519_1w_adx(a) do { \
- sqr_256x256_integer_adx(m.buffer, a); \
- red_eltfp25519_1w_adx(a, m.buffer); \
-} while (0)
-
-#define sqr_eltfp25519_1w_bmi2(a) do { \
- sqr_256x256_integer_bmi2(m.buffer, a); \
- red_eltfp25519_1w_bmi2(a, m.buffer); \
-} while (0)
-
-#define mul_eltfp25519_2w_adx(c, a, b) do { \
- mul2_256x256_integer_adx(m.buffer, a, b); \
- red_eltfp25519_2w_adx(c, m.buffer); \
-} while (0)
-
-#define mul_eltfp25519_2w_bmi2(c, a, b) do { \
- mul2_256x256_integer_bmi2(m.buffer, a, b); \
- red_eltfp25519_2w_bmi2(c, m.buffer); \
-} while (0)
-
-#define sqr_eltfp25519_2w_adx(a) do { \
- sqr2_256x256_integer_adx(m.buffer, a); \
- red_eltfp25519_2w_adx(a, m.buffer); \
-} while (0)
-
-#define sqr_eltfp25519_2w_bmi2(a) do { \
- sqr2_256x256_integer_bmi2(m.buffer, a); \
- red_eltfp25519_2w_bmi2(a, m.buffer); \
-} while (0)
-
-#define sqrn_eltfp25519_1w_adx(a, times) do { \
- int ____counter = (times); \
- while (____counter-- > 0) \
- sqr_eltfp25519_1w_adx(a); \
-} while (0)
-
-#define sqrn_eltfp25519_1w_bmi2(a, times) do { \
- int ____counter = (times); \
- while (____counter-- > 0) \
- sqr_eltfp25519_1w_bmi2(a); \
-} while (0)
-
-#define copy_eltfp25519_1w(C, A) do { \
- (C)[0] = (A)[0]; \
- (C)[1] = (A)[1]; \
- (C)[2] = (A)[2]; \
- (C)[3] = (A)[3]; \
-} while (0)
-
-#define setzero_eltfp25519_1w(C) do { \
- (C)[0] = 0; \
- (C)[1] = 0; \
- (C)[2] = 0; \
- (C)[3] = 0; \
-} while (0)
-
-__aligned(32) static const u64 table_ladder_8k[252 * NUM_WORDS_ELTFP25519] = {
- /* 1 */ 0xfffffffffffffff3UL, 0xffffffffffffffffUL,
- 0xffffffffffffffffUL, 0x5fffffffffffffffUL,
- /* 2 */ 0x6b8220f416aafe96UL, 0x82ebeb2b4f566a34UL,
- 0xd5a9a5b075a5950fUL, 0x5142b2cf4b2488f4UL,
- /* 3 */ 0x6aaebc750069680cUL, 0x89cf7820a0f99c41UL,
- 0x2a58d9183b56d0f4UL, 0x4b5aca80e36011a4UL,
- /* 4 */ 0x329132348c29745dUL, 0xf4a2e616e1642fd7UL,
- 0x1e45bb03ff67bc34UL, 0x306912d0f42a9b4aUL,
- /* 5 */ 0xff886507e6af7154UL, 0x04f50e13dfeec82fUL,
- 0xaa512fe82abab5ceUL, 0x174e251a68d5f222UL,
- /* 6 */ 0xcf96700d82028898UL, 0x1743e3370a2c02c5UL,
- 0x379eec98b4e86eaaUL, 0x0c59888a51e0482eUL,
- /* 7 */ 0xfbcbf1d699b5d189UL, 0xacaef0d58e9fdc84UL,
- 0xc1c20d06231f7614UL, 0x2938218da274f972UL,
- /* 8 */ 0xf6af49beff1d7f18UL, 0xcc541c22387ac9c2UL,
- 0x96fcc9ef4015c56bUL, 0x69c1627c690913a9UL,
- /* 9 */ 0x7a86fd2f4733db0eUL, 0xfdb8c4f29e087de9UL,
- 0x095e4b1a8ea2a229UL, 0x1ad7a7c829b37a79UL,
- /* 10 */ 0x342d89cad17ea0c0UL, 0x67bedda6cced2051UL,
- 0x19ca31bf2bb42f74UL, 0x3df7b4c84980acbbUL,
- /* 11 */ 0xa8c6444dc80ad883UL, 0xb91e440366e3ab85UL,
- 0xc215cda00164f6d8UL, 0x3d867c6ef247e668UL,
- /* 12 */ 0xc7dd582bcc3e658cUL, 0xfd2c4748ee0e5528UL,
- 0xa0fd9b95cc9f4f71UL, 0x7529d871b0675ddfUL,
- /* 13 */ 0xb8f568b42d3cbd78UL, 0x1233011b91f3da82UL,
- 0x2dce6ccd4a7c3b62UL, 0x75e7fc8e9e498603UL,
- /* 14 */ 0x2f4f13f1fcd0b6ecUL, 0xf1a8ca1f29ff7a45UL,
- 0xc249c1a72981e29bUL, 0x6ebe0dbb8c83b56aUL,
- /* 15 */ 0x7114fa8d170bb222UL, 0x65a2dcd5bf93935fUL,
- 0xbdc41f68b59c979aUL, 0x2f0eef79a2ce9289UL,
- /* 16 */ 0x42ecbf0c083c37ceUL, 0x2930bc09ec496322UL,
- 0xf294b0c19cfeac0dUL, 0x3780aa4bedfabb80UL,
- /* 17 */ 0x56c17d3e7cead929UL, 0xe7cb4beb2e5722c5UL,
- 0x0ce931732dbfe15aUL, 0x41b883c7621052f8UL,
- /* 18 */ 0xdbf75ca0c3d25350UL, 0x2936be086eb1e351UL,
- 0xc936e03cb4a9b212UL, 0x1d45bf82322225aaUL,
- /* 19 */ 0xe81ab1036a024cc5UL, 0xe212201c304c9a72UL,
- 0xc5d73fba6832b1fcUL, 0x20ffdb5a4d839581UL,
- /* 20 */ 0xa283d367be5d0fadUL, 0x6c2b25ca8b164475UL,
- 0x9d4935467caaf22eUL, 0x5166408eee85ff49UL,
- /* 21 */ 0x3c67baa2fab4e361UL, 0xb3e433c67ef35cefUL,
- 0x5259729241159b1cUL, 0x6a621892d5b0ab33UL,
- /* 22 */ 0x20b74a387555cdcbUL, 0x532aa10e1208923fUL,
- 0xeaa17b7762281dd1UL, 0x61ab3443f05c44bfUL,
- /* 23 */ 0x257a6c422324def8UL, 0x131c6c1017e3cf7fUL,
- 0x23758739f630a257UL, 0x295a407a01a78580UL,
- /* 24 */ 0xf8c443246d5da8d9UL, 0x19d775450c52fa5dUL,
- 0x2afcfc92731bf83dUL, 0x7d10c8e81b2b4700UL,
- /* 25 */ 0xc8e0271f70baa20bUL, 0x993748867ca63957UL,
- 0x5412efb3cb7ed4bbUL, 0x3196d36173e62975UL,
- /* 26 */ 0xde5bcad141c7dffcUL, 0x47cc8cd2b395c848UL,
- 0xa34cd942e11af3cbUL, 0x0256dbf2d04ecec2UL,
- /* 27 */ 0x875ab7e94b0e667fUL, 0xcad4dd83c0850d10UL,
- 0x47f12e8f4e72c79fUL, 0x5f1a87bb8c85b19bUL,
- /* 28 */ 0x7ae9d0b6437f51b8UL, 0x12c7ce5518879065UL,
- 0x2ade09fe5cf77aeeUL, 0x23a05a2f7d2c5627UL,
- /* 29 */ 0x5908e128f17c169aUL, 0xf77498dd8ad0852dUL,
- 0x74b4c4ceab102f64UL, 0x183abadd10139845UL,
- /* 30 */ 0xb165ba8daa92aaacUL, 0xd5c5ef9599386705UL,
- 0xbe2f8f0cf8fc40d1UL, 0x2701e635ee204514UL,
- /* 31 */ 0x629fa80020156514UL, 0xf223868764a8c1ceUL,
- 0x5b894fff0b3f060eUL, 0x60d9944cf708a3faUL,
- /* 32 */ 0xaeea001a1c7a201fUL, 0xebf16a633ee2ce63UL,
- 0x6f7709594c7a07e1UL, 0x79b958150d0208cbUL,
- /* 33 */ 0x24b55e5301d410e7UL, 0xe3a34edff3fdc84dUL,
- 0xd88768e4904032d8UL, 0x131384427b3aaeecUL,
- /* 34 */ 0x8405e51286234f14UL, 0x14dc4739adb4c529UL,
- 0xb8a2b5b250634ffdUL, 0x2fe2a94ad8a7ff93UL,
- /* 35 */ 0xec5c57efe843faddUL, 0x2843ce40f0bb9918UL,
- 0xa4b561d6cf3d6305UL, 0x743629bde8fb777eUL,
- /* 36 */ 0x343edd46bbaf738fUL, 0xed981828b101a651UL,
- 0xa401760b882c797aUL, 0x1fc223e28dc88730UL,
- /* 37 */ 0x48604e91fc0fba0eUL, 0xb637f78f052c6fa4UL,
- 0x91ccac3d09e9239cUL, 0x23f7eed4437a687cUL,
- /* 38 */ 0x5173b1118d9bd800UL, 0x29d641b63189d4a7UL,
- 0xfdbf177988bbc586UL, 0x2959894fcad81df5UL,
- /* 39 */ 0xaebc8ef3b4bbc899UL, 0x4148995ab26992b9UL,
- 0x24e20b0134f92cfbUL, 0x40d158894a05dee8UL,
- /* 40 */ 0x46b00b1185af76f6UL, 0x26bac77873187a79UL,
- 0x3dc0bf95ab8fff5fUL, 0x2a608bd8945524d7UL,
- /* 41 */ 0x26449588bd446302UL, 0x7c4bc21c0388439cUL,
- 0x8e98a4f383bd11b2UL, 0x26218d7bc9d876b9UL,
- /* 42 */ 0xe3081542997c178aUL, 0x3c2d29a86fb6606fUL,
- 0x5c217736fa279374UL, 0x7dde05734afeb1faUL,
- /* 43 */ 0x3bf10e3906d42babUL, 0xe4f7803e1980649cUL,
- 0xe6053bf89595bf7aUL, 0x394faf38da245530UL,
- /* 44 */ 0x7a8efb58896928f4UL, 0xfbc778e9cc6a113cUL,
- 0x72670ce330af596fUL, 0x48f222a81d3d6cf7UL,
- /* 45 */ 0xf01fce410d72caa7UL, 0x5a20ecc7213b5595UL,
- 0x7bc21165c1fa1483UL, 0x07f89ae31da8a741UL,
- /* 46 */ 0x05d2c2b4c6830ff9UL, 0xd43e330fc6316293UL,
- 0xa5a5590a96d3a904UL, 0x705edb91a65333b6UL,
- /* 47 */ 0x048ee15e0bb9a5f7UL, 0x3240cfca9e0aaf5dUL,
- 0x8f4b71ceedc4a40bUL, 0x621c0da3de544a6dUL,
- /* 48 */ 0x92872836a08c4091UL, 0xce8375b010c91445UL,
- 0x8a72eb524f276394UL, 0x2667fcfa7ec83635UL,
- /* 49 */ 0x7f4c173345e8752aUL, 0x061b47feee7079a5UL,
- 0x25dd9afa9f86ff34UL, 0x3780cef5425dc89cUL,
- /* 50 */ 0x1a46035a513bb4e9UL, 0x3e1ef379ac575adaUL,
- 0xc78c5f1c5fa24b50UL, 0x321a967634fd9f22UL,
- /* 51 */ 0x946707b8826e27faUL, 0x3dca84d64c506fd0UL,
- 0xc189218075e91436UL, 0x6d9284169b3b8484UL,
- /* 52 */ 0x3a67e840383f2ddfUL, 0x33eec9a30c4f9b75UL,
- 0x3ec7c86fa783ef47UL, 0x26ec449fbac9fbc4UL,
- /* 53 */ 0x5c0f38cba09b9e7dUL, 0x81168cc762a3478cUL,
- 0x3e23b0d306fc121cUL, 0x5a238aa0a5efdcddUL,
- /* 54 */ 0x1ba26121c4ea43ffUL, 0x36f8c77f7c8832b5UL,
- 0x88fbea0b0adcf99aUL, 0x5ca9938ec25bebf9UL,
- /* 55 */ 0xd5436a5e51fccda0UL, 0x1dbc4797c2cd893bUL,
- 0x19346a65d3224a08UL, 0x0f5034e49b9af466UL,
- /* 56 */ 0xf23c3967a1e0b96eUL, 0xe58b08fa867a4d88UL,
- 0xfb2fabc6a7341679UL, 0x2a75381eb6026946UL,
- /* 57 */ 0xc80a3be4c19420acUL, 0x66b1f6c681f2b6dcUL,
- 0x7cf7036761e93388UL, 0x25abbbd8a660a4c4UL,
- /* 58 */ 0x91ea12ba14fd5198UL, 0x684950fc4a3cffa9UL,
- 0xf826842130f5ad28UL, 0x3ea988f75301a441UL,
- /* 59 */ 0xc978109a695f8c6fUL, 0x1746eb4a0530c3f3UL,
- 0x444d6d77b4459995UL, 0x75952b8c054e5cc7UL,
- /* 60 */ 0xa3703f7915f4d6aaUL, 0x66c346202f2647d8UL,
- 0xd01469df811d644bUL, 0x77fea47d81a5d71fUL,
- /* 61 */ 0xc5e9529ef57ca381UL, 0x6eeeb4b9ce2f881aUL,
- 0xb6e91a28e8009bd6UL, 0x4b80be3e9afc3fecUL,
- /* 62 */ 0x7e3773c526aed2c5UL, 0x1b4afcb453c9a49dUL,
- 0xa920bdd7baffb24dUL, 0x7c54699f122d400eUL,
- /* 63 */ 0xef46c8e14fa94bc8UL, 0xe0b074ce2952ed5eUL,
- 0xbea450e1dbd885d5UL, 0x61b68649320f712cUL,
- /* 64 */ 0x8a485f7309ccbdd1UL, 0xbd06320d7d4d1a2dUL,
- 0x25232973322dbef4UL, 0x445dc4758c17f770UL,
- /* 65 */ 0xdb0434177cc8933cUL, 0xed6fe82175ea059fUL,
- 0x1efebefdc053db34UL, 0x4adbe867c65daf99UL,
- /* 66 */ 0x3acd71a2a90609dfUL, 0xe5e991856dd04050UL,
- 0x1ec69b688157c23cUL, 0x697427f6885cfe4dUL,
- /* 67 */ 0xd7be7b9b65e1a851UL, 0xa03d28d522c536ddUL,
- 0x28399d658fd2b645UL, 0x49e5b7e17c2641e1UL,
- /* 68 */ 0x6f8c3a98700457a4UL, 0x5078f0a25ebb6778UL,
- 0xd13c3ccbc382960fUL, 0x2e003258a7df84b1UL,
- /* 69 */ 0x8ad1f39be6296a1cUL, 0xc1eeaa652a5fbfb2UL,
- 0x33ee0673fd26f3cbUL, 0x59256173a69d2cccUL,
- /* 70 */ 0x41ea07aa4e18fc41UL, 0xd9fc19527c87a51eUL,
- 0xbdaacb805831ca6fUL, 0x445b652dc916694fUL,
- /* 71 */ 0xce92a3a7f2172315UL, 0x1edc282de11b9964UL,
- 0xa1823aafe04c314aUL, 0x790a2d94437cf586UL,
- /* 72 */ 0x71c447fb93f6e009UL, 0x8922a56722845276UL,
- 0xbf70903b204f5169UL, 0x2f7a89891ba319feUL,
- /* 73 */ 0x02a08eb577e2140cUL, 0xed9a4ed4427bdcf4UL,
- 0x5253ec44e4323cd1UL, 0x3e88363c14e9355bUL,
- /* 74 */ 0xaa66c14277110b8cUL, 0x1ae0391610a23390UL,
- 0x2030bd12c93fc2a2UL, 0x3ee141579555c7abUL,
- /* 75 */ 0x9214de3a6d6e7d41UL, 0x3ccdd88607f17efeUL,
- 0x674f1288f8e11217UL, 0x5682250f329f93d0UL,
- /* 76 */ 0x6cf00b136d2e396eUL, 0x6e4cf86f1014debfUL,
- 0x5930b1b5bfcc4e83UL, 0x047069b48aba16b6UL,
- /* 77 */ 0x0d4ce4ab69b20793UL, 0xb24db91a97d0fb9eUL,
- 0xcdfa50f54e00d01dUL, 0x221b1085368bddb5UL,
- /* 78 */ 0xe7e59468b1e3d8d2UL, 0x53c56563bd122f93UL,
- 0xeee8a903e0663f09UL, 0x61efa662cbbe3d42UL,
- /* 79 */ 0x2cf8ddddde6eab2aUL, 0x9bf80ad51435f231UL,
- 0x5deadacec9f04973UL, 0x29275b5d41d29b27UL,
- /* 80 */ 0xcfde0f0895ebf14fUL, 0xb9aab96b054905a7UL,
- 0xcae80dd9a1c420fdUL, 0x0a63bf2f1673bbc7UL,
- /* 81 */ 0x092f6e11958fbc8cUL, 0x672a81e804822fadUL,
- 0xcac8351560d52517UL, 0x6f3f7722c8f192f8UL,
- /* 82 */ 0xf8ba90ccc2e894b7UL, 0x2c7557a438ff9f0dUL,
- 0x894d1d855ae52359UL, 0x68e122157b743d69UL,
- /* 83 */ 0xd87e5570cfb919f3UL, 0x3f2cdecd95798db9UL,
- 0x2121154710c0a2ceUL, 0x3c66a115246dc5b2UL,
- /* 84 */ 0xcbedc562294ecb72UL, 0xba7143c36a280b16UL,
- 0x9610c2efd4078b67UL, 0x6144735d946a4b1eUL,
- /* 85 */ 0x536f111ed75b3350UL, 0x0211db8c2041d81bUL,
- 0xf93cb1000e10413cUL, 0x149dfd3c039e8876UL,
- /* 86 */ 0xd479dde46b63155bUL, 0xb66e15e93c837976UL,
- 0xdafde43b1f13e038UL, 0x5fafda1a2e4b0b35UL,
- /* 87 */ 0x3600bbdf17197581UL, 0x3972050bbe3cd2c2UL,
- 0x5938906dbdd5be86UL, 0x34fce5e43f9b860fUL,
- /* 88 */ 0x75a8a4cd42d14d02UL, 0x828dabc53441df65UL,
- 0x33dcabedd2e131d3UL, 0x3ebad76fb814d25fUL,
- /* 89 */ 0xd4906f566f70e10fUL, 0x5d12f7aa51690f5aUL,
- 0x45adb16e76cefcf2UL, 0x01f768aead232999UL,
- /* 90 */ 0x2b6cc77b6248febdUL, 0x3cd30628ec3aaffdUL,
- 0xce1c0b80d4ef486aUL, 0x4c3bff2ea6f66c23UL,
- /* 91 */ 0x3f2ec4094aeaeb5fUL, 0x61b19b286e372ca7UL,
- 0x5eefa966de2a701dUL, 0x23b20565de55e3efUL,
- /* 92 */ 0xe301ca5279d58557UL, 0x07b2d4ce27c2874fUL,
- 0xa532cd8a9dcf1d67UL, 0x2a52fee23f2bff56UL,
- /* 93 */ 0x8624efb37cd8663dUL, 0xbbc7ac20ffbd7594UL,
- 0x57b85e9c82d37445UL, 0x7b3052cb86a6ec66UL,
- /* 94 */ 0x3482f0ad2525e91eUL, 0x2cb68043d28edca0UL,
- 0xaf4f6d052e1b003aUL, 0x185f8c2529781b0aUL,
- /* 95 */ 0xaa41de5bd80ce0d6UL, 0x9407b2416853e9d6UL,
- 0x563ec36e357f4c3aUL, 0x4cc4b8dd0e297bceUL,
- /* 96 */ 0xa2fc1a52ffb8730eUL, 0x1811f16e67058e37UL,
- 0x10f9a366cddf4ee1UL, 0x72f4a0c4a0b9f099UL,
- /* 97 */ 0x8c16c06f663f4ea7UL, 0x693b3af74e970fbaUL,
- 0x2102e7f1d69ec345UL, 0x0ba53cbc968a8089UL,
- /* 98 */ 0xca3d9dc7fea15537UL, 0x4c6824bb51536493UL,
- 0xb9886314844006b1UL, 0x40d2a72ab454cc60UL,
- /* 99 */ 0x5936a1b712570975UL, 0x91b9d648debda657UL,
- 0x3344094bb64330eaUL, 0x006ba10d12ee51d0UL,
- /* 100 */ 0x19228468f5de5d58UL, 0x0eb12f4c38cc05b0UL,
- 0xa1039f9dd5601990UL, 0x4502d4ce4fff0e0bUL,
- /* 101 */ 0xeb2054106837c189UL, 0xd0f6544c6dd3b93cUL,
- 0x40727064c416d74fUL, 0x6e15c6114b502ef0UL,
- /* 102 */ 0x4df2a398cfb1a76bUL, 0x11256c7419f2f6b1UL,
- 0x4a497962066e6043UL, 0x705b3aab41355b44UL,
- /* 103 */ 0x365ef536d797b1d8UL, 0x00076bd622ddf0dbUL,
- 0x3bbf33b0e0575a88UL, 0x3777aa05c8e4ca4dUL,
- /* 104 */ 0x392745c85578db5fUL, 0x6fda4149dbae5ae2UL,
- 0xb1f0b00b8adc9867UL, 0x09963437d36f1da3UL,
- /* 105 */ 0x7e824e90a5dc3853UL, 0xccb5f6641f135cbdUL,
- 0x6736d86c87ce8fccUL, 0x625f3ce26604249fUL,
- /* 106 */ 0xaf8ac8059502f63fUL, 0x0c05e70a2e351469UL,
- 0x35292e9c764b6305UL, 0x1a394360c7e23ac3UL,
- /* 107 */ 0xd5c6d53251183264UL, 0x62065abd43c2b74fUL,
- 0xb5fbf5d03b973f9bUL, 0x13a3da3661206e5eUL,
- /* 108 */ 0xc6bd5837725d94e5UL, 0x18e30912205016c5UL,
- 0x2088ce1570033c68UL, 0x7fba1f495c837987UL,
- /* 109 */ 0x5a8c7423f2f9079dUL, 0x1735157b34023fc5UL,
- 0xe4f9b49ad2fab351UL, 0x6691ff72c878e33cUL,
- /* 110 */ 0x122c2adedc5eff3eUL, 0xf8dd4bf1d8956cf4UL,
- 0xeb86205d9e9e5bdaUL, 0x049b92b9d975c743UL,
- /* 111 */ 0xa5379730b0f6c05aUL, 0x72a0ffacc6f3a553UL,
- 0xb0032c34b20dcd6dUL, 0x470e9dbc88d5164aUL,
- /* 112 */ 0xb19cf10ca237c047UL, 0xb65466711f6c81a2UL,
- 0xb3321bd16dd80b43UL, 0x48c14f600c5fbe8eUL,
- /* 113 */ 0x66451c264aa6c803UL, 0xb66e3904a4fa7da6UL,
- 0xd45f19b0b3128395UL, 0x31602627c3c9bc10UL,
- /* 114 */ 0x3120dc4832e4e10dUL, 0xeb20c46756c717f7UL,
- 0x00f52e3f67280294UL, 0x566d4fc14730c509UL,
- /* 115 */ 0x7e3a5d40fd837206UL, 0xc1e926dc7159547aUL,
- 0x216730fba68d6095UL, 0x22e8c3843f69cea7UL,
- /* 116 */ 0x33d074e8930e4b2bUL, 0xb6e4350e84d15816UL,
- 0x5534c26ad6ba2365UL, 0x7773c12f89f1f3f3UL,
- /* 117 */ 0x8cba404da57962aaUL, 0x5b9897a81999ce56UL,
- 0x508e862f121692fcUL, 0x3a81907fa093c291UL,
- /* 118 */ 0x0dded0ff4725a510UL, 0x10d8cc10673fc503UL,
- 0x5b9d151c9f1f4e89UL, 0x32a5c1d5cb09a44cUL,
- /* 119 */ 0x1e0aa442b90541fbUL, 0x5f85eb7cc1b485dbUL,
- 0xbee595ce8a9df2e5UL, 0x25e496c722422236UL,
- /* 120 */ 0x5edf3c46cd0fe5b9UL, 0x34e75a7ed2a43388UL,
- 0xe488de11d761e352UL, 0x0e878a01a085545cUL,
- /* 121 */ 0xba493c77e021bb04UL, 0x2b4d1843c7df899aUL,
- 0x9ea37a487ae80d67UL, 0x67a9958011e41794UL,
- /* 122 */ 0x4b58051a6697b065UL, 0x47e33f7d8d6ba6d4UL,
- 0xbb4da8d483ca46c1UL, 0x68becaa181c2db0dUL,
- /* 123 */ 0x8d8980e90b989aa5UL, 0xf95eb14a2c93c99bUL,
- 0x51c6c7c4796e73a2UL, 0x6e228363b5efb569UL,
- /* 124 */ 0xc6bbc0b02dd624c8UL, 0x777eb47dec8170eeUL,
- 0x3cde15a004cfafa9UL, 0x1dc6bc087160bf9bUL,
- /* 125 */ 0x2e07e043eec34002UL, 0x18e9fc677a68dc7fUL,
- 0xd8da03188bd15b9aUL, 0x48fbc3bb00568253UL,
- /* 126 */ 0x57547d4cfb654ce1UL, 0xd3565b82a058e2adUL,
- 0xf63eaf0bbf154478UL, 0x47531ef114dfbb18UL,
- /* 127 */ 0xe1ec630a4278c587UL, 0x5507d546ca8e83f3UL,
- 0x85e135c63adc0c2bUL, 0x0aa7efa85682844eUL,
- /* 128 */ 0x72691ba8b3e1f615UL, 0x32b4e9701fbe3ffaUL,
- 0x97b6d92e39bb7868UL, 0x2cfe53dea02e39e8UL,
- /* 129 */ 0x687392cd85cd52b0UL, 0x27ff66c910e29831UL,
- 0x97134556a9832d06UL, 0x269bb0360a84f8a0UL,
- /* 130 */ 0x706e55457643f85cUL, 0x3734a48c9b597d1bUL,
- 0x7aee91e8c6efa472UL, 0x5cd6abc198a9d9e0UL,
- /* 131 */ 0x0e04de06cb3ce41aUL, 0xd8c6eb893402e138UL,
- 0x904659bb686e3772UL, 0x7215c371746ba8c8UL,
- /* 132 */ 0xfd12a97eeae4a2d9UL, 0x9514b7516394f2c5UL,
- 0x266fd5809208f294UL, 0x5c847085619a26b9UL,
- /* 133 */ 0x52985410fed694eaUL, 0x3c905b934a2ed254UL,
- 0x10bb47692d3be467UL, 0x063b3d2d69e5e9e1UL,
- /* 134 */ 0x472726eedda57debUL, 0xefb6c4ae10f41891UL,
- 0x2b1641917b307614UL, 0x117c554fc4f45b7cUL,
- /* 135 */ 0xc07cf3118f9d8812UL, 0x01dbd82050017939UL,
- 0xd7e803f4171b2827UL, 0x1015e87487d225eaUL,
- /* 136 */ 0xc58de3fed23acc4dUL, 0x50db91c294a7be2dUL,
- 0x0b94d43d1c9cf457UL, 0x6b1640fa6e37524aUL,
- /* 137 */ 0x692f346c5fda0d09UL, 0x200b1c59fa4d3151UL,
- 0xb8c46f760777a296UL, 0x4b38395f3ffdfbcfUL,
- /* 138 */ 0x18d25e00be54d671UL, 0x60d50582bec8aba6UL,
- 0x87ad8f263b78b982UL, 0x50fdf64e9cda0432UL,
- /* 139 */ 0x90f567aac578dcf0UL, 0xef1e9b0ef2a3133bUL,
- 0x0eebba9242d9de71UL, 0x15473c9bf03101c7UL,
- /* 140 */ 0x7c77e8ae56b78095UL, 0xb678e7666e6f078eUL,
- 0x2da0b9615348ba1fUL, 0x7cf931c1ff733f0bUL,
- /* 141 */ 0x26b357f50a0a366cUL, 0xe9708cf42b87d732UL,
- 0xc13aeea5f91cb2c0UL, 0x35d90c991143bb4cUL,
- /* 142 */ 0x47c1c404a9a0d9dcUL, 0x659e58451972d251UL,
- 0x3875a8c473b38c31UL, 0x1fbd9ed379561f24UL,
- /* 143 */ 0x11fabc6fd41ec28dUL, 0x7ef8dfe3cd2a2dcaUL,
- 0x72e73b5d8c404595UL, 0x6135fa4954b72f27UL,
- /* 144 */ 0xccfc32a2de24b69cUL, 0x3f55698c1f095d88UL,
- 0xbe3350ed5ac3f929UL, 0x5e9bf806ca477eebUL,
- /* 145 */ 0xe9ce8fb63c309f68UL, 0x5376f63565e1f9f4UL,
- 0xd1afcfb35a6393f1UL, 0x6632a1ede5623506UL,
- /* 146 */ 0x0b7d6c390c2ded4cUL, 0x56cb3281df04cb1fUL,
- 0x66305a1249ecc3c7UL, 0x5d588b60a38ca72aUL,
- /* 147 */ 0xa6ecbf78e8e5f42dUL, 0x86eeb44b3c8a3eecUL,
- 0xec219c48fbd21604UL, 0x1aaf1af517c36731UL,
- /* 148 */ 0xc306a2836769bde7UL, 0x208280622b1e2adbUL,
- 0x8027f51ffbff94a6UL, 0x76cfa1ce1124f26bUL,
- /* 149 */ 0x18eb00562422abb6UL, 0xf377c4d58f8c29c3UL,
- 0x4dbbc207f531561aUL, 0x0253b7f082128a27UL,
- /* 150 */ 0x3d1f091cb62c17e0UL, 0x4860e1abd64628a9UL,
- 0x52d17436309d4253UL, 0x356f97e13efae576UL,
- /* 151 */ 0xd351e11aa150535bUL, 0x3e6b45bb1dd878ccUL,
- 0x0c776128bed92c98UL, 0x1d34ae93032885b8UL,
- /* 152 */ 0x4ba0488ca85ba4c3UL, 0x985348c33c9ce6ceUL,
- 0x66124c6f97bda770UL, 0x0f81a0290654124aUL,
- /* 153 */ 0x9ed09ca6569b86fdUL, 0x811009fd18af9a2dUL,
- 0xff08d03f93d8c20aUL, 0x52a148199faef26bUL,
- /* 154 */ 0x3e03f9dc2d8d1b73UL, 0x4205801873961a70UL,
- 0xc0d987f041a35970UL, 0x07aa1f15a1c0d549UL,
- /* 155 */ 0xdfd46ce08cd27224UL, 0x6d0a024f934e4239UL,
- 0x808a7a6399897b59UL, 0x0a4556e9e13d95a2UL,
- /* 156 */ 0xd21a991fe9c13045UL, 0x9b0e8548fe7751b8UL,
- 0x5da643cb4bf30035UL, 0x77db28d63940f721UL,
- /* 157 */ 0xfc5eeb614adc9011UL, 0x5229419ae8c411ebUL,
- 0x9ec3e7787d1dcf74UL, 0x340d053e216e4cb5UL,
- /* 158 */ 0xcac7af39b48df2b4UL, 0xc0faec2871a10a94UL,
- 0x140a69245ca575edUL, 0x0cf1c37134273a4cUL,
- /* 159 */ 0xc8ee306ac224b8a5UL, 0x57eaee7ccb4930b0UL,
- 0xa1e806bdaacbe74fUL, 0x7d9a62742eeb657dUL,
- /* 160 */ 0x9eb6b6ef546c4830UL, 0x885cca1fddb36e2eUL,
- 0xe6b9f383ef0d7105UL, 0x58654fef9d2e0412UL,
- /* 161 */ 0xa905c4ffbe0e8e26UL, 0x942de5df9b31816eUL,
- 0x497d723f802e88e1UL, 0x30684dea602f408dUL,
- /* 162 */ 0x21e5a278a3e6cb34UL, 0xaefb6e6f5b151dc4UL,
- 0xb30b8e049d77ca15UL, 0x28c3c9cf53b98981UL,
- /* 163 */ 0x287fb721556cdd2aUL, 0x0d317ca897022274UL,
- 0x7468c7423a543258UL, 0x4a7f11464eb5642fUL,
- /* 164 */ 0xa237a4774d193aa6UL, 0xd865986ea92129a1UL,
- 0x24c515ecf87c1a88UL, 0x604003575f39f5ebUL,
- /* 165 */ 0x47b9f189570a9b27UL, 0x2b98cede465e4b78UL,
- 0x026df551dbb85c20UL, 0x74fcd91047e21901UL,
- /* 166 */ 0x13e2a90a23c1bfa3UL, 0x0cb0074e478519f6UL,
- 0x5ff1cbbe3af6cf44UL, 0x67fe5438be812dbeUL,
- /* 167 */ 0xd13cf64fa40f05b0UL, 0x054dfb2f32283787UL,
- 0x4173915b7f0d2aeaUL, 0x482f144f1f610d4eUL,
- /* 168 */ 0xf6210201b47f8234UL, 0x5d0ae1929e70b990UL,
- 0xdcd7f455b049567cUL, 0x7e93d0f1f0916f01UL,
- /* 169 */ 0xdd79cbf18a7db4faUL, 0xbe8391bf6f74c62fUL,
- 0x027145d14b8291bdUL, 0x585a73ea2cbf1705UL,
- /* 170 */ 0x485ca03e928a0db2UL, 0x10fc01a5742857e7UL,
- 0x2f482edbd6d551a7UL, 0x0f0433b5048fdb8aUL,
- /* 171 */ 0x60da2e8dd7dc6247UL, 0x88b4c9d38cd4819aUL,
- 0x13033ac001f66697UL, 0x273b24fe3b367d75UL,
- /* 172 */ 0xc6e8f66a31b3b9d4UL, 0x281514a494df49d5UL,
- 0xd1726fdfc8b23da7UL, 0x4b3ae7d103dee548UL,
- /* 173 */ 0xc6256e19ce4b9d7eUL, 0xff5c5cf186e3c61cUL,
- 0xacc63ca34b8ec145UL, 0x74621888fee66574UL,
- /* 174 */ 0x956f409645290a1eUL, 0xef0bf8e3263a962eUL,
- 0xed6a50eb5ec2647bUL, 0x0694283a9dca7502UL,
- /* 175 */ 0x769b963643a2dcd1UL, 0x42b7c8ea09fc5353UL,
- 0x4f002aee13397eabUL, 0x63005e2c19b7d63aUL,
- /* 176 */ 0xca6736da63023beaUL, 0x966c7f6db12a99b7UL,
- 0xace09390c537c5e1UL, 0x0b696063a1aa89eeUL,
- /* 177 */ 0xebb03e97288c56e5UL, 0x432a9f9f938c8be8UL,
- 0xa6a5a93d5b717f71UL, 0x1a5fb4c3e18f9d97UL,
- /* 178 */ 0x1c94e7ad1c60cdceUL, 0xee202a43fc02c4a0UL,
- 0x8dafe4d867c46a20UL, 0x0a10263c8ac27b58UL,
- /* 179 */ 0xd0dea9dfe4432a4aUL, 0x856af87bbe9277c5UL,
- 0xce8472acc212c71aUL, 0x6f151b6d9bbb1e91UL,
- /* 180 */ 0x26776c527ceed56aUL, 0x7d211cb7fbf8faecUL,
- 0x37ae66a6fd4609ccUL, 0x1f81b702d2770c42UL,
- /* 181 */ 0x2fb0b057eac58392UL, 0xe1dd89fe29744e9dUL,
- 0xc964f8eb17beb4f8UL, 0x29571073c9a2d41eUL,
- /* 182 */ 0xa948a18981c0e254UL, 0x2df6369b65b22830UL,
- 0xa33eb2d75fcfd3c6UL, 0x078cd6ec4199a01fUL,
- /* 183 */ 0x4a584a41ad900d2fUL, 0x32142b78e2c74c52UL,
- 0x68c4e8338431c978UL, 0x7f69ea9008689fc2UL,
- /* 184 */ 0x52f2c81e46a38265UL, 0xfd78072d04a832fdUL,
- 0x8cd7d5fa25359e94UL, 0x4de71b7454cc29d2UL,
- /* 185 */ 0x42eb60ad1eda6ac9UL, 0x0aad37dfdbc09c3aUL,
- 0x81004b71e33cc191UL, 0x44e6be345122803cUL,
- /* 186 */ 0x03fe8388ba1920dbUL, 0xf5d57c32150db008UL,
- 0x49c8c4281af60c29UL, 0x21edb518de701aeeUL,
- /* 187 */ 0x7fb63e418f06dc99UL, 0xa4460d99c166d7b8UL,
- 0x24dd5248ce520a83UL, 0x5ec3ad712b928358UL,
- /* 188 */ 0x15022a5fbd17930fUL, 0xa4f64a77d82570e3UL,
- 0x12bc8d6915783712UL, 0x498194c0fc620abbUL,
- /* 189 */ 0x38a2d9d255686c82UL, 0x785c6bd9193e21f0UL,
- 0xe4d5c81ab24a5484UL, 0x56307860b2e20989UL,
- /* 190 */ 0x429d55f78b4d74c4UL, 0x22f1834643350131UL,
- 0x1e60c24598c71fffUL, 0x59f2f014979983efUL,
- /* 191 */ 0x46a47d56eb494a44UL, 0x3e22a854d636a18eUL,
- 0xb346e15274491c3bUL, 0x2ceafd4e5390cde7UL,
- /* 192 */ 0xba8a8538be0d6675UL, 0x4b9074bb50818e23UL,
- 0xcbdab89085d304c3UL, 0x61a24fe0e56192c4UL,
- /* 193 */ 0xcb7615e6db525bcbUL, 0xdd7d8c35a567e4caUL,
- 0xe6b4153acafcdd69UL, 0x2d668e097f3c9766UL,
- /* 194 */ 0xa57e7e265ce55ef0UL, 0x5d9f4e527cd4b967UL,
- 0xfbc83606492fd1e5UL, 0x090d52beb7c3f7aeUL,
- /* 195 */ 0x09b9515a1e7b4d7cUL, 0x1f266a2599da44c0UL,
- 0xa1c49548e2c55504UL, 0x7ef04287126f15ccUL,
- /* 196 */ 0xfed1659dbd30ef15UL, 0x8b4ab9eec4e0277bUL,
- 0x884d6236a5df3291UL, 0x1fd96ea6bf5cf788UL,
- /* 197 */ 0x42a161981f190d9aUL, 0x61d849507e6052c1UL,
- 0x9fe113bf285a2cd5UL, 0x7c22d676dbad85d8UL,
- /* 198 */ 0x82e770ed2bfbd27dUL, 0x4c05b2ece996f5a5UL,
- 0xcd40a9c2b0900150UL, 0x5895319213d9bf64UL,
- /* 199 */ 0xe7cc5d703fea2e08UL, 0xb50c491258e2188cUL,
- 0xcce30baa48205bf0UL, 0x537c659ccfa32d62UL,
- /* 200 */ 0x37b6623a98cfc088UL, 0xfe9bed1fa4d6aca4UL,
- 0x04d29b8e56a8d1b0UL, 0x725f71c40b519575UL,
- /* 201 */ 0x28c7f89cd0339ce6UL, 0x8367b14469ddc18bUL,
- 0x883ada83a6a1652cUL, 0x585f1974034d6c17UL,
- /* 202 */ 0x89cfb266f1b19188UL, 0xe63b4863e7c35217UL,
- 0xd88c9da6b4c0526aUL, 0x3e035c9df0954635UL,
- /* 203 */ 0xdd9d5412fb45de9dUL, 0xdd684532e4cff40dUL,
- 0x4b5c999b151d671cUL, 0x2d8c2cc811e7f690UL,
- /* 204 */ 0x7f54be1d90055d40UL, 0xa464c5df464aaf40UL,
- 0x33979624f0e917beUL, 0x2c018dc527356b30UL,
- /* 205 */ 0xa5415024e330b3d4UL, 0x73ff3d96691652d3UL,
- 0x94ec42c4ef9b59f1UL, 0x0747201618d08e5aUL,
- /* 206 */ 0x4d6ca48aca411c53UL, 0x66415f2fcfa66119UL,
- 0x9c4dd40051e227ffUL, 0x59810bc09a02f7ebUL,
- /* 207 */ 0x2a7eb171b3dc101dUL, 0x441c5ab99ffef68eUL,
- 0x32025c9b93b359eaUL, 0x5e8ce0a71e9d112fUL,
- /* 208 */ 0xbfcccb92429503fdUL, 0xd271ba752f095d55UL,
- 0x345ead5e972d091eUL, 0x18c8df11a83103baUL,
- /* 209 */ 0x90cd949a9aed0f4cUL, 0xc5d1f4cb6660e37eUL,
- 0xb8cac52d56c52e0bUL, 0x6e42e400c5808e0dUL,
- /* 210 */ 0xa3b46966eeaefd23UL, 0x0c4f1f0be39ecdcaUL,
- 0x189dc8c9d683a51dUL, 0x51f27f054c09351bUL,
- /* 211 */ 0x4c487ccd2a320682UL, 0x587ea95bb3df1c96UL,
- 0xc8ccf79e555cb8e8UL, 0x547dc829a206d73dUL,
- /* 212 */ 0xb822a6cd80c39b06UL, 0xe96d54732000d4c6UL,
- 0x28535b6f91463b4dUL, 0x228f4660e2486e1dUL,
- /* 213 */ 0x98799538de8d3abfUL, 0x8cd8330045ebca6eUL,
- 0x79952a008221e738UL, 0x4322e1a7535cd2bbUL,
- /* 214 */ 0xb114c11819d1801cUL, 0x2016e4d84f3f5ec7UL,
- 0xdd0e2df409260f4cUL, 0x5ec362c0ae5f7266UL,
- /* 215 */ 0xc0462b18b8b2b4eeUL, 0x7cc8d950274d1afbUL,
- 0xf25f7105436b02d2UL, 0x43bbf8dcbff9ccd3UL,
- /* 216 */ 0xb6ad1767a039e9dfUL, 0xb0714da8f69d3583UL,
- 0x5e55fa18b42931f5UL, 0x4ed5558f33c60961UL,
- /* 217 */ 0x1fe37901c647a5ddUL, 0x593ddf1f8081d357UL,
- 0x0249a4fd813fd7a6UL, 0x69acca274e9caf61UL,
- /* 218 */ 0x047ba3ea330721c9UL, 0x83423fc20e7e1ea0UL,
- 0x1df4c0af01314a60UL, 0x09a62dab89289527UL,
- /* 219 */ 0xa5b325a49cc6cb00UL, 0xe94b5dc654b56cb6UL,
- 0x3be28779adc994a0UL, 0x4296e8f8ba3a4aadUL,
- /* 220 */ 0x328689761e451eabUL, 0x2e4d598bff59594aUL,
- 0x49b96853d7a7084aUL, 0x4980a319601420a8UL,
- /* 221 */ 0x9565b9e12f552c42UL, 0x8a5318db7100fe96UL,
- 0x05c90b4d43add0d7UL, 0x538b4cd66a5d4edaUL,
- /* 222 */ 0xf4e94fc3e89f039fUL, 0x592c9af26f618045UL,
- 0x08a36eb5fd4b9550UL, 0x25fffaf6c2ed1419UL,
- /* 223 */ 0x34434459cc79d354UL, 0xeeecbfb4b1d5476bUL,
- 0xddeb34a061615d99UL, 0x5129cecceb64b773UL,
- /* 224 */ 0xee43215894993520UL, 0x772f9c7cf14c0b3bUL,
- 0xd2e2fce306bedad5UL, 0x715f42b546f06a97UL,
- /* 225 */ 0x434ecdceda5b5f1aUL, 0x0da17115a49741a9UL,
- 0x680bd77c73edad2eUL, 0x487c02354edd9041UL,
- /* 226 */ 0xb8efeff3a70ed9c4UL, 0x56a32aa3e857e302UL,
- 0xdf3a68bd48a2a5a0UL, 0x07f650b73176c444UL,
- /* 227 */ 0xe38b9b1626e0ccb1UL, 0x79e053c18b09fb36UL,
- 0x56d90319c9f94964UL, 0x1ca941e7ac9ff5c4UL,
- /* 228 */ 0x49c4df29162fa0bbUL, 0x8488cf3282b33305UL,
- 0x95dfda14cabb437dUL, 0x3391f78264d5ad86UL,
- /* 229 */ 0x729ae06ae2b5095dUL, 0xd58a58d73259a946UL,
- 0xe9834262d13921edUL, 0x27fedafaa54bb592UL,
- /* 230 */ 0xa99dc5b829ad48bbUL, 0x5f025742499ee260UL,
- 0x802c8ecd5d7513fdUL, 0x78ceb3ef3f6dd938UL,
- /* 231 */ 0xc342f44f8a135d94UL, 0x7b9edb44828cdda3UL,
- 0x9436d11a0537cfe7UL, 0x5064b164ec1ab4c8UL,
- /* 232 */ 0x7020eccfd37eb2fcUL, 0x1f31ea3ed90d25fcUL,
- 0x1b930d7bdfa1bb34UL, 0x5344467a48113044UL,
- /* 233 */ 0x70073170f25e6dfbUL, 0xe385dc1a50114cc8UL,
- 0x2348698ac8fc4f00UL, 0x2a77a55284dd40d8UL,
- /* 234 */ 0xfe06afe0c98c6ce4UL, 0xc235df96dddfd6e4UL,
- 0x1428d01e33bf1ed3UL, 0x785768ec9300bdafUL,
- /* 235 */ 0x9702e57a91deb63bUL, 0x61bdb8bfe5ce8b80UL,
- 0x645b426f3d1d58acUL, 0x4804a82227a557bcUL,
- /* 236 */ 0x8e57048ab44d2601UL, 0x68d6501a4b3a6935UL,
- 0xc39c9ec3f9e1c293UL, 0x4172f257d4de63e2UL,
- /* 237 */ 0xd368b450330c6401UL, 0x040d3017418f2391UL,
- 0x2c34bb6090b7d90dUL, 0x16f649228fdfd51fUL,
- /* 238 */ 0xbea6818e2b928ef5UL, 0xe28ccf91cdc11e72UL,
- 0x594aaa68e77a36cdUL, 0x313034806c7ffd0fUL,
- /* 239 */ 0x8a9d27ac2249bd65UL, 0x19a3b464018e9512UL,
- 0xc26ccff352b37ec7UL, 0x056f68341d797b21UL,
- /* 240 */ 0x5e79d6757efd2327UL, 0xfabdbcb6553afe15UL,
- 0xd3e7222c6eaf5a60UL, 0x7046c76d4dae743bUL,
- /* 241 */ 0x660be872b18d4a55UL, 0x19992518574e1496UL,
- 0xc103053a302bdcbbUL, 0x3ed8e9800b218e8eUL,
- /* 242 */ 0x7b0b9239fa75e03eUL, 0xefe9fb684633c083UL,
- 0x98a35fbe391a7793UL, 0x6065510fe2d0fe34UL,
- /* 243 */ 0x55cb668548abad0cUL, 0xb4584548da87e527UL,
- 0x2c43ecea0107c1ddUL, 0x526028809372de35UL,
- /* 244 */ 0x3415c56af9213b1fUL, 0x5bee1a4d017e98dbUL,
- 0x13f6b105b5cf709bUL, 0x5ff20e3482b29ab6UL,
- /* 245 */ 0x0aa29c75cc2e6c90UL, 0xfc7d73ca3a70e206UL,
- 0x899fc38fc4b5c515UL, 0x250386b124ffc207UL,
- /* 246 */ 0x54ea28d5ae3d2b56UL, 0x9913149dd6de60ceUL,
- 0x16694fc58f06d6c1UL, 0x46b23975eb018fc7UL,
- /* 247 */ 0x470a6a0fb4b7b4e2UL, 0x5d92475a8f7253deUL,
- 0xabeee5b52fbd3adbUL, 0x7fa20801a0806968UL,
- /* 248 */ 0x76f3faf19f7714d2UL, 0xb3e840c12f4660c3UL,
- 0x0fb4cd8df212744eUL, 0x4b065a251d3a2dd2UL,
- /* 249 */ 0x5cebde383d77cd4aUL, 0x6adf39df882c9cb1UL,
- 0xa2dd242eb09af759UL, 0x3147c0e50e5f6422UL,
- /* 250 */ 0x164ca5101d1350dbUL, 0xf8d13479c33fc962UL,
- 0xe640ce4d13e5da08UL, 0x4bdee0c45061f8baUL,
- /* 251 */ 0xd7c46dc1a4edb1c9UL, 0x5514d7b6437fd98aUL,
- 0x58942f6bb2a1c00bUL, 0x2dffb2ab1d70710eUL,
- /* 252 */ 0xccdfcf2fc18b6d68UL, 0xa8ebcba8b7806167UL,
- 0x980697f95e2937e3UL, 0x02fbba1cd0126e8cUL
-};
-
-/* c is two 512-bit products: c0[0:7]=a0[0:3]*b0[0:3] and c1[8:15]=a1[4:7]*b1[4:7]
- * a is two 256-bit integers: a0[0:3] and a1[4:7]
- * b is two 256-bit integers: b0[0:3] and b1[4:7]
- */
-static void mul2_256x256_integer_adx(u64 *const c, const u64 *const a,
- const u64 *const b)
-{
- asm volatile(
- "xorl %%r14d, %%r14d ;"
- "movq (%1), %%rdx; " /* A[0] */
- "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
- "xorl %%r10d, %%r10d ;"
- "movq %%r8, (%0) ;"
- "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
- "adox %%r10, %%r15 ;"
- "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
- "adox %%r8, %%rax ;"
- "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
- "adox %%r10, %%rbx ;"
- /******************************************/
- "adox %%r14, %%rcx ;"
-
- "movq 8(%1), %%rdx; " /* A[1] */
- "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
- "adox %%r15, %%r8 ;"
- "movq %%r8, 8(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
- "adox %%r10, %%r9 ;"
- "adcx %%r9, %%rax ;"
- "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
- "adox %%r8, %%r11 ;"
- "adcx %%r11, %%rbx ;"
- "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
- "adox %%r10, %%r13 ;"
- "adcx %%r13, %%rcx ;"
- /******************************************/
- "adox %%r14, %%r15 ;"
- "adcx %%r14, %%r15 ;"
-
- "movq 16(%1), %%rdx; " /* A[2] */
- "xorl %%r10d, %%r10d ;"
- "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
- "adox %%rax, %%r8 ;"
- "movq %%r8, 16(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
- "adox %%r10, %%r9 ;"
- "adcx %%r9, %%rbx ;"
- "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
- "adox %%r8, %%r11 ;"
- "adcx %%r11, %%rcx ;"
- "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
- "adox %%r10, %%r13 ;"
- "adcx %%r13, %%r15 ;"
- /******************************************/
- "adox %%r14, %%rax ;"
- "adcx %%r14, %%rax ;"
-
- "movq 24(%1), %%rdx; " /* A[3] */
- "xorl %%r10d, %%r10d ;"
- "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
- "adox %%rbx, %%r8 ;"
- "movq %%r8, 24(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
- "adox %%r10, %%r9 ;"
- "adcx %%r9, %%rcx ;"
- "movq %%rcx, 32(%0) ;"
- "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
- "adox %%r8, %%r11 ;"
- "adcx %%r11, %%r15 ;"
- "movq %%r15, 40(%0) ;"
- "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
- "adox %%r10, %%r13 ;"
- "adcx %%r13, %%rax ;"
- "movq %%rax, 48(%0) ;"
- /******************************************/
- "adox %%r14, %%rbx ;"
- "adcx %%r14, %%rbx ;"
- "movq %%rbx, 56(%0) ;"
-
- "movq 32(%1), %%rdx; " /* C[0] */
- "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */
- "xorl %%r10d, %%r10d ;"
- "movq %%r8, 64(%0);"
- "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */
- "adox %%r10, %%r15 ;"
- "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */
- "adox %%r8, %%rax ;"
- "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */
- "adox %%r10, %%rbx ;"
- /******************************************/
- "adox %%r14, %%rcx ;"
-
- "movq 40(%1), %%rdx; " /* C[1] */
- "xorl %%r10d, %%r10d ;"
- "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */
- "adox %%r15, %%r8 ;"
- "movq %%r8, 72(%0);"
- "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */
- "adox %%r10, %%r9 ;"
- "adcx %%r9, %%rax ;"
- "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */
- "adox %%r8, %%r11 ;"
- "adcx %%r11, %%rbx ;"
- "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */
- "adox %%r10, %%r13 ;"
- "adcx %%r13, %%rcx ;"
- /******************************************/
- "adox %%r14, %%r15 ;"
- "adcx %%r14, %%r15 ;"
-
- "movq 48(%1), %%rdx; " /* C[2] */
- "xorl %%r10d, %%r10d ;"
- "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */
- "adox %%rax, %%r8 ;"
- "movq %%r8, 80(%0);"
- "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */
- "adox %%r10, %%r9 ;"
- "adcx %%r9, %%rbx ;"
- "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */
- "adox %%r8, %%r11 ;"
- "adcx %%r11, %%rcx ;"
- "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */
- "adox %%r10, %%r13 ;"
- "adcx %%r13, %%r15 ;"
- /******************************************/
- "adox %%r14, %%rax ;"
- "adcx %%r14, %%rax ;"
-
- "movq 56(%1), %%rdx; " /* C[3] */
- "xorl %%r10d, %%r10d ;"
- "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */
- "adox %%rbx, %%r8 ;"
- "movq %%r8, 88(%0);"
- "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */
- "adox %%r10, %%r9 ;"
- "adcx %%r9, %%rcx ;"
- "movq %%rcx, 96(%0) ;"
- "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */
- "adox %%r8, %%r11 ;"
- "adcx %%r11, %%r15 ;"
- "movq %%r15, 104(%0) ;"
- "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */
- "adox %%r10, %%r13 ;"
- "adcx %%r13, %%rax ;"
- "movq %%rax, 112(%0) ;"
- /******************************************/
- "adox %%r14, %%rbx ;"
- "adcx %%r14, %%rbx ;"
- "movq %%rbx, 120(%0) ;"
- :
- : "r"(c), "r"(a), "r"(b)
- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
- "%r10", "%r11", "%r13", "%r14", "%r15");
-}
-
-static void mul2_256x256_integer_bmi2(u64 *const c, const u64 *const a,
- const u64 *const b)
+static __always_inline u64 eq_mask(u64 a, u64 b)
{
- asm volatile(
- "movq (%1), %%rdx; " /* A[0] */
- "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
- "movq %%r8, (%0) ;"
- "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
- "addq %%r10, %%r15 ;"
- "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
- "adcq %%r8, %%rax ;"
- "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
- "adcq %%r10, %%rbx ;"
- /******************************************/
- "adcq $0, %%rcx ;"
-
- "movq 8(%1), %%rdx; " /* A[1] */
- "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
- "addq %%r15, %%r8 ;"
- "movq %%r8, 8(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
- "adcq %%r10, %%r9 ;"
- "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
- "adcq %%r8, %%r11 ;"
- "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
- "adcq %%r10, %%r13 ;"
- /******************************************/
- "adcq $0, %%r15 ;"
-
- "addq %%r9, %%rax ;"
- "adcq %%r11, %%rbx ;"
- "adcq %%r13, %%rcx ;"
- "adcq $0, %%r15 ;"
-
- "movq 16(%1), %%rdx; " /* A[2] */
- "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
- "addq %%rax, %%r8 ;"
- "movq %%r8, 16(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
- "adcq %%r10, %%r9 ;"
- "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
- "adcq %%r8, %%r11 ;"
- "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
- "adcq %%r10, %%r13 ;"
- /******************************************/
- "adcq $0, %%rax ;"
-
- "addq %%r9, %%rbx ;"
- "adcq %%r11, %%rcx ;"
- "adcq %%r13, %%r15 ;"
- "adcq $0, %%rax ;"
-
- "movq 24(%1), %%rdx; " /* A[3] */
- "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
- "addq %%rbx, %%r8 ;"
- "movq %%r8, 24(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
- "adcq %%r10, %%r9 ;"
- "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
- "adcq %%r8, %%r11 ;"
- "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
- "adcq %%r10, %%r13 ;"
- /******************************************/
- "adcq $0, %%rbx ;"
-
- "addq %%r9, %%rcx ;"
- "movq %%rcx, 32(%0) ;"
- "adcq %%r11, %%r15 ;"
- "movq %%r15, 40(%0) ;"
- "adcq %%r13, %%rax ;"
- "movq %%rax, 48(%0) ;"
- "adcq $0, %%rbx ;"
- "movq %%rbx, 56(%0) ;"
-
- "movq 32(%1), %%rdx; " /* C[0] */
- "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */
- "movq %%r8, 64(%0) ;"
- "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */
- "addq %%r10, %%r15 ;"
- "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */
- "adcq %%r8, %%rax ;"
- "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */
- "adcq %%r10, %%rbx ;"
- /******************************************/
- "adcq $0, %%rcx ;"
-
- "movq 40(%1), %%rdx; " /* C[1] */
- "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */
- "addq %%r15, %%r8 ;"
- "movq %%r8, 72(%0) ;"
- "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */
- "adcq %%r10, %%r9 ;"
- "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */
- "adcq %%r8, %%r11 ;"
- "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */
- "adcq %%r10, %%r13 ;"
- /******************************************/
- "adcq $0, %%r15 ;"
-
- "addq %%r9, %%rax ;"
- "adcq %%r11, %%rbx ;"
- "adcq %%r13, %%rcx ;"
- "adcq $0, %%r15 ;"
-
- "movq 48(%1), %%rdx; " /* C[2] */
- "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */
- "addq %%rax, %%r8 ;"
- "movq %%r8, 80(%0) ;"
- "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */
- "adcq %%r10, %%r9 ;"
- "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */
- "adcq %%r8, %%r11 ;"
- "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */
- "adcq %%r10, %%r13 ;"
- /******************************************/
- "adcq $0, %%rax ;"
-
- "addq %%r9, %%rbx ;"
- "adcq %%r11, %%rcx ;"
- "adcq %%r13, %%r15 ;"
- "adcq $0, %%rax ;"
-
- "movq 56(%1), %%rdx; " /* C[3] */
- "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */
- "addq %%rbx, %%r8 ;"
- "movq %%r8, 88(%0) ;"
- "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */
- "adcq %%r10, %%r9 ;"
- "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */
- "adcq %%r8, %%r11 ;"
- "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */
- "adcq %%r10, %%r13 ;"
- /******************************************/
- "adcq $0, %%rbx ;"
-
- "addq %%r9, %%rcx ;"
- "movq %%rcx, 96(%0) ;"
- "adcq %%r11, %%r15 ;"
- "movq %%r15, 104(%0) ;"
- "adcq %%r13, %%rax ;"
- "movq %%rax, 112(%0) ;"
- "adcq $0, %%rbx ;"
- "movq %%rbx, 120(%0) ;"
- :
- : "r"(c), "r"(a), "r"(b)
- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
- "%r10", "%r11", "%r13", "%r15");
+ u64 x = a ^ b;
+ u64 minus_x = ~x + (u64)1U;
+ u64 x_or_minus_x = x | minus_x;
+ u64 xnx = x_or_minus_x >> (u32)63U;
+ return xnx - (u64)1U;
}
-static void sqr2_256x256_integer_adx(u64 *const c, const u64 *const a)
+static __always_inline u64 gte_mask(u64 a, u64 b)
{
- asm volatile(
- "movq (%1), %%rdx ;" /* A[0] */
- "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */
- "xorl %%r15d, %%r15d;"
- "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */
- "adcx %%r14, %%r9 ;"
- "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */
- "adcx %%rax, %%r10 ;"
- "movq 24(%1), %%rdx ;" /* A[3] */
- "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */
- "adcx %%rcx, %%r11 ;"
- "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */
- "adcx %%rax, %%rbx ;"
- "movq 8(%1), %%rdx ;" /* A[1] */
- "adcx %%r15, %%r13 ;"
- "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */
- "movq $0, %%r14 ;"
- /******************************************/
- "adcx %%r15, %%r14 ;"
-
- "xorl %%r15d, %%r15d;"
- "adox %%rax, %%r10 ;"
- "adcx %%r8, %%r8 ;"
- "adox %%rcx, %%r11 ;"
- "adcx %%r9, %%r9 ;"
- "adox %%r15, %%rbx ;"
- "adcx %%r10, %%r10 ;"
- "adox %%r15, %%r13 ;"
- "adcx %%r11, %%r11 ;"
- "adox %%r15, %%r14 ;"
- "adcx %%rbx, %%rbx ;"
- "adcx %%r13, %%r13 ;"
- "adcx %%r14, %%r14 ;"
-
- "movq (%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
- /*******************/
- "movq %%rax, 0(%0) ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, 8(%0) ;"
- "movq 8(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
- "adcq %%rax, %%r9 ;"
- "movq %%r9, 16(%0) ;"
- "adcq %%rcx, %%r10 ;"
- "movq %%r10, 24(%0) ;"
- "movq 16(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
- "adcq %%rax, %%r11 ;"
- "movq %%r11, 32(%0) ;"
- "adcq %%rcx, %%rbx ;"
- "movq %%rbx, 40(%0) ;"
- "movq 24(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
- "adcq %%rax, %%r13 ;"
- "movq %%r13, 48(%0) ;"
- "adcq %%rcx, %%r14 ;"
- "movq %%r14, 56(%0) ;"
-
-
- "movq 32(%1), %%rdx ;" /* B[0] */
- "mulx 40(%1), %%r8, %%r14 ;" /* B[1]*B[0] */
- "xorl %%r15d, %%r15d;"
- "mulx 48(%1), %%r9, %%r10 ;" /* B[2]*B[0] */
- "adcx %%r14, %%r9 ;"
- "mulx 56(%1), %%rax, %%rcx ;" /* B[3]*B[0] */
- "adcx %%rax, %%r10 ;"
- "movq 56(%1), %%rdx ;" /* B[3] */
- "mulx 40(%1), %%r11, %%rbx ;" /* B[1]*B[3] */
- "adcx %%rcx, %%r11 ;"
- "mulx 48(%1), %%rax, %%r13 ;" /* B[2]*B[3] */
- "adcx %%rax, %%rbx ;"
- "movq 40(%1), %%rdx ;" /* B[1] */
- "adcx %%r15, %%r13 ;"
- "mulx 48(%1), %%rax, %%rcx ;" /* B[2]*B[1] */
- "movq $0, %%r14 ;"
- /******************************************/
- "adcx %%r15, %%r14 ;"
-
- "xorl %%r15d, %%r15d;"
- "adox %%rax, %%r10 ;"
- "adcx %%r8, %%r8 ;"
- "adox %%rcx, %%r11 ;"
- "adcx %%r9, %%r9 ;"
- "adox %%r15, %%rbx ;"
- "adcx %%r10, %%r10 ;"
- "adox %%r15, %%r13 ;"
- "adcx %%r11, %%r11 ;"
- "adox %%r15, %%r14 ;"
- "adcx %%rbx, %%rbx ;"
- "adcx %%r13, %%r13 ;"
- "adcx %%r14, %%r14 ;"
-
- "movq 32(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* B[0]^2 */
- /*******************/
- "movq %%rax, 64(%0) ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, 72(%0) ;"
- "movq 40(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* B[1]^2 */
- "adcq %%rax, %%r9 ;"
- "movq %%r9, 80(%0) ;"
- "adcq %%rcx, %%r10 ;"
- "movq %%r10, 88(%0) ;"
- "movq 48(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* B[2]^2 */
- "adcq %%rax, %%r11 ;"
- "movq %%r11, 96(%0) ;"
- "adcq %%rcx, %%rbx ;"
- "movq %%rbx, 104(%0) ;"
- "movq 56(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* B[3]^2 */
- "adcq %%rax, %%r13 ;"
- "movq %%r13, 112(%0) ;"
- "adcq %%rcx, %%r14 ;"
- "movq %%r14, 120(%0) ;"
- :
- : "r"(c), "r"(a)
- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
- "%r10", "%r11", "%r13", "%r14", "%r15");
+ u64 x = a;
+ u64 y = b;
+ u64 x_xor_y = x ^ y;
+ u64 x_sub_y = x - y;
+ u64 x_sub_y_xor_y = x_sub_y ^ y;
+ u64 q = x_xor_y | x_sub_y_xor_y;
+ u64 x_xor_q = x ^ q;
+ u64 x_xor_q_ = x_xor_q >> (u32)63U;
+ return x_xor_q_ - (u64)1U;
}
-static void sqr2_256x256_integer_bmi2(u64 *const c, const u64 *const a)
+/* Computes the addition of four-element f1 with value in f2
+ * and returns the carry (if any) */
+static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2)
{
- asm volatile(
- "movq 8(%1), %%rdx ;" /* A[1] */
- "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */
- "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
- "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
-
- "movq 16(%1), %%rdx ;" /* A[2] */
- "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */
- "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */
-
- "addq %%rax, %%r9 ;"
- "adcq %%rdx, %%r10 ;"
- "adcq %%rcx, %%r11 ;"
- "adcq %%r14, %%r15 ;"
- "adcq $0, %%r13 ;"
- "movq $0, %%r14 ;"
- "adcq $0, %%r14 ;"
-
- "movq (%1), %%rdx ;" /* A[0] */
- "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
-
- "addq %%rax, %%r10 ;"
- "adcq %%rcx, %%r11 ;"
- "adcq $0, %%r15 ;"
- "adcq $0, %%r13 ;"
- "adcq $0, %%r14 ;"
-
- "shldq $1, %%r13, %%r14 ;"
- "shldq $1, %%r15, %%r13 ;"
- "shldq $1, %%r11, %%r15 ;"
- "shldq $1, %%r10, %%r11 ;"
- "shldq $1, %%r9, %%r10 ;"
- "shldq $1, %%r8, %%r9 ;"
- "shlq $1, %%r8 ;"
-
- /*******************/
- "mulx %%rdx, %%rax, %%rcx ; " /* A[0]^2 */
- /*******************/
- "movq %%rax, 0(%0) ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, 8(%0) ;"
- "movq 8(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ; " /* A[1]^2 */
- "adcq %%rax, %%r9 ;"
- "movq %%r9, 16(%0) ;"
- "adcq %%rcx, %%r10 ;"
- "movq %%r10, 24(%0) ;"
- "movq 16(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ; " /* A[2]^2 */
- "adcq %%rax, %%r11 ;"
- "movq %%r11, 32(%0) ;"
- "adcq %%rcx, %%r15 ;"
- "movq %%r15, 40(%0) ;"
- "movq 24(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ; " /* A[3]^2 */
- "adcq %%rax, %%r13 ;"
- "movq %%r13, 48(%0) ;"
- "adcq %%rcx, %%r14 ;"
- "movq %%r14, 56(%0) ;"
-
- "movq 40(%1), %%rdx ;" /* B[1] */
- "mulx 32(%1), %%r8, %%r9 ;" /* B[0]*B[1] */
- "mulx 48(%1), %%r10, %%r11 ;" /* B[2]*B[1] */
- "mulx 56(%1), %%rcx, %%r14 ;" /* B[3]*B[1] */
-
- "movq 48(%1), %%rdx ;" /* B[2] */
- "mulx 56(%1), %%r15, %%r13 ;" /* B[3]*B[2] */
- "mulx 32(%1), %%rax, %%rdx ;" /* B[0]*B[2] */
-
- "addq %%rax, %%r9 ;"
- "adcq %%rdx, %%r10 ;"
- "adcq %%rcx, %%r11 ;"
- "adcq %%r14, %%r15 ;"
- "adcq $0, %%r13 ;"
- "movq $0, %%r14 ;"
- "adcq $0, %%r14 ;"
-
- "movq 32(%1), %%rdx ;" /* B[0] */
- "mulx 56(%1), %%rax, %%rcx ;" /* B[0]*B[3] */
-
- "addq %%rax, %%r10 ;"
- "adcq %%rcx, %%r11 ;"
- "adcq $0, %%r15 ;"
- "adcq $0, %%r13 ;"
- "adcq $0, %%r14 ;"
-
- "shldq $1, %%r13, %%r14 ;"
- "shldq $1, %%r15, %%r13 ;"
- "shldq $1, %%r11, %%r15 ;"
- "shldq $1, %%r10, %%r11 ;"
- "shldq $1, %%r9, %%r10 ;"
- "shldq $1, %%r8, %%r9 ;"
- "shlq $1, %%r8 ;"
-
- /*******************/
- "mulx %%rdx, %%rax, %%rcx ; " /* B[0]^2 */
- /*******************/
- "movq %%rax, 64(%0) ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, 72(%0) ;"
- "movq 40(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ; " /* B[1]^2 */
- "adcq %%rax, %%r9 ;"
- "movq %%r9, 80(%0) ;"
- "adcq %%rcx, %%r10 ;"
- "movq %%r10, 88(%0) ;"
- "movq 48(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ; " /* B[2]^2 */
- "adcq %%rax, %%r11 ;"
- "movq %%r11, 96(%0) ;"
- "adcq %%rcx, %%r15 ;"
- "movq %%r15, 104(%0) ;"
- "movq 56(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ; " /* B[3]^2 */
- "adcq %%rax, %%r13 ;"
- "movq %%r13, 112(%0) ;"
- "adcq %%rcx, %%r14 ;"
- "movq %%r14, 120(%0) ;"
- :
- : "r"(c), "r"(a)
- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
- "%r11", "%r13", "%r14", "%r15");
-}
+ u64 carry_r;
-static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a)
-{
asm volatile(
- "movl $38, %%edx; " /* 2*c = 38 = 2^256 */
- "mulx 32(%1), %%r8, %%r10; " /* c*C[4] */
- "xorl %%ebx, %%ebx ;"
- "adox (%1), %%r8 ;"
- "mulx 40(%1), %%r9, %%r11; " /* c*C[5] */
- "adcx %%r10, %%r9 ;"
- "adox 8(%1), %%r9 ;"
- "mulx 48(%1), %%r10, %%rax; " /* c*C[6] */
- "adcx %%r11, %%r10 ;"
- "adox 16(%1), %%r10 ;"
- "mulx 56(%1), %%r11, %%rcx; " /* c*C[7] */
- "adcx %%rax, %%r11 ;"
- "adox 24(%1), %%r11 ;"
- /***************************************/
- "adcx %%rbx, %%rcx ;"
- "adox %%rbx, %%rcx ;"
- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
- "adcx %%rcx, %%r8 ;"
- "adcx %%rbx, %%r9 ;"
- "movq %%r9, 8(%0) ;"
- "adcx %%rbx, %%r10 ;"
- "movq %%r10, 16(%0) ;"
- "adcx %%rbx, %%r11 ;"
- "movq %%r11, 24(%0) ;"
- "mov $0, %%ecx ;"
- "cmovc %%edx, %%ecx ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, (%0) ;"
-
- "mulx 96(%1), %%r8, %%r10; " /* c*C[4] */
- "xorl %%ebx, %%ebx ;"
- "adox 64(%1), %%r8 ;"
- "mulx 104(%1), %%r9, %%r11; " /* c*C[5] */
- "adcx %%r10, %%r9 ;"
- "adox 72(%1), %%r9 ;"
- "mulx 112(%1), %%r10, %%rax; " /* c*C[6] */
- "adcx %%r11, %%r10 ;"
- "adox 80(%1), %%r10 ;"
- "mulx 120(%1), %%r11, %%rcx; " /* c*C[7] */
- "adcx %%rax, %%r11 ;"
- "adox 88(%1), %%r11 ;"
- /****************************************/
- "adcx %%rbx, %%rcx ;"
- "adox %%rbx, %%rcx ;"
- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
- "adcx %%rcx, %%r8 ;"
- "adcx %%rbx, %%r9 ;"
- "movq %%r9, 40(%0) ;"
- "adcx %%rbx, %%r10 ;"
- "movq %%r10, 48(%0) ;"
- "adcx %%rbx, %%r11 ;"
- "movq %%r11, 56(%0) ;"
- "mov $0, %%ecx ;"
- "cmovc %%edx, %%ecx ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, 32(%0) ;"
- :
- : "r"(c), "r"(a)
- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
- "%r10", "%r11");
-}
+ /* Clear registers to propagate the carry bit */
+ " xor %%r8, %%r8;"
+ " xor %%r9, %%r9;"
+ " xor %%r10, %%r10;"
+ " xor %%r11, %%r11;"
+ " xor %1, %1;"
+
+ /* Begin addition chain */
+ " addq 0(%3), %0;"
+ " movq %0, 0(%2);"
+ " adcxq 8(%3), %%r8;"
+ " movq %%r8, 8(%2);"
+ " adcxq 16(%3), %%r9;"
+ " movq %%r9, 16(%2);"
+ " adcxq 24(%3), %%r10;"
+ " movq %%r10, 24(%2);"
+
+ /* Return the carry bit in a register */
+ " adcx %%r11, %1;"
+ : "+&r" (f2), "=&r" (carry_r)
+ : "r" (out), "r" (f1)
+ : "%r8", "%r9", "%r10", "%r11", "memory", "cc"
+ );
-static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a)
-{
- asm volatile(
- "movl $38, %%edx ; " /* 2*c = 38 = 2^256 */
- "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
- "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
- "addq %%r10, %%r9 ;"
- "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
- "adcq %%r11, %%r10 ;"
- "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
- "adcq %%rax, %%r11 ;"
- /***************************************/
- "adcq $0, %%rcx ;"
- "addq (%1), %%r8 ;"
- "adcq 8(%1), %%r9 ;"
- "adcq 16(%1), %%r10 ;"
- "adcq 24(%1), %%r11 ;"
- "adcq $0, %%rcx ;"
- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
- "addq %%rcx, %%r8 ;"
- "adcq $0, %%r9 ;"
- "movq %%r9, 8(%0) ;"
- "adcq $0, %%r10 ;"
- "movq %%r10, 16(%0) ;"
- "adcq $0, %%r11 ;"
- "movq %%r11, 24(%0) ;"
- "mov $0, %%ecx ;"
- "cmovc %%edx, %%ecx ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, (%0) ;"
-
- "mulx 96(%1), %%r8, %%r10 ;" /* c*C[4] */
- "mulx 104(%1), %%r9, %%r11 ;" /* c*C[5] */
- "addq %%r10, %%r9 ;"
- "mulx 112(%1), %%r10, %%rax ;" /* c*C[6] */
- "adcq %%r11, %%r10 ;"
- "mulx 120(%1), %%r11, %%rcx ;" /* c*C[7] */
- "adcq %%rax, %%r11 ;"
- /****************************************/
- "adcq $0, %%rcx ;"
- "addq 64(%1), %%r8 ;"
- "adcq 72(%1), %%r9 ;"
- "adcq 80(%1), %%r10 ;"
- "adcq 88(%1), %%r11 ;"
- "adcq $0, %%rcx ;"
- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
- "addq %%rcx, %%r8 ;"
- "adcq $0, %%r9 ;"
- "movq %%r9, 40(%0) ;"
- "adcq $0, %%r10 ;"
- "movq %%r10, 48(%0) ;"
- "adcq $0, %%r11 ;"
- "movq %%r11, 56(%0) ;"
- "mov $0, %%ecx ;"
- "cmovc %%edx, %%ecx ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, 32(%0) ;"
- :
- : "r"(c), "r"(a)
- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
- "%r11");
+ return carry_r;
}
-static void mul_256x256_integer_adx(u64 *const c, const u64 *const a,
- const u64 *const b)
+/* Computes the field addition of two field elements */
+static inline void fadd(u64 *out, const u64 *f1, const u64 *f2)
{
asm volatile(
- "movq (%1), %%rdx; " /* A[0] */
- "mulx (%2), %%r8, %%r9; " /* A[0]*B[0] */
- "xorl %%r10d, %%r10d ;"
- "movq %%r8, (%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[0]*B[1] */
- "adox %%r9, %%r10 ;"
- "movq %%r10, 8(%0) ;"
- "mulx 16(%2), %%r15, %%r13; " /* A[0]*B[2] */
- "adox %%r11, %%r15 ;"
- "mulx 24(%2), %%r14, %%rdx; " /* A[0]*B[3] */
- "adox %%r13, %%r14 ;"
- "movq $0, %%rax ;"
- /******************************************/
- "adox %%rdx, %%rax ;"
-
- "movq 8(%1), %%rdx; " /* A[1] */
- "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
- "xorl %%r10d, %%r10d ;"
- "adcx 8(%0), %%r8 ;"
- "movq %%r8, 8(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
- "adox %%r9, %%r10 ;"
- "adcx %%r15, %%r10 ;"
- "movq %%r10, 16(%0) ;"
- "mulx 16(%2), %%r15, %%r13; " /* A[1]*B[2] */
- "adox %%r11, %%r15 ;"
- "adcx %%r14, %%r15 ;"
- "movq $0, %%r8 ;"
- "mulx 24(%2), %%r14, %%rdx; " /* A[1]*B[3] */
- "adox %%r13, %%r14 ;"
- "adcx %%rax, %%r14 ;"
- "movq $0, %%rax ;"
- /******************************************/
- "adox %%rdx, %%rax ;"
- "adcx %%r8, %%rax ;"
-
- "movq 16(%1), %%rdx; " /* A[2] */
- "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
- "xorl %%r10d, %%r10d ;"
- "adcx 16(%0), %%r8 ;"
- "movq %%r8, 16(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
- "adox %%r9, %%r10 ;"
- "adcx %%r15, %%r10 ;"
- "movq %%r10, 24(%0) ;"
- "mulx 16(%2), %%r15, %%r13; " /* A[2]*B[2] */
- "adox %%r11, %%r15 ;"
- "adcx %%r14, %%r15 ;"
- "movq $0, %%r8 ;"
- "mulx 24(%2), %%r14, %%rdx; " /* A[2]*B[3] */
- "adox %%r13, %%r14 ;"
- "adcx %%rax, %%r14 ;"
- "movq $0, %%rax ;"
- /******************************************/
- "adox %%rdx, %%rax ;"
- "adcx %%r8, %%rax ;"
-
- "movq 24(%1), %%rdx; " /* A[3] */
- "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
- "xorl %%r10d, %%r10d ;"
- "adcx 24(%0), %%r8 ;"
- "movq %%r8, 24(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
- "adox %%r9, %%r10 ;"
- "adcx %%r15, %%r10 ;"
- "movq %%r10, 32(%0) ;"
- "mulx 16(%2), %%r15, %%r13; " /* A[3]*B[2] */
- "adox %%r11, %%r15 ;"
- "adcx %%r14, %%r15 ;"
- "movq %%r15, 40(%0) ;"
- "movq $0, %%r8 ;"
- "mulx 24(%2), %%r14, %%rdx; " /* A[3]*B[3] */
- "adox %%r13, %%r14 ;"
- "adcx %%rax, %%r14 ;"
- "movq %%r14, 48(%0) ;"
- "movq $0, %%rax ;"
- /******************************************/
- "adox %%rdx, %%rax ;"
- "adcx %%r8, %%rax ;"
- "movq %%rax, 56(%0) ;"
- :
- : "r"(c), "r"(a), "r"(b)
- : "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11",
- "%r13", "%r14", "%r15");
+ /* Compute the raw addition of f1 + f2 */
+ " movq 0(%0), %%r8;"
+ " addq 0(%2), %%r8;"
+ " movq 8(%0), %%r9;"
+ " adcxq 8(%2), %%r9;"
+ " movq 16(%0), %%r10;"
+ " adcxq 16(%2), %%r10;"
+ " movq 24(%0), %%r11;"
+ " adcxq 24(%2), %%r11;"
+
+ /* Wrap the result back into the field */
+
+ /* Step 1: Compute carry*38 */
+ " mov $0, %%rax;"
+ " mov $38, %0;"
+ " cmovc %0, %%rax;"
+
+ /* Step 2: Add carry*38 to the original sum */
+ " xor %%rcx, %%rcx;"
+ " add %%rax, %%r8;"
+ " adcx %%rcx, %%r9;"
+ " movq %%r9, 8(%1);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 16(%1);"
+ " adcx %%rcx, %%r11;"
+ " movq %%r11, 24(%1);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %0, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%1);"
+ : "+&r" (f2)
+ : "r" (out), "r" (f1)
+ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"
+ );
}
-static void mul_256x256_integer_bmi2(u64 *const c, const u64 *const a,
- const u64 *const b)
+/* Computes the field substraction of two field elements */
+static inline void fsub(u64 *out, const u64 *f1, const u64 *f2)
{
asm volatile(
- "movq (%1), %%rdx; " /* A[0] */
- "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
- "movq %%r8, (%0) ;"
- "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
- "addq %%r10, %%r15 ;"
- "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
- "adcq %%r8, %%rax ;"
- "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
- "adcq %%r10, %%rbx ;"
- /******************************************/
- "adcq $0, %%rcx ;"
-
- "movq 8(%1), %%rdx; " /* A[1] */
- "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
- "addq %%r15, %%r8 ;"
- "movq %%r8, 8(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
- "adcq %%r10, %%r9 ;"
- "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
- "adcq %%r8, %%r11 ;"
- "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
- "adcq %%r10, %%r13 ;"
- /******************************************/
- "adcq $0, %%r15 ;"
-
- "addq %%r9, %%rax ;"
- "adcq %%r11, %%rbx ;"
- "adcq %%r13, %%rcx ;"
- "adcq $0, %%r15 ;"
-
- "movq 16(%1), %%rdx; " /* A[2] */
- "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
- "addq %%rax, %%r8 ;"
- "movq %%r8, 16(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
- "adcq %%r10, %%r9 ;"
- "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
- "adcq %%r8, %%r11 ;"
- "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
- "adcq %%r10, %%r13 ;"
- /******************************************/
- "adcq $0, %%rax ;"
-
- "addq %%r9, %%rbx ;"
- "adcq %%r11, %%rcx ;"
- "adcq %%r13, %%r15 ;"
- "adcq $0, %%rax ;"
-
- "movq 24(%1), %%rdx; " /* A[3] */
- "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
- "addq %%rbx, %%r8 ;"
- "movq %%r8, 24(%0) ;"
- "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
- "adcq %%r10, %%r9 ;"
- "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
- "adcq %%r8, %%r11 ;"
- "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
- "adcq %%r10, %%r13 ;"
- /******************************************/
- "adcq $0, %%rbx ;"
-
- "addq %%r9, %%rcx ;"
- "movq %%rcx, 32(%0) ;"
- "adcq %%r11, %%r15 ;"
- "movq %%r15, 40(%0) ;"
- "adcq %%r13, %%rax ;"
- "movq %%rax, 48(%0) ;"
- "adcq $0, %%rbx ;"
- "movq %%rbx, 56(%0) ;"
- :
- : "r"(c), "r"(a), "r"(b)
- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
- "%r10", "%r11", "%r13", "%r15");
+ /* Compute the raw substraction of f1-f2 */
+ " movq 0(%1), %%r8;"
+ " subq 0(%2), %%r8;"
+ " movq 8(%1), %%r9;"
+ " sbbq 8(%2), %%r9;"
+ " movq 16(%1), %%r10;"
+ " sbbq 16(%2), %%r10;"
+ " movq 24(%1), %%r11;"
+ " sbbq 24(%2), %%r11;"
+
+ /* Wrap the result back into the field */
+
+ /* Step 1: Compute carry*38 */
+ " mov $0, %%rax;"
+ " mov $38, %%rcx;"
+ " cmovc %%rcx, %%rax;"
+
+ /* Step 2: Substract carry*38 from the original difference */
+ " sub %%rax, %%r8;"
+ " sbb $0, %%r9;"
+ " sbb $0, %%r10;"
+ " sbb $0, %%r11;"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rcx, %%rax;"
+ " sub %%rax, %%r8;"
+
+ /* Store the result */
+ " movq %%r8, 0(%0);"
+ " movq %%r9, 8(%0);"
+ " movq %%r10, 16(%0);"
+ " movq %%r11, 24(%0);"
+ :
+ : "r" (out), "r" (f1), "r" (f2)
+ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"
+ );
}
-static void sqr_256x256_integer_adx(u64 *const c, const u64 *const a)
+/* Computes a field multiplication: out <- f1 * f2
+ * Uses the 8-element buffer tmp for intermediate results */
+static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
{
asm volatile(
- "movq (%1), %%rdx ;" /* A[0] */
- "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */
- "xorl %%r15d, %%r15d;"
- "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */
- "adcx %%r14, %%r9 ;"
- "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */
- "adcx %%rax, %%r10 ;"
- "movq 24(%1), %%rdx ;" /* A[3] */
- "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */
- "adcx %%rcx, %%r11 ;"
- "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */
- "adcx %%rax, %%rbx ;"
- "movq 8(%1), %%rdx ;" /* A[1] */
- "adcx %%r15, %%r13 ;"
- "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */
- "movq $0, %%r14 ;"
- /******************************************/
- "adcx %%r15, %%r14 ;"
-
- "xorl %%r15d, %%r15d;"
- "adox %%rax, %%r10 ;"
- "adcx %%r8, %%r8 ;"
- "adox %%rcx, %%r11 ;"
- "adcx %%r9, %%r9 ;"
- "adox %%r15, %%rbx ;"
- "adcx %%r10, %%r10 ;"
- "adox %%r15, %%r13 ;"
- "adcx %%r11, %%r11 ;"
- "adox %%r15, %%r14 ;"
- "adcx %%rbx, %%rbx ;"
- "adcx %%r13, %%r13 ;"
- "adcx %%r14, %%r14 ;"
-
- "movq (%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
- /*******************/
- "movq %%rax, 0(%0) ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, 8(%0) ;"
- "movq 8(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
- "adcq %%rax, %%r9 ;"
- "movq %%r9, 16(%0) ;"
- "adcq %%rcx, %%r10 ;"
- "movq %%r10, 24(%0) ;"
- "movq 16(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
- "adcq %%rax, %%r11 ;"
- "movq %%r11, 32(%0) ;"
- "adcq %%rcx, %%rbx ;"
- "movq %%rbx, 40(%0) ;"
- "movq 24(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
- "adcq %%rax, %%r13 ;"
- "movq %%r13, 48(%0) ;"
- "adcq %%rcx, %%r14 ;"
- "movq %%r14, 56(%0) ;"
- :
- : "r"(c), "r"(a)
- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
- "%r10", "%r11", "%r13", "%r14", "%r15");
+ /* Compute the raw multiplication: tmp <- src1 * src2 */
+
+ /* Compute src1[0] * src2 */
+ " movq 0(%1), %%rdx;"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ /* Compute src1[1] * src2 */
+ " movq 8(%1), %%rdx;"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
+ /* Compute src1[2] * src2 */
+ " movq 16(%1), %%rdx;"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
+ /* Compute src1[3] * src2 */
+ " movq 24(%1), %%rdx;"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"
+ /* Line up pointers */
+ " mov %0, %1;"
+ " mov %2, %0;"
+
+ /* Wrap the result back into the field */
+
+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
+ " mov $38, %%rdx;"
+ " mulxq 32(%1), %%r8, %%r13;"
+ " xor %3, %3;"
+ " adoxq 0(%1), %%r8;"
+ " mulxq 40(%1), %%r9, %%rbx;"
+ " adcx %%r13, %%r9;"
+ " adoxq 8(%1), %%r9;"
+ " mulxq 48(%1), %%r10, %%r13;"
+ " adcx %%rbx, %%r10;"
+ " adoxq 16(%1), %%r10;"
+ " mulxq 56(%1), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
+ " adoxq 24(%1), %%r11;"
+ " adcx %3, %%rax;"
+ " adox %3, %%rax;"
+ " imul %%rdx, %%rax;"
+
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
+ " adcx %3, %%r9;"
+ " movq %%r9, 8(%0);"
+ " adcx %3, %%r10;"
+ " movq %%r10, 16(%0);"
+ " adcx %3, %%r11;"
+ " movq %%r11, 24(%0);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%0);"
+ : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
+ :
+ : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc"
+ );
}
-static void sqr_256x256_integer_bmi2(u64 *const c, const u64 *const a)
+/* Computes two field multiplications:
+ * out[0] <- f1[0] * f2[0]
+ * out[1] <- f1[1] * f2[1]
+ * Uses the 16-element buffer tmp for intermediate results. */
+static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
{
asm volatile(
- "movq 8(%1), %%rdx ;" /* A[1] */
- "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */
- "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
- "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
-
- "movq 16(%1), %%rdx ;" /* A[2] */
- "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */
- "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */
-
- "addq %%rax, %%r9 ;"
- "adcq %%rdx, %%r10 ;"
- "adcq %%rcx, %%r11 ;"
- "adcq %%r14, %%r15 ;"
- "adcq $0, %%r13 ;"
- "movq $0, %%r14 ;"
- "adcq $0, %%r14 ;"
-
- "movq (%1), %%rdx ;" /* A[0] */
- "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
-
- "addq %%rax, %%r10 ;"
- "adcq %%rcx, %%r11 ;"
- "adcq $0, %%r15 ;"
- "adcq $0, %%r13 ;"
- "adcq $0, %%r14 ;"
-
- "shldq $1, %%r13, %%r14 ;"
- "shldq $1, %%r15, %%r13 ;"
- "shldq $1, %%r11, %%r15 ;"
- "shldq $1, %%r10, %%r11 ;"
- "shldq $1, %%r9, %%r10 ;"
- "shldq $1, %%r8, %%r9 ;"
- "shlq $1, %%r8 ;"
-
- /*******************/
- "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
- /*******************/
- "movq %%rax, 0(%0) ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, 8(%0) ;"
- "movq 8(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
- "adcq %%rax, %%r9 ;"
- "movq %%r9, 16(%0) ;"
- "adcq %%rcx, %%r10 ;"
- "movq %%r10, 24(%0) ;"
- "movq 16(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
- "adcq %%rax, %%r11 ;"
- "movq %%r11, 32(%0) ;"
- "adcq %%rcx, %%r15 ;"
- "movq %%r15, 40(%0) ;"
- "movq 24(%1), %%rdx ;"
- "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
- "adcq %%rax, %%r13 ;"
- "movq %%r13, 48(%0) ;"
- "adcq %%rcx, %%r14 ;"
- "movq %%r14, 56(%0) ;"
- :
- : "r"(c), "r"(a)
- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
- "%r11", "%r13", "%r14", "%r15");
+ /* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */
+
+ /* Compute src1[0] * src2 */
+ " movq 0(%1), %%rdx;"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ /* Compute src1[1] * src2 */
+ " movq 8(%1), %%rdx;"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
+ /* Compute src1[2] * src2 */
+ " movq 16(%1), %%rdx;"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
+ /* Compute src1[3] * src2 */
+ " movq 24(%1), %%rdx;"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"
+
+ /* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */
+
+ /* Compute src1[0] * src2 */
+ " movq 32(%1), %%rdx;"
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);"
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);"
+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
+ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ /* Compute src1[1] * src2 */
+ " movq 40(%1), %%rdx;"
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);"
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);"
+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
+ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
+ /* Compute src1[2] * src2 */
+ " movq 48(%1), %%rdx;"
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);"
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);"
+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
+ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
+ /* Compute src1[3] * src2 */
+ " movq 56(%1), %%rdx;"
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);"
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);"
+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 104(%0);" " mov $0, %%r8;"
+ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;"
+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);"
+ /* Line up pointers */
+ " mov %0, %1;"
+ " mov %2, %0;"
+
+ /* Wrap the results back into the field */
+
+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
+ " mov $38, %%rdx;"
+ " mulxq 32(%1), %%r8, %%r13;"
+ " xor %3, %3;"
+ " adoxq 0(%1), %%r8;"
+ " mulxq 40(%1), %%r9, %%rbx;"
+ " adcx %%r13, %%r9;"
+ " adoxq 8(%1), %%r9;"
+ " mulxq 48(%1), %%r10, %%r13;"
+ " adcx %%rbx, %%r10;"
+ " adoxq 16(%1), %%r10;"
+ " mulxq 56(%1), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
+ " adoxq 24(%1), %%r11;"
+ " adcx %3, %%rax;"
+ " adox %3, %%rax;"
+ " imul %%rdx, %%rax;"
+
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
+ " adcx %3, %%r9;"
+ " movq %%r9, 8(%0);"
+ " adcx %3, %%r10;"
+ " movq %%r10, 16(%0);"
+ " adcx %3, %%r11;"
+ " movq %%r11, 24(%0);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%0);"
+
+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
+ " mov $38, %%rdx;"
+ " mulxq 96(%1), %%r8, %%r13;"
+ " xor %3, %3;"
+ " adoxq 64(%1), %%r8;"
+ " mulxq 104(%1), %%r9, %%rbx;"
+ " adcx %%r13, %%r9;"
+ " adoxq 72(%1), %%r9;"
+ " mulxq 112(%1), %%r10, %%r13;"
+ " adcx %%rbx, %%r10;"
+ " adoxq 80(%1), %%r10;"
+ " mulxq 120(%1), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
+ " adoxq 88(%1), %%r11;"
+ " adcx %3, %%rax;"
+ " adox %3, %%rax;"
+ " imul %%rdx, %%rax;"
+
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
+ " adcx %3, %%r9;"
+ " movq %%r9, 40(%0);"
+ " adcx %3, %%r10;"
+ " movq %%r10, 48(%0);"
+ " adcx %3, %%r11;"
+ " movq %%r11, 56(%0);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 32(%0);"
+ : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
+ :
+ : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc"
+ );
}
-static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a)
+/* Computes the field multiplication of four-element f1 with value in f2 */
+static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2)
{
- asm volatile(
- "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */
- "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
- "xorl %%ebx, %%ebx ;"
- "adox (%1), %%r8 ;"
- "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
- "adcx %%r10, %%r9 ;"
- "adox 8(%1), %%r9 ;"
- "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
- "adcx %%r11, %%r10 ;"
- "adox 16(%1), %%r10 ;"
- "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
- "adcx %%rax, %%r11 ;"
- "adox 24(%1), %%r11 ;"
- /***************************************/
- "adcx %%rbx, %%rcx ;"
- "adox %%rbx, %%rcx ;"
- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
- "adcx %%rcx, %%r8 ;"
- "adcx %%rbx, %%r9 ;"
- "movq %%r9, 8(%0) ;"
- "adcx %%rbx, %%r10 ;"
- "movq %%r10, 16(%0) ;"
- "adcx %%rbx, %%r11 ;"
- "movq %%r11, 24(%0) ;"
- "mov $0, %%ecx ;"
- "cmovc %%edx, %%ecx ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, (%0) ;"
- :
- : "r"(c), "r"(a)
- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
- "%r10", "%r11");
-}
+ register u64 f2_r asm("rdx") = f2;
-static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
-{
asm volatile(
- "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */
- "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
- "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
- "addq %%r10, %%r9 ;"
- "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
- "adcq %%r11, %%r10 ;"
- "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
- "adcq %%rax, %%r11 ;"
- /***************************************/
- "adcq $0, %%rcx ;"
- "addq (%1), %%r8 ;"
- "adcq 8(%1), %%r9 ;"
- "adcq 16(%1), %%r10 ;"
- "adcq 24(%1), %%r11 ;"
- "adcq $0, %%rcx ;"
- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
- "addq %%rcx, %%r8 ;"
- "adcq $0, %%r9 ;"
- "movq %%r9, 8(%0) ;"
- "adcq $0, %%r10 ;"
- "movq %%r10, 16(%0) ;"
- "adcq $0, %%r11 ;"
- "movq %%r11, 24(%0) ;"
- "mov $0, %%ecx ;"
- "cmovc %%edx, %%ecx ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, (%0) ;"
- :
- : "r"(c), "r"(a)
- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
- "%r11");
+ /* Compute the raw multiplication of f1*f2 */
+ " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */
+ " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */
+ " add %%rcx, %%r9;"
+ " mov $0, %%rcx;"
+ " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */
+ " adcx %%rbx, %%r10;"
+ " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */
+ " adcx %%r13, %%r11;"
+ " adcx %%rcx, %%rax;"
+
+ /* Wrap the result back into the field */
+
+ /* Step 1: Compute carry*38 */
+ " mov $38, %%rdx;"
+ " imul %%rdx, %%rax;"
+
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
+ " adcx %%rcx, %%r9;"
+ " movq %%r9, 8(%1);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 16(%1);"
+ " adcx %%rcx, %%r11;"
+ " movq %%r11, 24(%1);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%1);"
+ : "+&r" (f2_r)
+ : "r" (out), "r" (f1)
+ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "memory", "cc"
+ );
}
-static __always_inline void
-add_eltfp25519_1w_adx(u64 *const c, const u64 *const a, const u64 *const b)
+/* Computes p1 <- bit ? p2 : p1 in constant time */
+static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2)
{
asm volatile(
- "mov $38, %%eax ;"
- "xorl %%ecx, %%ecx ;"
- "movq (%2), %%r8 ;"
- "adcx (%1), %%r8 ;"
- "movq 8(%2), %%r9 ;"
- "adcx 8(%1), %%r9 ;"
- "movq 16(%2), %%r10 ;"
- "adcx 16(%1), %%r10 ;"
- "movq 24(%2), %%r11 ;"
- "adcx 24(%1), %%r11 ;"
- "cmovc %%eax, %%ecx ;"
- "xorl %%eax, %%eax ;"
- "adcx %%rcx, %%r8 ;"
- "adcx %%rax, %%r9 ;"
- "movq %%r9, 8(%0) ;"
- "adcx %%rax, %%r10 ;"
- "movq %%r10, 16(%0) ;"
- "adcx %%rax, %%r11 ;"
- "movq %%r11, 24(%0) ;"
- "mov $38, %%ecx ;"
- "cmovc %%ecx, %%eax ;"
- "addq %%rax, %%r8 ;"
- "movq %%r8, (%0) ;"
- :
- : "r"(c), "r"(a), "r"(b)
- : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
+ /* Invert the polarity of bit to match cmov expectations */
+ " add $18446744073709551615, %0;"
+
+ /* cswap p1[0], p2[0] */
+ " movq 0(%1), %%r8;"
+ " movq 0(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 0(%1);"
+ " movq %%r9, 0(%2);"
+
+ /* cswap p1[1], p2[1] */
+ " movq 8(%1), %%r8;"
+ " movq 8(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 8(%1);"
+ " movq %%r9, 8(%2);"
+
+ /* cswap p1[2], p2[2] */
+ " movq 16(%1), %%r8;"
+ " movq 16(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 16(%1);"
+ " movq %%r9, 16(%2);"
+
+ /* cswap p1[3], p2[3] */
+ " movq 24(%1), %%r8;"
+ " movq 24(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 24(%1);"
+ " movq %%r9, 24(%2);"
+
+ /* cswap p1[4], p2[4] */
+ " movq 32(%1), %%r8;"
+ " movq 32(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 32(%1);"
+ " movq %%r9, 32(%2);"
+
+ /* cswap p1[5], p2[5] */
+ " movq 40(%1), %%r8;"
+ " movq 40(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 40(%1);"
+ " movq %%r9, 40(%2);"
+
+ /* cswap p1[6], p2[6] */
+ " movq 48(%1), %%r8;"
+ " movq 48(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 48(%1);"
+ " movq %%r9, 48(%2);"
+
+ /* cswap p1[7], p2[7] */
+ " movq 56(%1), %%r8;"
+ " movq 56(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 56(%1);"
+ " movq %%r9, 56(%2);"
+ : "+&r" (bit)
+ : "r" (p1), "r" (p2)
+ : "%r8", "%r9", "%r10", "memory", "cc"
+ );
}
-static __always_inline void
-add_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a, const u64 *const b)
+/* Computes the square of a field element: out <- f * f
+ * Uses the 8-element buffer tmp for intermediate results */
+static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
{
asm volatile(
- "mov $38, %%eax ;"
- "movq (%2), %%r8 ;"
- "addq (%1), %%r8 ;"
- "movq 8(%2), %%r9 ;"
- "adcq 8(%1), %%r9 ;"
- "movq 16(%2), %%r10 ;"
- "adcq 16(%1), %%r10 ;"
- "movq 24(%2), %%r11 ;"
- "adcq 24(%1), %%r11 ;"
- "mov $0, %%ecx ;"
- "cmovc %%eax, %%ecx ;"
- "addq %%rcx, %%r8 ;"
- "adcq $0, %%r9 ;"
- "movq %%r9, 8(%0) ;"
- "adcq $0, %%r10 ;"
- "movq %%r10, 16(%0) ;"
- "adcq $0, %%r11 ;"
- "movq %%r11, 24(%0) ;"
- "mov $0, %%ecx ;"
- "cmovc %%eax, %%ecx ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, (%0) ;"
- :
- : "r"(c), "r"(a), "r"(b)
- : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
+ /* Compute the raw multiplication: tmp <- f * f */
+
+ /* Step 1: Compute all partial products */
+ " movq 0(%1), %%rdx;" /* f[0] */
+ " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
+ " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
+ " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
+ " movq 24(%1), %%rdx;" /* f[3] */
+ " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
+ " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */
+ " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
+ " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
+
+ /* Step 2: Compute two parallel carry chains */
+ " xor %%r15, %%r15;"
+ " adox %%rax, %%r10;"
+ " adcx %%r8, %%r8;"
+ " adox %%rcx, %%r11;"
+ " adcx %%r9, %%r9;"
+ " adox %%r15, %%rbx;"
+ " adcx %%r10, %%r10;"
+ " adox %%r15, %%r13;"
+ " adcx %%r11, %%r11;"
+ " adox %%r15, %%r14;"
+ " adcx %%rbx, %%rbx;"
+ " adcx %%r13, %%r13;"
+ " adcx %%r14, %%r14;"
+
+ /* Step 3: Compute intermediate squares */
+ " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
+ " movq %%rax, 0(%0);"
+ " add %%rcx, %%r8;" " movq %%r8, 8(%0);"
+ " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
+ " adcx %%rax, %%r9;" " movq %%r9, 16(%0);"
+ " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);"
+ " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
+ " adcx %%rax, %%r11;" " movq %%r11, 32(%0);"
+ " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);"
+ " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
+ " adcx %%rax, %%r13;" " movq %%r13, 48(%0);"
+ " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);"
+
+ /* Line up pointers */
+ " mov %0, %1;"
+ " mov %2, %0;"
+
+ /* Wrap the result back into the field */
+
+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
+ " mov $38, %%rdx;"
+ " mulxq 32(%1), %%r8, %%r13;"
+ " xor %%rcx, %%rcx;"
+ " adoxq 0(%1), %%r8;"
+ " mulxq 40(%1), %%r9, %%rbx;"
+ " adcx %%r13, %%r9;"
+ " adoxq 8(%1), %%r9;"
+ " mulxq 48(%1), %%r10, %%r13;"
+ " adcx %%rbx, %%r10;"
+ " adoxq 16(%1), %%r10;"
+ " mulxq 56(%1), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
+ " adoxq 24(%1), %%r11;"
+ " adcx %%rcx, %%rax;"
+ " adox %%rcx, %%rax;"
+ " imul %%rdx, %%rax;"
+
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
+ " adcx %%rcx, %%r9;"
+ " movq %%r9, 8(%0);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 16(%0);"
+ " adcx %%rcx, %%r11;"
+ " movq %%r11, 24(%0);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%0);"
+ : "+&r" (tmp), "+&r" (f), "+&r" (out)
+ :
+ : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc"
+ );
}
-static __always_inline void
-sub_eltfp25519_1w(u64 *const c, const u64 *const a, const u64 *const b)
+/* Computes two field squarings:
+ * out[0] <- f[0] * f[0]
+ * out[1] <- f[1] * f[1]
+ * Uses the 16-element buffer tmp for intermediate results */
+static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
{
asm volatile(
- "mov $38, %%eax ;"
- "movq (%1), %%r8 ;"
- "subq (%2), %%r8 ;"
- "movq 8(%1), %%r9 ;"
- "sbbq 8(%2), %%r9 ;"
- "movq 16(%1), %%r10 ;"
- "sbbq 16(%2), %%r10 ;"
- "movq 24(%1), %%r11 ;"
- "sbbq 24(%2), %%r11 ;"
- "mov $0, %%ecx ;"
- "cmovc %%eax, %%ecx ;"
- "subq %%rcx, %%r8 ;"
- "sbbq $0, %%r9 ;"
- "movq %%r9, 8(%0) ;"
- "sbbq $0, %%r10 ;"
- "movq %%r10, 16(%0) ;"
- "sbbq $0, %%r11 ;"
- "movq %%r11, 24(%0) ;"
- "mov $0, %%ecx ;"
- "cmovc %%eax, %%ecx ;"
- "subq %%rcx, %%r8 ;"
- "movq %%r8, (%0) ;"
- :
- : "r"(c), "r"(a), "r"(b)
- : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
+ /* Step 1: Compute all partial products */
+ " movq 0(%1), %%rdx;" /* f[0] */
+ " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
+ " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
+ " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
+ " movq 24(%1), %%rdx;" /* f[3] */
+ " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
+ " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */
+ " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
+ " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
+
+ /* Step 2: Compute two parallel carry chains */
+ " xor %%r15, %%r15;"
+ " adox %%rax, %%r10;"
+ " adcx %%r8, %%r8;"
+ " adox %%rcx, %%r11;"
+ " adcx %%r9, %%r9;"
+ " adox %%r15, %%rbx;"
+ " adcx %%r10, %%r10;"
+ " adox %%r15, %%r13;"
+ " adcx %%r11, %%r11;"
+ " adox %%r15, %%r14;"
+ " adcx %%rbx, %%rbx;"
+ " adcx %%r13, %%r13;"
+ " adcx %%r14, %%r14;"
+
+ /* Step 3: Compute intermediate squares */
+ " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
+ " movq %%rax, 0(%0);"
+ " add %%rcx, %%r8;" " movq %%r8, 8(%0);"
+ " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
+ " adcx %%rax, %%r9;" " movq %%r9, 16(%0);"
+ " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);"
+ " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
+ " adcx %%rax, %%r11;" " movq %%r11, 32(%0);"
+ " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);"
+ " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
+ " adcx %%rax, %%r13;" " movq %%r13, 48(%0);"
+ " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);"
+
+ /* Step 1: Compute all partial products */
+ " movq 32(%1), %%rdx;" /* f[0] */
+ " mulxq 40(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
+ " mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
+ " mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
+ " movq 56(%1), %%rdx;" /* f[3] */
+ " mulxq 40(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
+ " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */
+ " movq 40(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
+ " mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
+
+ /* Step 2: Compute two parallel carry chains */
+ " xor %%r15, %%r15;"
+ " adox %%rax, %%r10;"
+ " adcx %%r8, %%r8;"
+ " adox %%rcx, %%r11;"
+ " adcx %%r9, %%r9;"
+ " adox %%r15, %%rbx;"
+ " adcx %%r10, %%r10;"
+ " adox %%r15, %%r13;"
+ " adcx %%r11, %%r11;"
+ " adox %%r15, %%r14;"
+ " adcx %%rbx, %%rbx;"
+ " adcx %%r13, %%r13;"
+ " adcx %%r14, %%r14;"
+
+ /* Step 3: Compute intermediate squares */
+ " movq 32(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
+ " movq %%rax, 64(%0);"
+ " add %%rcx, %%r8;" " movq %%r8, 72(%0);"
+ " movq 40(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
+ " adcx %%rax, %%r9;" " movq %%r9, 80(%0);"
+ " adcx %%rcx, %%r10;" " movq %%r10, 88(%0);"
+ " movq 48(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
+ " adcx %%rax, %%r11;" " movq %%r11, 96(%0);"
+ " adcx %%rcx, %%rbx;" " movq %%rbx, 104(%0);"
+ " movq 56(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
+ " adcx %%rax, %%r13;" " movq %%r13, 112(%0);"
+ " adcx %%rcx, %%r14;" " movq %%r14, 120(%0);"
+
+ /* Line up pointers */
+ " mov %0, %1;"
+ " mov %2, %0;"
+
+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
+ " mov $38, %%rdx;"
+ " mulxq 32(%1), %%r8, %%r13;"
+ " xor %%rcx, %%rcx;"
+ " adoxq 0(%1), %%r8;"
+ " mulxq 40(%1), %%r9, %%rbx;"
+ " adcx %%r13, %%r9;"
+ " adoxq 8(%1), %%r9;"
+ " mulxq 48(%1), %%r10, %%r13;"
+ " adcx %%rbx, %%r10;"
+ " adoxq 16(%1), %%r10;"
+ " mulxq 56(%1), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
+ " adoxq 24(%1), %%r11;"
+ " adcx %%rcx, %%rax;"
+ " adox %%rcx, %%rax;"
+ " imul %%rdx, %%rax;"
+
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
+ " adcx %%rcx, %%r9;"
+ " movq %%r9, 8(%0);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 16(%0);"
+ " adcx %%rcx, %%r11;"
+ " movq %%r11, 24(%0);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%0);"
+
+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
+ " mov $38, %%rdx;"
+ " mulxq 96(%1), %%r8, %%r13;"
+ " xor %%rcx, %%rcx;"
+ " adoxq 64(%1), %%r8;"
+ " mulxq 104(%1), %%r9, %%rbx;"
+ " adcx %%r13, %%r9;"
+ " adoxq 72(%1), %%r9;"
+ " mulxq 112(%1), %%r10, %%r13;"
+ " adcx %%rbx, %%r10;"
+ " adoxq 80(%1), %%r10;"
+ " mulxq 120(%1), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
+ " adoxq 88(%1), %%r11;"
+ " adcx %%rcx, %%rax;"
+ " adox %%rcx, %%rax;"
+ " imul %%rdx, %%rax;"
+
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
+ " adcx %%rcx, %%r9;"
+ " movq %%r9, 40(%0);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 48(%0);"
+ " adcx %%rcx, %%r11;"
+ " movq %%r11, 56(%0);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 32(%0);"
+ : "+&r" (tmp), "+&r" (f), "+&r" (out)
+ :
+ : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc"
+ );
}
-/* Multiplication by a24 = (A+2)/4 = (486662+2)/4 = 121666 */
-static __always_inline void
-mul_a24_eltfp25519_1w(u64 *const c, const u64 *const a)
+static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2)
{
- const u64 a24 = 121666;
- asm volatile(
- "movq %2, %%rdx ;"
- "mulx (%1), %%r8, %%r10 ;"
- "mulx 8(%1), %%r9, %%r11 ;"
- "addq %%r10, %%r9 ;"
- "mulx 16(%1), %%r10, %%rax ;"
- "adcq %%r11, %%r10 ;"
- "mulx 24(%1), %%r11, %%rcx ;"
- "adcq %%rax, %%r11 ;"
- /**************************/
- "adcq $0, %%rcx ;"
- "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19*/
- "imul %%rdx, %%rcx ;"
- "addq %%rcx, %%r8 ;"
- "adcq $0, %%r9 ;"
- "movq %%r9, 8(%0) ;"
- "adcq $0, %%r10 ;"
- "movq %%r10, 16(%0) ;"
- "adcq $0, %%r11 ;"
- "movq %%r11, 24(%0) ;"
- "mov $0, %%ecx ;"
- "cmovc %%edx, %%ecx ;"
- "addq %%rcx, %%r8 ;"
- "movq %%r8, (%0) ;"
- :
- : "r"(c), "r"(a), "r"(a24)
- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
- "%r11");
+ u64 *nq = p01_tmp1;
+ u64 *nq_p1 = p01_tmp1 + (u32)8U;
+ u64 *tmp1 = p01_tmp1 + (u32)16U;
+ u64 *x1 = q;
+ u64 *x2 = nq;
+ u64 *z2 = nq + (u32)4U;
+ u64 *z3 = nq_p1 + (u32)4U;
+ u64 *a = tmp1;
+ u64 *b = tmp1 + (u32)4U;
+ u64 *ab = tmp1;
+ u64 *dc = tmp1 + (u32)8U;
+ u64 *x3;
+ u64 *z31;
+ u64 *d0;
+ u64 *c0;
+ u64 *a1;
+ u64 *b1;
+ u64 *d;
+ u64 *c;
+ u64 *ab1;
+ u64 *dc1;
+ fadd(a, x2, z2);
+ fsub(b, x2, z2);
+ x3 = nq_p1;
+ z31 = nq_p1 + (u32)4U;
+ d0 = dc;
+ c0 = dc + (u32)4U;
+ fadd(c0, x3, z31);
+ fsub(d0, x3, z31);
+ fmul2(dc, dc, ab, tmp2);
+ fadd(x3, d0, c0);
+ fsub(z31, d0, c0);
+ a1 = tmp1;
+ b1 = tmp1 + (u32)4U;
+ d = tmp1 + (u32)8U;
+ c = tmp1 + (u32)12U;
+ ab1 = tmp1;
+ dc1 = tmp1 + (u32)8U;
+ fsqr2(dc1, ab1, tmp2);
+ fsqr2(nq_p1, nq_p1, tmp2);
+ a1[0U] = c[0U];
+ a1[1U] = c[1U];
+ a1[2U] = c[2U];
+ a1[3U] = c[3U];
+ fsub(c, d, c);
+ fmul_scalar(b1, c, (u64)121665U);
+ fadd(b1, b1, d);
+ fmul2(nq, dc1, ab1, tmp2);
+ fmul(z3, z3, x1, tmp2);
}
-static void inv_eltfp25519_1w_adx(u64 *const c, const u64 *const a)
+static void point_double(u64 *nq, u64 *tmp1, u64 *tmp2)
{
- struct {
- eltfp25519_1w_buffer buffer;
- eltfp25519_1w x0, x1, x2;
- } __aligned(32) m;
- u64 *T[4];
-
- T[0] = m.x0;
- T[1] = c; /* x^(-1) */
- T[2] = m.x1;
- T[3] = m.x2;
-
- copy_eltfp25519_1w(T[1], a);
- sqrn_eltfp25519_1w_adx(T[1], 1);
- copy_eltfp25519_1w(T[2], T[1]);
- sqrn_eltfp25519_1w_adx(T[2], 2);
- mul_eltfp25519_1w_adx(T[0], a, T[2]);
- mul_eltfp25519_1w_adx(T[1], T[1], T[0]);
- copy_eltfp25519_1w(T[2], T[1]);
- sqrn_eltfp25519_1w_adx(T[2], 1);
- mul_eltfp25519_1w_adx(T[0], T[0], T[2]);
- copy_eltfp25519_1w(T[2], T[0]);
- sqrn_eltfp25519_1w_adx(T[2], 5);
- mul_eltfp25519_1w_adx(T[0], T[0], T[2]);
- copy_eltfp25519_1w(T[2], T[0]);
- sqrn_eltfp25519_1w_adx(T[2], 10);
- mul_eltfp25519_1w_adx(T[2], T[2], T[0]);
- copy_eltfp25519_1w(T[3], T[2]);
- sqrn_eltfp25519_1w_adx(T[3], 20);
- mul_eltfp25519_1w_adx(T[3], T[3], T[2]);
- sqrn_eltfp25519_1w_adx(T[3], 10);
- mul_eltfp25519_1w_adx(T[3], T[3], T[0]);
- copy_eltfp25519_1w(T[0], T[3]);
- sqrn_eltfp25519_1w_adx(T[0], 50);
- mul_eltfp25519_1w_adx(T[0], T[0], T[3]);
- copy_eltfp25519_1w(T[2], T[0]);
- sqrn_eltfp25519_1w_adx(T[2], 100);
- mul_eltfp25519_1w_adx(T[2], T[2], T[0]);
- sqrn_eltfp25519_1w_adx(T[2], 50);
- mul_eltfp25519_1w_adx(T[2], T[2], T[3]);
- sqrn_eltfp25519_1w_adx(T[2], 5);
- mul_eltfp25519_1w_adx(T[1], T[1], T[2]);
-
- memzero_explicit(&m, sizeof(m));
+ u64 *x2 = nq;
+ u64 *z2 = nq + (u32)4U;
+ u64 *a = tmp1;
+ u64 *b = tmp1 + (u32)4U;
+ u64 *d = tmp1 + (u32)8U;
+ u64 *c = tmp1 + (u32)12U;
+ u64 *ab = tmp1;
+ u64 *dc = tmp1 + (u32)8U;
+ fadd(a, x2, z2);
+ fsub(b, x2, z2);
+ fsqr2(dc, ab, tmp2);
+ a[0U] = c[0U];
+ a[1U] = c[1U];
+ a[2U] = c[2U];
+ a[3U] = c[3U];
+ fsub(c, d, c);
+ fmul_scalar(b, c, (u64)121665U);
+ fadd(b, b, d);
+ fmul2(nq, dc, ab, tmp2);
}
-static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
+static void montgomery_ladder(u64 *out, const u8 *key, u64 *init1)
{
- struct {
- eltfp25519_1w_buffer buffer;
- eltfp25519_1w x0, x1, x2;
- } __aligned(32) m;
- u64 *T[5];
-
- T[0] = m.x0;
- T[1] = c; /* x^(-1) */
- T[2] = m.x1;
- T[3] = m.x2;
-
- copy_eltfp25519_1w(T[1], a);
- sqrn_eltfp25519_1w_bmi2(T[1], 1);
- copy_eltfp25519_1w(T[2], T[1]);
- sqrn_eltfp25519_1w_bmi2(T[2], 2);
- mul_eltfp25519_1w_bmi2(T[0], a, T[2]);
- mul_eltfp25519_1w_bmi2(T[1], T[1], T[0]);
- copy_eltfp25519_1w(T[2], T[1]);
- sqrn_eltfp25519_1w_bmi2(T[2], 1);
- mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]);
- copy_eltfp25519_1w(T[2], T[0]);
- sqrn_eltfp25519_1w_bmi2(T[2], 5);
- mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]);
- copy_eltfp25519_1w(T[2], T[0]);
- sqrn_eltfp25519_1w_bmi2(T[2], 10);
- mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]);
- copy_eltfp25519_1w(T[3], T[2]);
- sqrn_eltfp25519_1w_bmi2(T[3], 20);
- mul_eltfp25519_1w_bmi2(T[3], T[3], T[2]);
- sqrn_eltfp25519_1w_bmi2(T[3], 10);
- mul_eltfp25519_1w_bmi2(T[3], T[3], T[0]);
- copy_eltfp25519_1w(T[0], T[3]);
- sqrn_eltfp25519_1w_bmi2(T[0], 50);
- mul_eltfp25519_1w_bmi2(T[0], T[0], T[3]);
- copy_eltfp25519_1w(T[2], T[0]);
- sqrn_eltfp25519_1w_bmi2(T[2], 100);
- mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]);
- sqrn_eltfp25519_1w_bmi2(T[2], 50);
- mul_eltfp25519_1w_bmi2(T[2], T[2], T[3]);
- sqrn_eltfp25519_1w_bmi2(T[2], 5);
- mul_eltfp25519_1w_bmi2(T[1], T[1], T[2]);
-
- memzero_explicit(&m, sizeof(m));
+ u64 tmp2[16U] = { 0U };
+ u64 p01_tmp1_swap[33U] = { 0U };
+ u64 *p0 = p01_tmp1_swap;
+ u64 *p01 = p01_tmp1_swap;
+ u64 *p03 = p01;
+ u64 *p11 = p01 + (u32)8U;
+ u64 *x0;
+ u64 *z0;
+ u64 *p01_tmp1;
+ u64 *p01_tmp11;
+ u64 *nq10;
+ u64 *nq_p11;
+ u64 *swap1;
+ u64 sw0;
+ u64 *nq1;
+ u64 *tmp1;
+ memcpy(p11, init1, (u32)8U * sizeof(init1[0U]));
+ x0 = p03;
+ z0 = p03 + (u32)4U;
+ x0[0U] = (u64)1U;
+ x0[1U] = (u64)0U;
+ x0[2U] = (u64)0U;
+ x0[3U] = (u64)0U;
+ z0[0U] = (u64)0U;
+ z0[1U] = (u64)0U;
+ z0[2U] = (u64)0U;
+ z0[3U] = (u64)0U;
+ p01_tmp1 = p01_tmp1_swap;
+ p01_tmp11 = p01_tmp1_swap;
+ nq10 = p01_tmp1_swap;
+ nq_p11 = p01_tmp1_swap + (u32)8U;
+ swap1 = p01_tmp1_swap + (u32)32U;
+ cswap2((u64)1U, nq10, nq_p11);
+ point_add_and_double(init1, p01_tmp11, tmp2);
+ swap1[0U] = (u64)1U;
+ {
+ u32 i;
+ for (i = (u32)0U; i < (u32)251U; i = i + (u32)1U) {
+ u64 *p01_tmp12 = p01_tmp1_swap;
+ u64 *swap2 = p01_tmp1_swap + (u32)32U;
+ u64 *nq2 = p01_tmp12;
+ u64 *nq_p12 = p01_tmp12 + (u32)8U;
+ u64 bit = (u64)(key[((u32)253U - i) / (u32)8U] >> ((u32)253U - i) % (u32)8U & (u8)1U);
+ u64 sw = swap2[0U] ^ bit;
+ cswap2(sw, nq2, nq_p12);
+ point_add_and_double(init1, p01_tmp12, tmp2);
+ swap2[0U] = bit;
+ }
+ }
+ sw0 = swap1[0U];
+ cswap2(sw0, nq10, nq_p11);
+ nq1 = p01_tmp1;
+ tmp1 = p01_tmp1 + (u32)16U;
+ point_double(nq1, tmp1, tmp2);
+ point_double(nq1, tmp1, tmp2);
+ point_double(nq1, tmp1, tmp2);
+ memcpy(out, p0, (u32)8U * sizeof(p0[0U]));
+
+ memzero_explicit(tmp2, sizeof(tmp2));
+ memzero_explicit(p01_tmp1_swap, sizeof(p01_tmp1_swap));
}
-/* Given c, a 256-bit number, fred_eltfp25519_1w updates c
- * with a number such that 0 <= C < 2**255-19.
- */
-static __always_inline void fred_eltfp25519_1w(u64 *const c)
+static void fsquare_times(u64 *o, const u64 *inp, u64 *tmp, u32 n1)
{
- u64 tmp0 = 38, tmp1 = 19;
- asm volatile(
- "btrq $63, %3 ;" /* Put bit 255 in carry flag and clear */
- "cmovncl %k5, %k4 ;" /* c[255] ? 38 : 19 */
-
- /* Add either 19 or 38 to c */
- "addq %4, %0 ;"
- "adcq $0, %1 ;"
- "adcq $0, %2 ;"
- "adcq $0, %3 ;"
-
- /* Test for bit 255 again; only triggered on overflow modulo 2^255-19 */
- "movl $0, %k4 ;"
- "cmovnsl %k5, %k4 ;" /* c[255] ? 0 : 19 */
- "btrq $63, %3 ;" /* Clear bit 255 */
-
- /* Subtract 19 if necessary */
- "subq %4, %0 ;"
- "sbbq $0, %1 ;"
- "sbbq $0, %2 ;"
- "sbbq $0, %3 ;"
-
- : "+r"(c[0]), "+r"(c[1]), "+r"(c[2]), "+r"(c[3]), "+r"(tmp0),
- "+r"(tmp1)
- :
- : "memory", "cc");
+ u32 i;
+ fsqr(o, inp, tmp);
+ for (i = (u32)0U; i < n1 - (u32)1U; i = i + (u32)1U)
+ fsqr(o, o, tmp);
}
-static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py)
+static void finv(u64 *o, const u64 *i, u64 *tmp)
{
- u64 temp;
- asm volatile(
- "test %9, %9 ;"
- "movq %0, %8 ;"
- "cmovnzq %4, %0 ;"
- "cmovnzq %8, %4 ;"
- "movq %1, %8 ;"
- "cmovnzq %5, %1 ;"
- "cmovnzq %8, %5 ;"
- "movq %2, %8 ;"
- "cmovnzq %6, %2 ;"
- "cmovnzq %8, %6 ;"
- "movq %3, %8 ;"
- "cmovnzq %7, %3 ;"
- "cmovnzq %8, %7 ;"
- : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]),
- "+r"(py[0]), "+r"(py[1]), "+r"(py[2]), "+r"(py[3]),
- "=r"(temp)
- : "r"(bit)
- : "cc"
- );
+ u64 t1[16U] = { 0U };
+ u64 *a0 = t1;
+ u64 *b = t1 + (u32)4U;
+ u64 *c = t1 + (u32)8U;
+ u64 *t00 = t1 + (u32)12U;
+ u64 *tmp1 = tmp;
+ u64 *a;
+ u64 *t0;
+ fsquare_times(a0, i, tmp1, (u32)1U);
+ fsquare_times(t00, a0, tmp1, (u32)2U);
+ fmul(b, t00, i, tmp);
+ fmul(a0, b, a0, tmp);
+ fsquare_times(t00, a0, tmp1, (u32)1U);
+ fmul(b, t00, b, tmp);
+ fsquare_times(t00, b, tmp1, (u32)5U);
+ fmul(b, t00, b, tmp);
+ fsquare_times(t00, b, tmp1, (u32)10U);
+ fmul(c, t00, b, tmp);
+ fsquare_times(t00, c, tmp1, (u32)20U);
+ fmul(t00, t00, c, tmp);
+ fsquare_times(t00, t00, tmp1, (u32)10U);
+ fmul(b, t00, b, tmp);
+ fsquare_times(t00, b, tmp1, (u32)50U);
+ fmul(c, t00, b, tmp);
+ fsquare_times(t00, c, tmp1, (u32)100U);
+ fmul(t00, t00, c, tmp);
+ fsquare_times(t00, t00, tmp1, (u32)50U);
+ fmul(t00, t00, b, tmp);
+ fsquare_times(t00, t00, tmp1, (u32)5U);
+ a = t1;
+ t0 = t1 + (u32)12U;
+ fmul(o, t0, a, tmp);
}
-static __always_inline void cselect(u8 bit, u64 *const px, const u64 *const py)
+static void store_felem(u64 *b, u64 *f)
{
- asm volatile(
- "test %4, %4 ;"
- "cmovnzq %5, %0 ;"
- "cmovnzq %6, %1 ;"
- "cmovnzq %7, %2 ;"
- "cmovnzq %8, %3 ;"
- : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3])
- : "r"(bit), "rm"(py[0]), "rm"(py[1]), "rm"(py[2]), "rm"(py[3])
- : "cc"
- );
+ u64 f30 = f[3U];
+ u64 top_bit0 = f30 >> (u32)63U;
+ u64 carry0;
+ u64 f31;
+ u64 top_bit;
+ u64 carry;
+ u64 f0;
+ u64 f1;
+ u64 f2;
+ u64 f3;
+ u64 m0;
+ u64 m1;
+ u64 m2;
+ u64 m3;
+ u64 mask;
+ u64 f0_;
+ u64 f1_;
+ u64 f2_;
+ u64 f3_;
+ u64 o0;
+ u64 o1;
+ u64 o2;
+ u64 o3;
+ f[3U] = f30 & (u64)0x7fffffffffffffffU;
+ carry0 = add_scalar(f, f, (u64)19U * top_bit0);
+ f31 = f[3U];
+ top_bit = f31 >> (u32)63U;
+ f[3U] = f31 & (u64)0x7fffffffffffffffU;
+ carry = add_scalar(f, f, (u64)19U * top_bit);
+ f0 = f[0U];
+ f1 = f[1U];
+ f2 = f[2U];
+ f3 = f[3U];
+ m0 = gte_mask(f0, (u64)0xffffffffffffffedU);
+ m1 = eq_mask(f1, (u64)0xffffffffffffffffU);
+ m2 = eq_mask(f2, (u64)0xffffffffffffffffU);
+ m3 = eq_mask(f3, (u64)0x7fffffffffffffffU);
+ mask = ((m0 & m1) & m2) & m3;
+ f0_ = f0 - (mask & (u64)0xffffffffffffffedU);
+ f1_ = f1 - (mask & (u64)0xffffffffffffffffU);
+ f2_ = f2 - (mask & (u64)0xffffffffffffffffU);
+ f3_ = f3 - (mask & (u64)0x7fffffffffffffffU);
+ o0 = f0_;
+ o1 = f1_;
+ o2 = f2_;
+ o3 = f3_;
+ b[0U] = o0;
+ b[1U] = o1;
+ b[2U] = o2;
+ b[3U] = o3;
}
-static void curve25519_adx(u8 shared[CURVE25519_KEY_SIZE],
- const u8 private_key[CURVE25519_KEY_SIZE],
- const u8 session_key[CURVE25519_KEY_SIZE])
+static void encode_point(u8 *o, const u64 *i)
{
- struct {
- u64 buffer[4 * NUM_WORDS_ELTFP25519];
- u64 coordinates[4 * NUM_WORDS_ELTFP25519];
- u64 workspace[6 * NUM_WORDS_ELTFP25519];
- u8 session[CURVE25519_KEY_SIZE];
- u8 private[CURVE25519_KEY_SIZE];
- } __aligned(32) m;
-
- int i = 0, j = 0;
- u64 prev = 0;
- u64 *const X1 = (u64 *)m.session;
- u64 *const key = (u64 *)m.private;
- u64 *const Px = m.coordinates + 0;
- u64 *const Pz = m.coordinates + 4;
- u64 *const Qx = m.coordinates + 8;
- u64 *const Qz = m.coordinates + 12;
- u64 *const X2 = Qx;
- u64 *const Z2 = Qz;
- u64 *const X3 = Px;
- u64 *const Z3 = Pz;
- u64 *const X2Z2 = Qx;
- u64 *const X3Z3 = Px;
-
- u64 *const A = m.workspace + 0;
- u64 *const B = m.workspace + 4;
- u64 *const D = m.workspace + 8;
- u64 *const C = m.workspace + 12;
- u64 *const DA = m.workspace + 16;
- u64 *const CB = m.workspace + 20;
- u64 *const AB = A;
- u64 *const DC = D;
- u64 *const DACB = DA;
-
- memcpy(m.private, private_key, sizeof(m.private));
- memcpy(m.session, session_key, sizeof(m.session));
-
- curve25519_clamp_secret(m.private);
-
- /* As in the draft:
- * When receiving such an array, implementations of curve25519
- * MUST mask the most-significant bit in the final byte. This
- * is done to preserve compatibility with point formats which
- * reserve the sign bit for use in other protocols and to
- * increase resistance to implementation fingerprinting
- */
- m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1;
-
- copy_eltfp25519_1w(Px, X1);
- setzero_eltfp25519_1w(Pz);
- setzero_eltfp25519_1w(Qx);
- setzero_eltfp25519_1w(Qz);
-
- Pz[0] = 1;
- Qx[0] = 1;
-
- /* main-loop */
- prev = 0;
- j = 62;
- for (i = 3; i >= 0; --i) {
- while (j >= 0) {
- u64 bit = (key[i] >> j) & 0x1;
- u64 swap = bit ^ prev;
- prev = bit;
-
- add_eltfp25519_1w_adx(A, X2, Z2); /* A = (X2+Z2) */
- sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */
- add_eltfp25519_1w_adx(C, X3, Z3); /* C = (X3+Z3) */
- sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */
- mul_eltfp25519_2w_adx(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */
-
- cselect(swap, A, C);
- cselect(swap, B, D);
-
- sqr_eltfp25519_2w_adx(AB); /* [AA|BB] = [A^2|B^2] */
- add_eltfp25519_1w_adx(X3, DA, CB); /* X3 = (DA+CB) */
- sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */
- sqr_eltfp25519_2w_adx(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */
-
- copy_eltfp25519_1w(X2, B); /* X2 = B^2 */
- sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */
-
- mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */
- add_eltfp25519_1w_adx(B, B, X2); /* B = a24*E+B */
- mul_eltfp25519_2w_adx(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */
- mul_eltfp25519_1w_adx(Z3, Z3, X1); /* Z3 = Z3*X1 */
- --j;
- }
- j = 63;
- }
-
- inv_eltfp25519_1w_adx(A, Qz);
- mul_eltfp25519_1w_adx((u64 *)shared, Qx, A);
- fred_eltfp25519_1w((u64 *)shared);
-
- memzero_explicit(&m, sizeof(m));
+ const u64 *x = i;
+ const u64 *z = i + (u32)4U;
+ u64 tmp[4U] = { 0U };
+ u64 tmp_w[16U] = { 0U };
+ finv(tmp, z, tmp_w);
+ fmul(tmp, tmp, x, tmp_w);
+ store_felem((u64 *)o, tmp);
}
-static void curve25519_adx_base(u8 session_key[CURVE25519_KEY_SIZE],
- const u8 private_key[CURVE25519_KEY_SIZE])
+static void curve25519_ever64(u8 *out, const u8 *priv, const u8 *pub)
{
- struct {
- u64 buffer[4 * NUM_WORDS_ELTFP25519];
- u64 coordinates[4 * NUM_WORDS_ELTFP25519];
- u64 workspace[4 * NUM_WORDS_ELTFP25519];
- u8 private[CURVE25519_KEY_SIZE];
- } __aligned(32) m;
-
- const int ite[4] = { 64, 64, 64, 63 };
- const int q = 3;
- u64 swap = 1;
-
- int i = 0, j = 0, k = 0;
- u64 *const key = (u64 *)m.private;
- u64 *const Ur1 = m.coordinates + 0;
- u64 *const Zr1 = m.coordinates + 4;
- u64 *const Ur2 = m.coordinates + 8;
- u64 *const Zr2 = m.coordinates + 12;
-
- u64 *const UZr1 = m.coordinates + 0;
- u64 *const ZUr2 = m.coordinates + 8;
-
- u64 *const A = m.workspace + 0;
- u64 *const B = m.workspace + 4;
- u64 *const C = m.workspace + 8;
- u64 *const D = m.workspace + 12;
-
- u64 *const AB = m.workspace + 0;
- u64 *const CD = m.workspace + 8;
-
- const u64 *const P = table_ladder_8k;
-
- memcpy(m.private, private_key, sizeof(m.private));
-
- curve25519_clamp_secret(m.private);
-
- setzero_eltfp25519_1w(Ur1);
- setzero_eltfp25519_1w(Zr1);
- setzero_eltfp25519_1w(Zr2);
- Ur1[0] = 1;
- Zr1[0] = 1;
- Zr2[0] = 1;
-
- /* G-S */
- Ur2[3] = 0x1eaecdeee27cab34UL;
- Ur2[2] = 0xadc7a0b9235d48e2UL;
- Ur2[1] = 0xbbf095ae14b2edf8UL;
- Ur2[0] = 0x7e94e1fec82faabdUL;
-
- /* main-loop */
- j = q;
- for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) {
- while (j < ite[i]) {
- u64 bit = (key[i] >> j) & 0x1;
- k = (64 * i + j - q);
- swap = swap ^ bit;
- cswap(swap, Ur1, Ur2);
- cswap(swap, Zr1, Zr2);
- swap = bit;
- /* Addition */
- sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
- add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */
- mul_eltfp25519_1w_adx(C, &P[4 * k], B); /* C = M0-B */
- sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
- add_eltfp25519_1w_adx(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
- sqr_eltfp25519_2w_adx(AB); /* A = A^2 | B = B^2 */
- mul_eltfp25519_2w_adx(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */
- ++j;
+ u64 init1[8U] = { 0U };
+ u64 tmp[4U] = { 0U };
+ u64 tmp3;
+ u64 *x;
+ u64 *z;
+ {
+ u32 i;
+ for (i = (u32)0U; i < (u32)4U; i = i + (u32)1U) {
+ u64 *os = tmp;
+ const u8 *bj = pub + i * (u32)8U;
+ u64 u = *(u64 *)bj;
+ u64 r = u;
+ u64 x0 = r;
+ os[i] = x0;
}
- j = 0;
}
-
- /* Doubling */
- for (i = 0; i < q; ++i) {
- add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */
- sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
- sqr_eltfp25519_2w_adx(AB); /* A = A**2 B = B**2 */
- copy_eltfp25519_1w(C, B); /* C = B */
- sub_eltfp25519_1w(B, A, B); /* B = A-B */
- mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */
- add_eltfp25519_1w_adx(D, D, C); /* D = D+C */
- mul_eltfp25519_2w_adx(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */
- }
-
- /* Convert to affine coordinates */
- inv_eltfp25519_1w_adx(A, Zr1);
- mul_eltfp25519_1w_adx((u64 *)session_key, Ur1, A);
- fred_eltfp25519_1w((u64 *)session_key);
-
- memzero_explicit(&m, sizeof(m));
+ tmp3 = tmp[3U];
+ tmp[3U] = tmp3 & (u64)0x7fffffffffffffffU;
+ x = init1;
+ z = init1 + (u32)4U;
+ z[0U] = (u64)1U;
+ z[1U] = (u64)0U;
+ z[2U] = (u64)0U;
+ z[3U] = (u64)0U;
+ x[0U] = tmp[0U];
+ x[1U] = tmp[1U];
+ x[2U] = tmp[2U];
+ x[3U] = tmp[3U];
+ montgomery_ladder(init1, priv, init1);
+ encode_point(out, init1);
}
-static void curve25519_bmi2(u8 shared[CURVE25519_KEY_SIZE],
- const u8 private_key[CURVE25519_KEY_SIZE],
- const u8 session_key[CURVE25519_KEY_SIZE])
-{
- struct {
- u64 buffer[4 * NUM_WORDS_ELTFP25519];
- u64 coordinates[4 * NUM_WORDS_ELTFP25519];
- u64 workspace[6 * NUM_WORDS_ELTFP25519];
- u8 session[CURVE25519_KEY_SIZE];
- u8 private[CURVE25519_KEY_SIZE];
- } __aligned(32) m;
-
- int i = 0, j = 0;
- u64 prev = 0;
- u64 *const X1 = (u64 *)m.session;
- u64 *const key = (u64 *)m.private;
- u64 *const Px = m.coordinates + 0;
- u64 *const Pz = m.coordinates + 4;
- u64 *const Qx = m.coordinates + 8;
- u64 *const Qz = m.coordinates + 12;
- u64 *const X2 = Qx;
- u64 *const Z2 = Qz;
- u64 *const X3 = Px;
- u64 *const Z3 = Pz;
- u64 *const X2Z2 = Qx;
- u64 *const X3Z3 = Px;
-
- u64 *const A = m.workspace + 0;
- u64 *const B = m.workspace + 4;
- u64 *const D = m.workspace + 8;
- u64 *const C = m.workspace + 12;
- u64 *const DA = m.workspace + 16;
- u64 *const CB = m.workspace + 20;
- u64 *const AB = A;
- u64 *const DC = D;
- u64 *const DACB = DA;
-
- memcpy(m.private, private_key, sizeof(m.private));
- memcpy(m.session, session_key, sizeof(m.session));
-
- curve25519_clamp_secret(m.private);
-
- /* As in the draft:
- * When receiving such an array, implementations of curve25519
- * MUST mask the most-significant bit in the final byte. This
- * is done to preserve compatibility with point formats which
- * reserve the sign bit for use in other protocols and to
- * increase resistance to implementation fingerprinting
- */
- m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1;
-
- copy_eltfp25519_1w(Px, X1);
- setzero_eltfp25519_1w(Pz);
- setzero_eltfp25519_1w(Qx);
- setzero_eltfp25519_1w(Qz);
-
- Pz[0] = 1;
- Qx[0] = 1;
-
- /* main-loop */
- prev = 0;
- j = 62;
- for (i = 3; i >= 0; --i) {
- while (j >= 0) {
- u64 bit = (key[i] >> j) & 0x1;
- u64 swap = bit ^ prev;
- prev = bit;
-
- add_eltfp25519_1w_bmi2(A, X2, Z2); /* A = (X2+Z2) */
- sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */
- add_eltfp25519_1w_bmi2(C, X3, Z3); /* C = (X3+Z3) */
- sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */
- mul_eltfp25519_2w_bmi2(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */
-
- cselect(swap, A, C);
- cselect(swap, B, D);
-
- sqr_eltfp25519_2w_bmi2(AB); /* [AA|BB] = [A^2|B^2] */
- add_eltfp25519_1w_bmi2(X3, DA, CB); /* X3 = (DA+CB) */
- sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */
- sqr_eltfp25519_2w_bmi2(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */
-
- copy_eltfp25519_1w(X2, B); /* X2 = B^2 */
- sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */
-
- mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */
- add_eltfp25519_1w_bmi2(B, B, X2); /* B = a24*E+B */
- mul_eltfp25519_2w_bmi2(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */
- mul_eltfp25519_1w_bmi2(Z3, Z3, X1); /* Z3 = Z3*X1 */
- --j;
- }
- j = 63;
- }
-
- inv_eltfp25519_1w_bmi2(A, Qz);
- mul_eltfp25519_1w_bmi2((u64 *)shared, Qx, A);
- fred_eltfp25519_1w((u64 *)shared);
+/* The below constants were generated using this sage script:
+ *
+ * #!/usr/bin/env sage
+ * import sys
+ * from sage.all import *
+ * def limbs(n):
+ * n = int(n)
+ * l = ((n >> 0) % 2^64, (n >> 64) % 2^64, (n >> 128) % 2^64, (n >> 192) % 2^64)
+ * return "0x%016xULL, 0x%016xULL, 0x%016xULL, 0x%016xULL" % l
+ * ec = EllipticCurve(GF(2^255 - 19), [0, 486662, 0, 1, 0])
+ * p_minus_s = (ec.lift_x(9) - ec.lift_x(1))[0]
+ * print("static const u64 p_minus_s[] = { %s };\n" % limbs(p_minus_s))
+ * print("static const u64 table_ladder[] = {")
+ * p = ec.lift_x(9)
+ * for i in range(252):
+ * l = (p[0] + p[2]) / (p[0] - p[2])
+ * print(("\t%s" + ("," if i != 251 else "")) % limbs(l))
+ * p = p * 2
+ * print("};")
+ *
+ */
- memzero_explicit(&m, sizeof(m));
-}
+static const u64 p_minus_s[] = { 0x816b1e0137d48290ULL, 0x440f6a51eb4d1207ULL, 0x52385f46dca2b71dULL, 0x215132111d8354cbULL };
+
+static const u64 table_ladder[] = {
+ 0xfffffffffffffff3ULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x5fffffffffffffffULL,
+ 0x6b8220f416aafe96ULL, 0x82ebeb2b4f566a34ULL, 0xd5a9a5b075a5950fULL, 0x5142b2cf4b2488f4ULL,
+ 0x6aaebc750069680cULL, 0x89cf7820a0f99c41ULL, 0x2a58d9183b56d0f4ULL, 0x4b5aca80e36011a4ULL,
+ 0x329132348c29745dULL, 0xf4a2e616e1642fd7ULL, 0x1e45bb03ff67bc34ULL, 0x306912d0f42a9b4aULL,
+ 0xff886507e6af7154ULL, 0x04f50e13dfeec82fULL, 0xaa512fe82abab5ceULL, 0x174e251a68d5f222ULL,
+ 0xcf96700d82028898ULL, 0x1743e3370a2c02c5ULL, 0x379eec98b4e86eaaULL, 0x0c59888a51e0482eULL,
+ 0xfbcbf1d699b5d189ULL, 0xacaef0d58e9fdc84ULL, 0xc1c20d06231f7614ULL, 0x2938218da274f972ULL,
+ 0xf6af49beff1d7f18ULL, 0xcc541c22387ac9c2ULL, 0x96fcc9ef4015c56bULL, 0x69c1627c690913a9ULL,
+ 0x7a86fd2f4733db0eULL, 0xfdb8c4f29e087de9ULL, 0x095e4b1a8ea2a229ULL, 0x1ad7a7c829b37a79ULL,
+ 0x342d89cad17ea0c0ULL, 0x67bedda6cced2051ULL, 0x19ca31bf2bb42f74ULL, 0x3df7b4c84980acbbULL,
+ 0xa8c6444dc80ad883ULL, 0xb91e440366e3ab85ULL, 0xc215cda00164f6d8ULL, 0x3d867c6ef247e668ULL,
+ 0xc7dd582bcc3e658cULL, 0xfd2c4748ee0e5528ULL, 0xa0fd9b95cc9f4f71ULL, 0x7529d871b0675ddfULL,
+ 0xb8f568b42d3cbd78ULL, 0x1233011b91f3da82ULL, 0x2dce6ccd4a7c3b62ULL, 0x75e7fc8e9e498603ULL,
+ 0x2f4f13f1fcd0b6ecULL, 0xf1a8ca1f29ff7a45ULL, 0xc249c1a72981e29bULL, 0x6ebe0dbb8c83b56aULL,
+ 0x7114fa8d170bb222ULL, 0x65a2dcd5bf93935fULL, 0xbdc41f68b59c979aULL, 0x2f0eef79a2ce9289ULL,
+ 0x42ecbf0c083c37ceULL, 0x2930bc09ec496322ULL, 0xf294b0c19cfeac0dULL, 0x3780aa4bedfabb80ULL,
+ 0x56c17d3e7cead929ULL, 0xe7cb4beb2e5722c5ULL, 0x0ce931732dbfe15aULL, 0x41b883c7621052f8ULL,
+ 0xdbf75ca0c3d25350ULL, 0x2936be086eb1e351ULL, 0xc936e03cb4a9b212ULL, 0x1d45bf82322225aaULL,
+ 0xe81ab1036a024cc5ULL, 0xe212201c304c9a72ULL, 0xc5d73fba6832b1fcULL, 0x20ffdb5a4d839581ULL,
+ 0xa283d367be5d0fadULL, 0x6c2b25ca8b164475ULL, 0x9d4935467caaf22eULL, 0x5166408eee85ff49ULL,
+ 0x3c67baa2fab4e361ULL, 0xb3e433c67ef35cefULL, 0x5259729241159b1cULL, 0x6a621892d5b0ab33ULL,
+ 0x20b74a387555cdcbULL, 0x532aa10e1208923fULL, 0xeaa17b7762281dd1ULL, 0x61ab3443f05c44bfULL,
+ 0x257a6c422324def8ULL, 0x131c6c1017e3cf7fULL, 0x23758739f630a257ULL, 0x295a407a01a78580ULL,
+ 0xf8c443246d5da8d9ULL, 0x19d775450c52fa5dULL, 0x2afcfc92731bf83dULL, 0x7d10c8e81b2b4700ULL,
+ 0xc8e0271f70baa20bULL, 0x993748867ca63957ULL, 0x5412efb3cb7ed4bbULL, 0x3196d36173e62975ULL,
+ 0xde5bcad141c7dffcULL, 0x47cc8cd2b395c848ULL, 0xa34cd942e11af3cbULL, 0x0256dbf2d04ecec2ULL,
+ 0x875ab7e94b0e667fULL, 0xcad4dd83c0850d10ULL, 0x47f12e8f4e72c79fULL, 0x5f1a87bb8c85b19bULL,
+ 0x7ae9d0b6437f51b8ULL, 0x12c7ce5518879065ULL, 0x2ade09fe5cf77aeeULL, 0x23a05a2f7d2c5627ULL,
+ 0x5908e128f17c169aULL, 0xf77498dd8ad0852dULL, 0x74b4c4ceab102f64ULL, 0x183abadd10139845ULL,
+ 0xb165ba8daa92aaacULL, 0xd5c5ef9599386705ULL, 0xbe2f8f0cf8fc40d1ULL, 0x2701e635ee204514ULL,
+ 0x629fa80020156514ULL, 0xf223868764a8c1ceULL, 0x5b894fff0b3f060eULL, 0x60d9944cf708a3faULL,
+ 0xaeea001a1c7a201fULL, 0xebf16a633ee2ce63ULL, 0x6f7709594c7a07e1ULL, 0x79b958150d0208cbULL,
+ 0x24b55e5301d410e7ULL, 0xe3a34edff3fdc84dULL, 0xd88768e4904032d8ULL, 0x131384427b3aaeecULL,
+ 0x8405e51286234f14ULL, 0x14dc4739adb4c529ULL, 0xb8a2b5b250634ffdULL, 0x2fe2a94ad8a7ff93ULL,
+ 0xec5c57efe843faddULL, 0x2843ce40f0bb9918ULL, 0xa4b561d6cf3d6305ULL, 0x743629bde8fb777eULL,
+ 0x343edd46bbaf738fULL, 0xed981828b101a651ULL, 0xa401760b882c797aULL, 0x1fc223e28dc88730ULL,
+ 0x48604e91fc0fba0eULL, 0xb637f78f052c6fa4ULL, 0x91ccac3d09e9239cULL, 0x23f7eed4437a687cULL,
+ 0x5173b1118d9bd800ULL, 0x29d641b63189d4a7ULL, 0xfdbf177988bbc586ULL, 0x2959894fcad81df5ULL,
+ 0xaebc8ef3b4bbc899ULL, 0x4148995ab26992b9ULL, 0x24e20b0134f92cfbULL, 0x40d158894a05dee8ULL,
+ 0x46b00b1185af76f6ULL, 0x26bac77873187a79ULL, 0x3dc0bf95ab8fff5fULL, 0x2a608bd8945524d7ULL,
+ 0x26449588bd446302ULL, 0x7c4bc21c0388439cULL, 0x8e98a4f383bd11b2ULL, 0x26218d7bc9d876b9ULL,
+ 0xe3081542997c178aULL, 0x3c2d29a86fb6606fULL, 0x5c217736fa279374ULL, 0x7dde05734afeb1faULL,
+ 0x3bf10e3906d42babULL, 0xe4f7803e1980649cULL, 0xe6053bf89595bf7aULL, 0x394faf38da245530ULL,
+ 0x7a8efb58896928f4ULL, 0xfbc778e9cc6a113cULL, 0x72670ce330af596fULL, 0x48f222a81d3d6cf7ULL,
+ 0xf01fce410d72caa7ULL, 0x5a20ecc7213b5595ULL, 0x7bc21165c1fa1483ULL, 0x07f89ae31da8a741ULL,
+ 0x05d2c2b4c6830ff9ULL, 0xd43e330fc6316293ULL, 0xa5a5590a96d3a904ULL, 0x705edb91a65333b6ULL,
+ 0x048ee15e0bb9a5f7ULL, 0x3240cfca9e0aaf5dULL, 0x8f4b71ceedc4a40bULL, 0x621c0da3de544a6dULL,
+ 0x92872836a08c4091ULL, 0xce8375b010c91445ULL, 0x8a72eb524f276394ULL, 0x2667fcfa7ec83635ULL,
+ 0x7f4c173345e8752aULL, 0x061b47feee7079a5ULL, 0x25dd9afa9f86ff34ULL, 0x3780cef5425dc89cULL,
+ 0x1a46035a513bb4e9ULL, 0x3e1ef379ac575adaULL, 0xc78c5f1c5fa24b50ULL, 0x321a967634fd9f22ULL,
+ 0x946707b8826e27faULL, 0x3dca84d64c506fd0ULL, 0xc189218075e91436ULL, 0x6d9284169b3b8484ULL,
+ 0x3a67e840383f2ddfULL, 0x33eec9a30c4f9b75ULL, 0x3ec7c86fa783ef47ULL, 0x26ec449fbac9fbc4ULL,
+ 0x5c0f38cba09b9e7dULL, 0x81168cc762a3478cULL, 0x3e23b0d306fc121cULL, 0x5a238aa0a5efdcddULL,
+ 0x1ba26121c4ea43ffULL, 0x36f8c77f7c8832b5ULL, 0x88fbea0b0adcf99aULL, 0x5ca9938ec25bebf9ULL,
+ 0xd5436a5e51fccda0ULL, 0x1dbc4797c2cd893bULL, 0x19346a65d3224a08ULL, 0x0f5034e49b9af466ULL,
+ 0xf23c3967a1e0b96eULL, 0xe58b08fa867a4d88ULL, 0xfb2fabc6a7341679ULL, 0x2a75381eb6026946ULL,
+ 0xc80a3be4c19420acULL, 0x66b1f6c681f2b6dcULL, 0x7cf7036761e93388ULL, 0x25abbbd8a660a4c4ULL,
+ 0x91ea12ba14fd5198ULL, 0x684950fc4a3cffa9ULL, 0xf826842130f5ad28ULL, 0x3ea988f75301a441ULL,
+ 0xc978109a695f8c6fULL, 0x1746eb4a0530c3f3ULL, 0x444d6d77b4459995ULL, 0x75952b8c054e5cc7ULL,
+ 0xa3703f7915f4d6aaULL, 0x66c346202f2647d8ULL, 0xd01469df811d644bULL, 0x77fea47d81a5d71fULL,
+ 0xc5e9529ef57ca381ULL, 0x6eeeb4b9ce2f881aULL, 0xb6e91a28e8009bd6ULL, 0x4b80be3e9afc3fecULL,
+ 0x7e3773c526aed2c5ULL, 0x1b4afcb453c9a49dULL, 0xa920bdd7baffb24dULL, 0x7c54699f122d400eULL,
+ 0xef46c8e14fa94bc8ULL, 0xe0b074ce2952ed5eULL, 0xbea450e1dbd885d5ULL, 0x61b68649320f712cULL,
+ 0x8a485f7309ccbdd1ULL, 0xbd06320d7d4d1a2dULL, 0x25232973322dbef4ULL, 0x445dc4758c17f770ULL,
+ 0xdb0434177cc8933cULL, 0xed6fe82175ea059fULL, 0x1efebefdc053db34ULL, 0x4adbe867c65daf99ULL,
+ 0x3acd71a2a90609dfULL, 0xe5e991856dd04050ULL, 0x1ec69b688157c23cULL, 0x697427f6885cfe4dULL,
+ 0xd7be7b9b65e1a851ULL, 0xa03d28d522c536ddULL, 0x28399d658fd2b645ULL, 0x49e5b7e17c2641e1ULL,
+ 0x6f8c3a98700457a4ULL, 0x5078f0a25ebb6778ULL, 0xd13c3ccbc382960fULL, 0x2e003258a7df84b1ULL,
+ 0x8ad1f39be6296a1cULL, 0xc1eeaa652a5fbfb2ULL, 0x33ee0673fd26f3cbULL, 0x59256173a69d2cccULL,
+ 0x41ea07aa4e18fc41ULL, 0xd9fc19527c87a51eULL, 0xbdaacb805831ca6fULL, 0x445b652dc916694fULL,
+ 0xce92a3a7f2172315ULL, 0x1edc282de11b9964ULL, 0xa1823aafe04c314aULL, 0x790a2d94437cf586ULL,
+ 0x71c447fb93f6e009ULL, 0x8922a56722845276ULL, 0xbf70903b204f5169ULL, 0x2f7a89891ba319feULL,
+ 0x02a08eb577e2140cULL, 0xed9a4ed4427bdcf4ULL, 0x5253ec44e4323cd1ULL, 0x3e88363c14e9355bULL,
+ 0xaa66c14277110b8cULL, 0x1ae0391610a23390ULL, 0x2030bd12c93fc2a2ULL, 0x3ee141579555c7abULL,
+ 0x9214de3a6d6e7d41ULL, 0x3ccdd88607f17efeULL, 0x674f1288f8e11217ULL, 0x5682250f329f93d0ULL,
+ 0x6cf00b136d2e396eULL, 0x6e4cf86f1014debfULL, 0x5930b1b5bfcc4e83ULL, 0x047069b48aba16b6ULL,
+ 0x0d4ce4ab69b20793ULL, 0xb24db91a97d0fb9eULL, 0xcdfa50f54e00d01dULL, 0x221b1085368bddb5ULL,
+ 0xe7e59468b1e3d8d2ULL, 0x53c56563bd122f93ULL, 0xeee8a903e0663f09ULL, 0x61efa662cbbe3d42ULL,
+ 0x2cf8ddddde6eab2aULL, 0x9bf80ad51435f231ULL, 0x5deadacec9f04973ULL, 0x29275b5d41d29b27ULL,
+ 0xcfde0f0895ebf14fULL, 0xb9aab96b054905a7ULL, 0xcae80dd9a1c420fdULL, 0x0a63bf2f1673bbc7ULL,
+ 0x092f6e11958fbc8cULL, 0x672a81e804822fadULL, 0xcac8351560d52517ULL, 0x6f3f7722c8f192f8ULL,
+ 0xf8ba90ccc2e894b7ULL, 0x2c7557a438ff9f0dULL, 0x894d1d855ae52359ULL, 0x68e122157b743d69ULL,
+ 0xd87e5570cfb919f3ULL, 0x3f2cdecd95798db9ULL, 0x2121154710c0a2ceULL, 0x3c66a115246dc5b2ULL,
+ 0xcbedc562294ecb72ULL, 0xba7143c36a280b16ULL, 0x9610c2efd4078b67ULL, 0x6144735d946a4b1eULL,
+ 0x536f111ed75b3350ULL, 0x0211db8c2041d81bULL, 0xf93cb1000e10413cULL, 0x149dfd3c039e8876ULL,
+ 0xd479dde46b63155bULL, 0xb66e15e93c837976ULL, 0xdafde43b1f13e038ULL, 0x5fafda1a2e4b0b35ULL,
+ 0x3600bbdf17197581ULL, 0x3972050bbe3cd2c2ULL, 0x5938906dbdd5be86ULL, 0x34fce5e43f9b860fULL,
+ 0x75a8a4cd42d14d02ULL, 0x828dabc53441df65ULL, 0x33dcabedd2e131d3ULL, 0x3ebad76fb814d25fULL,
+ 0xd4906f566f70e10fULL, 0x5d12f7aa51690f5aULL, 0x45adb16e76cefcf2ULL, 0x01f768aead232999ULL,
+ 0x2b6cc77b6248febdULL, 0x3cd30628ec3aaffdULL, 0xce1c0b80d4ef486aULL, 0x4c3bff2ea6f66c23ULL,
+ 0x3f2ec4094aeaeb5fULL, 0x61b19b286e372ca7ULL, 0x5eefa966de2a701dULL, 0x23b20565de55e3efULL,
+ 0xe301ca5279d58557ULL, 0x07b2d4ce27c2874fULL, 0xa532cd8a9dcf1d67ULL, 0x2a52fee23f2bff56ULL,
+ 0x8624efb37cd8663dULL, 0xbbc7ac20ffbd7594ULL, 0x57b85e9c82d37445ULL, 0x7b3052cb86a6ec66ULL,
+ 0x3482f0ad2525e91eULL, 0x2cb68043d28edca0ULL, 0xaf4f6d052e1b003aULL, 0x185f8c2529781b0aULL,
+ 0xaa41de5bd80ce0d6ULL, 0x9407b2416853e9d6ULL, 0x563ec36e357f4c3aULL, 0x4cc4b8dd0e297bceULL,
+ 0xa2fc1a52ffb8730eULL, 0x1811f16e67058e37ULL, 0x10f9a366cddf4ee1ULL, 0x72f4a0c4a0b9f099ULL,
+ 0x8c16c06f663f4ea7ULL, 0x693b3af74e970fbaULL, 0x2102e7f1d69ec345ULL, 0x0ba53cbc968a8089ULL,
+ 0xca3d9dc7fea15537ULL, 0x4c6824bb51536493ULL, 0xb9886314844006b1ULL, 0x40d2a72ab454cc60ULL,
+ 0x5936a1b712570975ULL, 0x91b9d648debda657ULL, 0x3344094bb64330eaULL, 0x006ba10d12ee51d0ULL,
+ 0x19228468f5de5d58ULL, 0x0eb12f4c38cc05b0ULL, 0xa1039f9dd5601990ULL, 0x4502d4ce4fff0e0bULL,
+ 0xeb2054106837c189ULL, 0xd0f6544c6dd3b93cULL, 0x40727064c416d74fULL, 0x6e15c6114b502ef0ULL,
+ 0x4df2a398cfb1a76bULL, 0x11256c7419f2f6b1ULL, 0x4a497962066e6043ULL, 0x705b3aab41355b44ULL,
+ 0x365ef536d797b1d8ULL, 0x00076bd622ddf0dbULL, 0x3bbf33b0e0575a88ULL, 0x3777aa05c8e4ca4dULL,
+ 0x392745c85578db5fULL, 0x6fda4149dbae5ae2ULL, 0xb1f0b00b8adc9867ULL, 0x09963437d36f1da3ULL,
+ 0x7e824e90a5dc3853ULL, 0xccb5f6641f135cbdULL, 0x6736d86c87ce8fccULL, 0x625f3ce26604249fULL,
+ 0xaf8ac8059502f63fULL, 0x0c05e70a2e351469ULL, 0x35292e9c764b6305ULL, 0x1a394360c7e23ac3ULL,
+ 0xd5c6d53251183264ULL, 0x62065abd43c2b74fULL, 0xb5fbf5d03b973f9bULL, 0x13a3da3661206e5eULL,
+ 0xc6bd5837725d94e5ULL, 0x18e30912205016c5ULL, 0x2088ce1570033c68ULL, 0x7fba1f495c837987ULL,
+ 0x5a8c7423f2f9079dULL, 0x1735157b34023fc5ULL, 0xe4f9b49ad2fab351ULL, 0x6691ff72c878e33cULL,
+ 0x122c2adedc5eff3eULL, 0xf8dd4bf1d8956cf4ULL, 0xeb86205d9e9e5bdaULL, 0x049b92b9d975c743ULL,
+ 0xa5379730b0f6c05aULL, 0x72a0ffacc6f3a553ULL, 0xb0032c34b20dcd6dULL, 0x470e9dbc88d5164aULL,
+ 0xb19cf10ca237c047ULL, 0xb65466711f6c81a2ULL, 0xb3321bd16dd80b43ULL, 0x48c14f600c5fbe8eULL,
+ 0x66451c264aa6c803ULL, 0xb66e3904a4fa7da6ULL, 0xd45f19b0b3128395ULL, 0x31602627c3c9bc10ULL,
+ 0x3120dc4832e4e10dULL, 0xeb20c46756c717f7ULL, 0x00f52e3f67280294ULL, 0x566d4fc14730c509ULL,
+ 0x7e3a5d40fd837206ULL, 0xc1e926dc7159547aULL, 0x216730fba68d6095ULL, 0x22e8c3843f69cea7ULL,
+ 0x33d074e8930e4b2bULL, 0xb6e4350e84d15816ULL, 0x5534c26ad6ba2365ULL, 0x7773c12f89f1f3f3ULL,
+ 0x8cba404da57962aaULL, 0x5b9897a81999ce56ULL, 0x508e862f121692fcULL, 0x3a81907fa093c291ULL,
+ 0x0dded0ff4725a510ULL, 0x10d8cc10673fc503ULL, 0x5b9d151c9f1f4e89ULL, 0x32a5c1d5cb09a44cULL,
+ 0x1e0aa442b90541fbULL, 0x5f85eb7cc1b485dbULL, 0xbee595ce8a9df2e5ULL, 0x25e496c722422236ULL,
+ 0x5edf3c46cd0fe5b9ULL, 0x34e75a7ed2a43388ULL, 0xe488de11d761e352ULL, 0x0e878a01a085545cULL,
+ 0xba493c77e021bb04ULL, 0x2b4d1843c7df899aULL, 0x9ea37a487ae80d67ULL, 0x67a9958011e41794ULL,
+ 0x4b58051a6697b065ULL, 0x47e33f7d8d6ba6d4ULL, 0xbb4da8d483ca46c1ULL, 0x68becaa181c2db0dULL,
+ 0x8d8980e90b989aa5ULL, 0xf95eb14a2c93c99bULL, 0x51c6c7c4796e73a2ULL, 0x6e228363b5efb569ULL,
+ 0xc6bbc0b02dd624c8ULL, 0x777eb47dec8170eeULL, 0x3cde15a004cfafa9ULL, 0x1dc6bc087160bf9bULL,
+ 0x2e07e043eec34002ULL, 0x18e9fc677a68dc7fULL, 0xd8da03188bd15b9aULL, 0x48fbc3bb00568253ULL,
+ 0x57547d4cfb654ce1ULL, 0xd3565b82a058e2adULL, 0xf63eaf0bbf154478ULL, 0x47531ef114dfbb18ULL,
+ 0xe1ec630a4278c587ULL, 0x5507d546ca8e83f3ULL, 0x85e135c63adc0c2bULL, 0x0aa7efa85682844eULL,
+ 0x72691ba8b3e1f615ULL, 0x32b4e9701fbe3ffaULL, 0x97b6d92e39bb7868ULL, 0x2cfe53dea02e39e8ULL,
+ 0x687392cd85cd52b0ULL, 0x27ff66c910e29831ULL, 0x97134556a9832d06ULL, 0x269bb0360a84f8a0ULL,
+ 0x706e55457643f85cULL, 0x3734a48c9b597d1bULL, 0x7aee91e8c6efa472ULL, 0x5cd6abc198a9d9e0ULL,
+ 0x0e04de06cb3ce41aULL, 0xd8c6eb893402e138ULL, 0x904659bb686e3772ULL, 0x7215c371746ba8c8ULL,
+ 0xfd12a97eeae4a2d9ULL, 0x9514b7516394f2c5ULL, 0x266fd5809208f294ULL, 0x5c847085619a26b9ULL,
+ 0x52985410fed694eaULL, 0x3c905b934a2ed254ULL, 0x10bb47692d3be467ULL, 0x063b3d2d69e5e9e1ULL,
+ 0x472726eedda57debULL, 0xefb6c4ae10f41891ULL, 0x2b1641917b307614ULL, 0x117c554fc4f45b7cULL,
+ 0xc07cf3118f9d8812ULL, 0x01dbd82050017939ULL, 0xd7e803f4171b2827ULL, 0x1015e87487d225eaULL,
+ 0xc58de3fed23acc4dULL, 0x50db91c294a7be2dULL, 0x0b94d43d1c9cf457ULL, 0x6b1640fa6e37524aULL,
+ 0x692f346c5fda0d09ULL, 0x200b1c59fa4d3151ULL, 0xb8c46f760777a296ULL, 0x4b38395f3ffdfbcfULL,
+ 0x18d25e00be54d671ULL, 0x60d50582bec8aba6ULL, 0x87ad8f263b78b982ULL, 0x50fdf64e9cda0432ULL,
+ 0x90f567aac578dcf0ULL, 0xef1e9b0ef2a3133bULL, 0x0eebba9242d9de71ULL, 0x15473c9bf03101c7ULL,
+ 0x7c77e8ae56b78095ULL, 0xb678e7666e6f078eULL, 0x2da0b9615348ba1fULL, 0x7cf931c1ff733f0bULL,
+ 0x26b357f50a0a366cULL, 0xe9708cf42b87d732ULL, 0xc13aeea5f91cb2c0ULL, 0x35d90c991143bb4cULL,
+ 0x47c1c404a9a0d9dcULL, 0x659e58451972d251ULL, 0x3875a8c473b38c31ULL, 0x1fbd9ed379561f24ULL,
+ 0x11fabc6fd41ec28dULL, 0x7ef8dfe3cd2a2dcaULL, 0x72e73b5d8c404595ULL, 0x6135fa4954b72f27ULL,
+ 0xccfc32a2de24b69cULL, 0x3f55698c1f095d88ULL, 0xbe3350ed5ac3f929ULL, 0x5e9bf806ca477eebULL,
+ 0xe9ce8fb63c309f68ULL, 0x5376f63565e1f9f4ULL, 0xd1afcfb35a6393f1ULL, 0x6632a1ede5623506ULL,
+ 0x0b7d6c390c2ded4cULL, 0x56cb3281df04cb1fULL, 0x66305a1249ecc3c7ULL, 0x5d588b60a38ca72aULL,
+ 0xa6ecbf78e8e5f42dULL, 0x86eeb44b3c8a3eecULL, 0xec219c48fbd21604ULL, 0x1aaf1af517c36731ULL,
+ 0xc306a2836769bde7ULL, 0x208280622b1e2adbULL, 0x8027f51ffbff94a6ULL, 0x76cfa1ce1124f26bULL,
+ 0x18eb00562422abb6ULL, 0xf377c4d58f8c29c3ULL, 0x4dbbc207f531561aULL, 0x0253b7f082128a27ULL,
+ 0x3d1f091cb62c17e0ULL, 0x4860e1abd64628a9ULL, 0x52d17436309d4253ULL, 0x356f97e13efae576ULL,
+ 0xd351e11aa150535bULL, 0x3e6b45bb1dd878ccULL, 0x0c776128bed92c98ULL, 0x1d34ae93032885b8ULL,
+ 0x4ba0488ca85ba4c3ULL, 0x985348c33c9ce6ceULL, 0x66124c6f97bda770ULL, 0x0f81a0290654124aULL,
+ 0x9ed09ca6569b86fdULL, 0x811009fd18af9a2dULL, 0xff08d03f93d8c20aULL, 0x52a148199faef26bULL,
+ 0x3e03f9dc2d8d1b73ULL, 0x4205801873961a70ULL, 0xc0d987f041a35970ULL, 0x07aa1f15a1c0d549ULL,
+ 0xdfd46ce08cd27224ULL, 0x6d0a024f934e4239ULL, 0x808a7a6399897b59ULL, 0x0a4556e9e13d95a2ULL,
+ 0xd21a991fe9c13045ULL, 0x9b0e8548fe7751b8ULL, 0x5da643cb4bf30035ULL, 0x77db28d63940f721ULL,
+ 0xfc5eeb614adc9011ULL, 0x5229419ae8c411ebULL, 0x9ec3e7787d1dcf74ULL, 0x340d053e216e4cb5ULL,
+ 0xcac7af39b48df2b4ULL, 0xc0faec2871a10a94ULL, 0x140a69245ca575edULL, 0x0cf1c37134273a4cULL,
+ 0xc8ee306ac224b8a5ULL, 0x57eaee7ccb4930b0ULL, 0xa1e806bdaacbe74fULL, 0x7d9a62742eeb657dULL,
+ 0x9eb6b6ef546c4830ULL, 0x885cca1fddb36e2eULL, 0xe6b9f383ef0d7105ULL, 0x58654fef9d2e0412ULL,
+ 0xa905c4ffbe0e8e26ULL, 0x942de5df9b31816eULL, 0x497d723f802e88e1ULL, 0x30684dea602f408dULL,
+ 0x21e5a278a3e6cb34ULL, 0xaefb6e6f5b151dc4ULL, 0xb30b8e049d77ca15ULL, 0x28c3c9cf53b98981ULL,
+ 0x287fb721556cdd2aULL, 0x0d317ca897022274ULL, 0x7468c7423a543258ULL, 0x4a7f11464eb5642fULL,
+ 0xa237a4774d193aa6ULL, 0xd865986ea92129a1ULL, 0x24c515ecf87c1a88ULL, 0x604003575f39f5ebULL,
+ 0x47b9f189570a9b27ULL, 0x2b98cede465e4b78ULL, 0x026df551dbb85c20ULL, 0x74fcd91047e21901ULL,
+ 0x13e2a90a23c1bfa3ULL, 0x0cb0074e478519f6ULL, 0x5ff1cbbe3af6cf44ULL, 0x67fe5438be812dbeULL,
+ 0xd13cf64fa40f05b0ULL, 0x054dfb2f32283787ULL, 0x4173915b7f0d2aeaULL, 0x482f144f1f610d4eULL,
+ 0xf6210201b47f8234ULL, 0x5d0ae1929e70b990ULL, 0xdcd7f455b049567cULL, 0x7e93d0f1f0916f01ULL,
+ 0xdd79cbf18a7db4faULL, 0xbe8391bf6f74c62fULL, 0x027145d14b8291bdULL, 0x585a73ea2cbf1705ULL,
+ 0x485ca03e928a0db2ULL, 0x10fc01a5742857e7ULL, 0x2f482edbd6d551a7ULL, 0x0f0433b5048fdb8aULL,
+ 0x60da2e8dd7dc6247ULL, 0x88b4c9d38cd4819aULL, 0x13033ac001f66697ULL, 0x273b24fe3b367d75ULL,
+ 0xc6e8f66a31b3b9d4ULL, 0x281514a494df49d5ULL, 0xd1726fdfc8b23da7ULL, 0x4b3ae7d103dee548ULL,
+ 0xc6256e19ce4b9d7eULL, 0xff5c5cf186e3c61cULL, 0xacc63ca34b8ec145ULL, 0x74621888fee66574ULL,
+ 0x956f409645290a1eULL, 0xef0bf8e3263a962eULL, 0xed6a50eb5ec2647bULL, 0x0694283a9dca7502ULL,
+ 0x769b963643a2dcd1ULL, 0x42b7c8ea09fc5353ULL, 0x4f002aee13397eabULL, 0x63005e2c19b7d63aULL,
+ 0xca6736da63023beaULL, 0x966c7f6db12a99b7ULL, 0xace09390c537c5e1ULL, 0x0b696063a1aa89eeULL,
+ 0xebb03e97288c56e5ULL, 0x432a9f9f938c8be8ULL, 0xa6a5a93d5b717f71ULL, 0x1a5fb4c3e18f9d97ULL,
+ 0x1c94e7ad1c60cdceULL, 0xee202a43fc02c4a0ULL, 0x8dafe4d867c46a20ULL, 0x0a10263c8ac27b58ULL,
+ 0xd0dea9dfe4432a4aULL, 0x856af87bbe9277c5ULL, 0xce8472acc212c71aULL, 0x6f151b6d9bbb1e91ULL,
+ 0x26776c527ceed56aULL, 0x7d211cb7fbf8faecULL, 0x37ae66a6fd4609ccULL, 0x1f81b702d2770c42ULL,
+ 0x2fb0b057eac58392ULL, 0xe1dd89fe29744e9dULL, 0xc964f8eb17beb4f8ULL, 0x29571073c9a2d41eULL,
+ 0xa948a18981c0e254ULL, 0x2df6369b65b22830ULL, 0xa33eb2d75fcfd3c6ULL, 0x078cd6ec4199a01fULL,
+ 0x4a584a41ad900d2fULL, 0x32142b78e2c74c52ULL, 0x68c4e8338431c978ULL, 0x7f69ea9008689fc2ULL,
+ 0x52f2c81e46a38265ULL, 0xfd78072d04a832fdULL, 0x8cd7d5fa25359e94ULL, 0x4de71b7454cc29d2ULL,
+ 0x42eb60ad1eda6ac9ULL, 0x0aad37dfdbc09c3aULL, 0x81004b71e33cc191ULL, 0x44e6be345122803cULL,
+ 0x03fe8388ba1920dbULL, 0xf5d57c32150db008ULL, 0x49c8c4281af60c29ULL, 0x21edb518de701aeeULL,
+ 0x7fb63e418f06dc99ULL, 0xa4460d99c166d7b8ULL, 0x24dd5248ce520a83ULL, 0x5ec3ad712b928358ULL,
+ 0x15022a5fbd17930fULL, 0xa4f64a77d82570e3ULL, 0x12bc8d6915783712ULL, 0x498194c0fc620abbULL,
+ 0x38a2d9d255686c82ULL, 0x785c6bd9193e21f0ULL, 0xe4d5c81ab24a5484ULL, 0x56307860b2e20989ULL,
+ 0x429d55f78b4d74c4ULL, 0x22f1834643350131ULL, 0x1e60c24598c71fffULL, 0x59f2f014979983efULL,
+ 0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL,
+ 0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL,
+ 0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL,
+ 0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL,
+ 0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL,
+ 0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL,
+ 0x42a161981f190d9aULL, 0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL,
+ 0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL,
+ 0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL,
+ 0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL,
+ 0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL,
+ 0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL,
+ 0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL,
+ 0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL,
+ 0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL,
+ 0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL,
+ 0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 0x5e8ce0a71e9d112fULL,
+ 0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL,
+ 0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL,
+ 0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL,
+ 0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL,
+ 0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL,
+ 0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL,
+ 0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL,
+ 0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL,
+ 0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL,
+ 0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 0x69acca274e9caf61ULL,
+ 0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL,
+ 0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL,
+ 0x328689761e451eabULL, 0x2e4d598bff59594aULL, 0x49b96853d7a7084aULL, 0x4980a319601420a8ULL,
+ 0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL,
+ 0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL,
+ 0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL,
+ 0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL,
+ 0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL,
+ 0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL,
+ 0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL,
+ 0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL,
+ 0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 0x27fedafaa54bb592ULL,
+ 0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL,
+ 0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL,
+ 0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL,
+ 0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL,
+ 0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL,
+ 0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL,
+ 0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL,
+ 0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL,
+ 0xbea6818e2b928ef5ULL, 0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL,
+ 0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL,
+ 0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL,
+ 0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL,
+ 0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL,
+ 0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL,
+ 0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL,
+ 0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL,
+ 0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL,
+ 0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL,
+ 0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL,
+ 0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL,
+ 0x164ca5101d1350dbULL, 0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL,
+ 0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL,
+ 0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL
+};
-static void curve25519_bmi2_base(u8 session_key[CURVE25519_KEY_SIZE],
- const u8 private_key[CURVE25519_KEY_SIZE])
+static void curve25519_ever64_base(u8 *out, const u8 *priv)
{
- struct {
- u64 buffer[4 * NUM_WORDS_ELTFP25519];
- u64 coordinates[4 * NUM_WORDS_ELTFP25519];
- u64 workspace[4 * NUM_WORDS_ELTFP25519];
- u8 private[CURVE25519_KEY_SIZE];
- } __aligned(32) m;
-
- const int ite[4] = { 64, 64, 64, 63 };
- const int q = 3;
u64 swap = 1;
-
- int i = 0, j = 0, k = 0;
- u64 *const key = (u64 *)m.private;
- u64 *const Ur1 = m.coordinates + 0;
- u64 *const Zr1 = m.coordinates + 4;
- u64 *const Ur2 = m.coordinates + 8;
- u64 *const Zr2 = m.coordinates + 12;
-
- u64 *const UZr1 = m.coordinates + 0;
- u64 *const ZUr2 = m.coordinates + 8;
-
- u64 *const A = m.workspace + 0;
- u64 *const B = m.workspace + 4;
- u64 *const C = m.workspace + 8;
- u64 *const D = m.workspace + 12;
-
- u64 *const AB = m.workspace + 0;
- u64 *const CD = m.workspace + 8;
-
- const u64 *const P = table_ladder_8k;
-
- memcpy(m.private, private_key, sizeof(m.private));
-
- curve25519_clamp_secret(m.private);
-
- setzero_eltfp25519_1w(Ur1);
- setzero_eltfp25519_1w(Zr1);
- setzero_eltfp25519_1w(Zr2);
- Ur1[0] = 1;
- Zr1[0] = 1;
- Zr2[0] = 1;
-
- /* G-S */
- Ur2[3] = 0x1eaecdeee27cab34UL;
- Ur2[2] = 0xadc7a0b9235d48e2UL;
- Ur2[1] = 0xbbf095ae14b2edf8UL;
- Ur2[0] = 0x7e94e1fec82faabdUL;
-
- /* main-loop */
- j = q;
- for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) {
- while (j < ite[i]) {
- u64 bit = (key[i] >> j) & 0x1;
- k = (64 * i + j - q);
+ int i, j, k;
+ u64 tmp[16 + 32 + 4];
+ u64 *x1 = &tmp[0];
+ u64 *z1 = &tmp[4];
+ u64 *x2 = &tmp[8];
+ u64 *z2 = &tmp[12];
+ u64 *xz1 = &tmp[0];
+ u64 *xz2 = &tmp[8];
+ u64 *a = &tmp[0 + 16];
+ u64 *b = &tmp[4 + 16];
+ u64 *c = &tmp[8 + 16];
+ u64 *ab = &tmp[0 + 16];
+ u64 *abcd = &tmp[0 + 16];
+ u64 *ef = &tmp[16 + 16];
+ u64 *efgh = &tmp[16 + 16];
+ u64 *key = &tmp[0 + 16 + 32];
+
+ memcpy(key, priv, 32);
+ ((u8 *)key)[0] &= 248;
+ ((u8 *)key)[31] = (((u8 *)key)[31] & 127) | 64;
+
+ x1[0] = 1, x1[1] = x1[2] = x1[3] = 0;
+ z1[0] = 1, z1[1] = z1[2] = z1[3] = 0;
+ z2[0] = 1, z2[1] = z2[2] = z2[3] = 0;
+ memcpy(x2, p_minus_s, sizeof(p_minus_s));
+
+ j = 3;
+ for (i = 0; i < 4; ++i) {
+ while (j < (const int[]){ 64, 64, 64, 63 }[i]) {
+ u64 bit = (key[i] >> j) & 1;
+ k = (64 * i + j - 3);
swap = swap ^ bit;
- cswap(swap, Ur1, Ur2);
- cswap(swap, Zr1, Zr2);
+ cswap2(swap, xz1, xz2);
swap = bit;
- /* Addition */
- sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
- add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */
- mul_eltfp25519_1w_bmi2(C, &P[4 * k], B);/* C = M0-B */
- sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
- add_eltfp25519_1w_bmi2(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
- sqr_eltfp25519_2w_bmi2(AB); /* A = A^2 | B = B^2 */
- mul_eltfp25519_2w_bmi2(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */
+ fsub(b, x1, z1);
+ fadd(a, x1, z1);
+ fmul(c, &table_ladder[4 * k], b, ef);
+ fsub(b, a, c);
+ fadd(a, a, c);
+ fsqr2(ab, ab, efgh);
+ fmul2(xz1, xz2, ab, efgh);
++j;
}
j = 0;
}
- /* Doubling */
- for (i = 0; i < q; ++i) {
- add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */
- sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
- sqr_eltfp25519_2w_bmi2(AB); /* A = A**2 B = B**2 */
- copy_eltfp25519_1w(C, B); /* C = B */
- sub_eltfp25519_1w(B, A, B); /* B = A-B */
- mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */
- add_eltfp25519_1w_bmi2(D, D, C); /* D = D+C */
- mul_eltfp25519_2w_bmi2(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */
- }
+ point_double(xz1, abcd, efgh);
+ point_double(xz1, abcd, efgh);
+ point_double(xz1, abcd, efgh);
+ encode_point(out, xz1);
- /* Convert to affine coordinates */
- inv_eltfp25519_1w_bmi2(A, Zr1);
- mul_eltfp25519_1w_bmi2((u64 *)session_key, Ur1, A);
- fred_eltfp25519_1w((u64 *)session_key);
-
- memzero_explicit(&m, sizeof(m));
+ memzero_explicit(tmp, sizeof(tmp));
}
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2_adx);
+
void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE],
const u8 basepoint[CURVE25519_KEY_SIZE])
{
- if (static_branch_likely(&curve25519_use_adx))
- curve25519_adx(mypublic, secret, basepoint);
- else if (static_branch_likely(&curve25519_use_bmi2))
- curve25519_bmi2(mypublic, secret, basepoint);
+ if (static_branch_likely(&curve25519_use_bmi2_adx))
+ curve25519_ever64(mypublic, secret, basepoint);
else
curve25519_generic(mypublic, secret, basepoint);
}
@@ -2355,10 +1395,8 @@ EXPORT_SYMBOL(curve25519_arch);
void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE])
{
- if (static_branch_likely(&curve25519_use_adx))
- curve25519_adx_base(pub, secret);
- else if (static_branch_likely(&curve25519_use_bmi2))
- curve25519_bmi2_base(pub, secret);
+ if (static_branch_likely(&curve25519_use_bmi2_adx))
+ curve25519_ever64_base(pub, secret);
else
curve25519_generic(pub, secret, curve25519_base_point);
}
@@ -2449,12 +1487,11 @@ static struct kpp_alg curve25519_alg = {
.max_size = curve25519_max_size,
};
+
static int __init curve25519_mod_init(void)
{
- if (boot_cpu_has(X86_FEATURE_BMI2))
- static_branch_enable(&curve25519_use_bmi2);
- else if (boot_cpu_has(X86_FEATURE_ADX))
- static_branch_enable(&curve25519_use_adx);
+ if (boot_cpu_has(X86_FEATURE_BMI2) && boot_cpu_has(X86_FEATURE_ADX))
+ static_branch_enable(&curve25519_use_bmi2_adx);
else
return 0;
return IS_REACHABLE(CONFIG_CRYPTO_KPP) ?
@@ -2474,3 +1511,4 @@ module_exit(curve25519_mod_exit);
MODULE_ALIAS_CRYPTO("curve25519");
MODULE_ALIAS_CRYPTO("curve25519-x86");
MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 439367a8e95c..b1cd3535c525 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -821,8 +821,8 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
struct af_alg_tsgl *sgl;
struct af_alg_control con = {};
long copied = 0;
- bool enc = 0;
- bool init = 0;
+ bool enc = false;
+ bool init = false;
int err = 0;
if (msg->msg_controllen) {
@@ -830,13 +830,13 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (err)
return err;
- init = 1;
+ init = true;
switch (con.op) {
case ALG_OP_ENCRYPT:
- enc = 1;
+ enc = true;
break;
case ALG_OP_DECRYPT:
- enc = 0;
+ enc = false;
break;
default:
return -EINVAL;
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 178f4cd75ef1..da1ffa4f7f8d 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -83,7 +83,7 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
goto unlock;
}
- ctx->more = 0;
+ ctx->more = false;
while (msg_data_left(msg)) {
int len = msg_data_left(msg);
@@ -211,7 +211,7 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
}
if (!result || ctx->more) {
- ctx->more = 0;
+ ctx->more = false;
err = crypto_wait_req(crypto_ahash_final(&ctx->req),
&ctx->wait);
if (err)
@@ -436,7 +436,7 @@ static int hash_accept_parent_nokey(void *private, struct sock *sk)
ctx->result = NULL;
ctx->len = len;
- ctx->more = 0;
+ ctx->more = false;
crypto_init_wait(&ctx->wait);
ask->private = ctx;
diff --git a/crypto/authencesn.c b/crypto/authencesn.c
index 589008146fce..149b70df2a91 100644
--- a/crypto/authencesn.c
+++ b/crypto/authencesn.c
@@ -458,7 +458,7 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl,
inst->alg.encrypt = crypto_authenc_esn_encrypt;
inst->alg.decrypt = crypto_authenc_esn_decrypt;
- inst->free = crypto_authenc_esn_free,
+ inst->free = crypto_authenc_esn_free;
err = aead_register_instance(tmpl, inst);
if (err) {
diff --git a/crypto/ccm.c b/crypto/ccm.c
index 241ecdc5c4e0..d1fb01bbc814 100644
--- a/crypto/ccm.c
+++ b/crypto/ccm.c
@@ -717,7 +717,6 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl,
struct aead_instance *inst;
struct crypto_aead_spawn *spawn;
struct aead_alg *alg;
- const char *ccm_name;
int err;
algt = crypto_get_attr_type(tb);
@@ -729,19 +728,15 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl,
mask = crypto_requires_sync(algt->type, algt->mask);
- ccm_name = crypto_attr_alg_name(tb[1]);
- if (IS_ERR(ccm_name))
- return PTR_ERR(ccm_name);
-
inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
if (!inst)
return -ENOMEM;
spawn = aead_instance_ctx(inst);
err = crypto_grab_aead(spawn, aead_crypto_instance(inst),
- ccm_name, 0, mask);
+ crypto_attr_alg_name(tb[1]), 0, mask);
if (err)
- goto out_free_inst;
+ goto err_free_inst;
alg = crypto_spawn_aead_alg(spawn);
@@ -749,11 +744,11 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl,
/* We only support 16-byte blocks. */
if (crypto_aead_alg_ivsize(alg) != 16)
- goto out_drop_alg;
+ goto err_free_inst;
/* Not a stream cipher? */
if (alg->base.cra_blocksize != 1)
- goto out_drop_alg;
+ goto err_free_inst;
err = -ENAMETOOLONG;
if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
@@ -762,7 +757,7 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl,
snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
"rfc4309(%s)", alg->base.cra_driver_name) >=
CRYPTO_MAX_ALG_NAME)
- goto out_drop_alg;
+ goto err_free_inst;
inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
inst->alg.base.cra_priority = alg->base.cra_priority;
@@ -786,17 +781,11 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl,
inst->free = crypto_rfc4309_free;
err = aead_register_instance(tmpl, inst);
- if (err)
- goto out_drop_alg;
-
-out:
+ if (err) {
+err_free_inst:
+ crypto_rfc4309_free(inst);
+ }
return err;
-
-out_drop_alg:
- crypto_drop_aead(spawn);
-out_free_inst:
- kfree(inst);
- goto out;
}
static int crypto_cbcmac_digest_setkey(struct crypto_shash *parent,
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index d94c75c840a5..283212262adb 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -369,7 +369,6 @@ static int cryptd_create_skcipher(struct crypto_template *tmpl,
struct skcipherd_instance_ctx *ctx;
struct skcipher_instance *inst;
struct skcipher_alg *alg;
- const char *name;
u32 type;
u32 mask;
int err;
@@ -379,10 +378,6 @@ static int cryptd_create_skcipher(struct crypto_template *tmpl,
cryptd_check_internal(tb, &type, &mask);
- name = crypto_attr_alg_name(tb[1]);
- if (IS_ERR(name))
- return PTR_ERR(name);
-
inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
if (!inst)
return -ENOMEM;
@@ -391,14 +386,14 @@ static int cryptd_create_skcipher(struct crypto_template *tmpl,
ctx->queue = queue;
err = crypto_grab_skcipher(&ctx->spawn, skcipher_crypto_instance(inst),
- name, type, mask);
+ crypto_attr_alg_name(tb[1]), type, mask);
if (err)
- goto out_free_inst;
+ goto err_free_inst;
alg = crypto_spawn_skcipher_alg(&ctx->spawn);
err = cryptd_init_instance(skcipher_crypto_instance(inst), &alg->base);
if (err)
- goto out_drop_skcipher;
+ goto err_free_inst;
inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC |
(alg->base.cra_flags & CRYPTO_ALG_INTERNAL);
@@ -421,10 +416,8 @@ static int cryptd_create_skcipher(struct crypto_template *tmpl,
err = skcipher_register_instance(tmpl, inst);
if (err) {
-out_drop_skcipher:
- crypto_drop_skcipher(&ctx->spawn);
-out_free_inst:
- kfree(inst);
+err_free_inst:
+ cryptd_skcipher_free(inst);
}
return err;
}
@@ -694,8 +687,7 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
err = ahash_register_instance(tmpl, inst);
if (err) {
err_free_inst:
- crypto_drop_shash(&ctx->spawn);
- kfree(inst);
+ cryptd_hash_free(inst);
}
return err;
}
@@ -833,17 +825,12 @@ static int cryptd_create_aead(struct crypto_template *tmpl,
struct aead_instance_ctx *ctx;
struct aead_instance *inst;
struct aead_alg *alg;
- const char *name;
u32 type = 0;
u32 mask = CRYPTO_ALG_ASYNC;
int err;
cryptd_check_internal(tb, &type, &mask);
- name = crypto_attr_alg_name(tb[1]);
- if (IS_ERR(name))
- return PTR_ERR(name);
-
inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
if (!inst)
return -ENOMEM;
@@ -852,14 +839,14 @@ static int cryptd_create_aead(struct crypto_template *tmpl,
ctx->queue = queue;
err = crypto_grab_aead(&ctx->aead_spawn, aead_crypto_instance(inst),
- name, type, mask);
+ crypto_attr_alg_name(tb[1]), type, mask);
if (err)
- goto out_free_inst;
+ goto err_free_inst;
alg = crypto_spawn_aead_alg(&ctx->aead_spawn);
err = cryptd_init_instance(aead_crypto_instance(inst), &alg->base);
if (err)
- goto out_drop_aead;
+ goto err_free_inst;
inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC |
(alg->base.cra_flags & CRYPTO_ALG_INTERNAL);
@@ -879,10 +866,8 @@ static int cryptd_create_aead(struct crypto_template *tmpl,
err = aead_register_instance(tmpl, inst);
if (err) {
-out_drop_aead:
- crypto_drop_aead(&ctx->aead_spawn);
-out_free_inst:
- kfree(inst);
+err_free_inst:
+ cryptd_aead_free(inst);
}
return err;
}
diff --git a/crypto/ctr.c b/crypto/ctr.c
index a8feab621c6c..31ac4ae598e1 100644
--- a/crypto/ctr.c
+++ b/crypto/ctr.c
@@ -260,7 +260,6 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl,
struct skcipher_instance *inst;
struct skcipher_alg *alg;
struct crypto_skcipher_spawn *spawn;
- const char *cipher_name;
u32 mask;
int err;
@@ -272,10 +271,6 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl,
if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
return -EINVAL;
- cipher_name = crypto_attr_alg_name(tb[1]);
- if (IS_ERR(cipher_name))
- return PTR_ERR(cipher_name);
-
inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
if (!inst)
return -ENOMEM;
@@ -287,7 +282,7 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl,
spawn = skcipher_instance_ctx(inst);
err = crypto_grab_skcipher(spawn, skcipher_crypto_instance(inst),
- cipher_name, 0, mask);
+ crypto_attr_alg_name(tb[1]), 0, mask);
if (err)
goto err_free_inst;
@@ -296,20 +291,20 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl,
/* We only support 16-byte blocks. */
err = -EINVAL;
if (crypto_skcipher_alg_ivsize(alg) != CTR_RFC3686_BLOCK_SIZE)
- goto err_drop_spawn;
+ goto err_free_inst;
/* Not a stream cipher? */
if (alg->base.cra_blocksize != 1)
- goto err_drop_spawn;
+ goto err_free_inst;
err = -ENAMETOOLONG;
if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
"rfc3686(%s)", alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME)
- goto err_drop_spawn;
+ goto err_free_inst;
if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
"rfc3686(%s)", alg->base.cra_driver_name) >=
CRYPTO_MAX_ALG_NAME)
- goto err_drop_spawn;
+ goto err_free_inst;
inst->alg.base.cra_priority = alg->base.cra_priority;
inst->alg.base.cra_blocksize = 1;
@@ -336,17 +331,11 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl,
inst->free = crypto_rfc3686_free;
err = skcipher_register_instance(tmpl, inst);
- if (err)
- goto err_drop_spawn;
-
-out:
- return err;
-
-err_drop_spawn:
- crypto_drop_skcipher(spawn);
+ if (err) {
err_free_inst:
- kfree(inst);
- goto out;
+ crypto_rfc3686_free(inst);
+ }
+ return err;
}
static struct crypto_template crypto_ctr_tmpls[] = {
diff --git a/crypto/cts.c b/crypto/cts.c
index 48188adc8e91..5e005c4f0221 100644
--- a/crypto/cts.c
+++ b/crypto/cts.c
@@ -327,7 +327,6 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb)
struct skcipher_instance *inst;
struct crypto_attr_type *algt;
struct skcipher_alg *alg;
- const char *cipher_name;
u32 mask;
int err;
@@ -340,10 +339,6 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb)
mask = crypto_requires_sync(algt->type, algt->mask);
- cipher_name = crypto_attr_alg_name(tb[1]);
- if (IS_ERR(cipher_name))
- return PTR_ERR(cipher_name);
-
inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
if (!inst)
return -ENOMEM;
@@ -351,7 +346,7 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb)
spawn = skcipher_instance_ctx(inst);
err = crypto_grab_skcipher(spawn, skcipher_crypto_instance(inst),
- cipher_name, 0, mask);
+ crypto_attr_alg_name(tb[1]), 0, mask);
if (err)
goto err_free_inst;
@@ -359,15 +354,15 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb)
err = -EINVAL;
if (crypto_skcipher_alg_ivsize(alg) != alg->base.cra_blocksize)
- goto err_drop_spawn;
+ goto err_free_inst;
if (strncmp(alg->base.cra_name, "cbc(", 4))
- goto err_drop_spawn;
+ goto err_free_inst;
err = crypto_inst_setname(skcipher_crypto_instance(inst), "cts",
&alg->base);
if (err)
- goto err_drop_spawn;
+ goto err_free_inst;
inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
inst->alg.base.cra_priority = alg->base.cra_priority;
@@ -391,17 +386,11 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb)
inst->free = crypto_cts_free;
err = skcipher_register_instance(tmpl, inst);
- if (err)
- goto err_drop_spawn;
-
-out:
- return err;
-
-err_drop_spawn:
- crypto_drop_skcipher(spawn);
+ if (err) {
err_free_inst:
- kfree(inst);
- goto out;
+ crypto_cts_free(inst);
+ }
+ return err;
}
static struct crypto_template crypto_cts_tmpl = {
diff --git a/crypto/gcm.c b/crypto/gcm.c
index 8e5c0ac65661..0103d28c541e 100644
--- a/crypto/gcm.c
+++ b/crypto/gcm.c
@@ -840,7 +840,6 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl,
struct aead_instance *inst;
struct crypto_aead_spawn *spawn;
struct aead_alg *alg;
- const char *ccm_name;
int err;
algt = crypto_get_attr_type(tb);
@@ -852,19 +851,15 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl,
mask = crypto_requires_sync(algt->type, algt->mask);
- ccm_name = crypto_attr_alg_name(tb[1]);
- if (IS_ERR(ccm_name))
- return PTR_ERR(ccm_name);
-
inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
if (!inst)
return -ENOMEM;
spawn = aead_instance_ctx(inst);
err = crypto_grab_aead(spawn, aead_crypto_instance(inst),
- ccm_name, 0, mask);
+ crypto_attr_alg_name(tb[1]), 0, mask);
if (err)
- goto out_free_inst;
+ goto err_free_inst;
alg = crypto_spawn_aead_alg(spawn);
@@ -872,11 +867,11 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl,
/* Underlying IV size must be 12. */
if (crypto_aead_alg_ivsize(alg) != GCM_AES_IV_SIZE)
- goto out_drop_alg;
+ goto err_free_inst;
/* Not a stream cipher? */
if (alg->base.cra_blocksize != 1)
- goto out_drop_alg;
+ goto err_free_inst;
err = -ENAMETOOLONG;
if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
@@ -885,7 +880,7 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl,
snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
"rfc4106(%s)", alg->base.cra_driver_name) >=
CRYPTO_MAX_ALG_NAME)
- goto out_drop_alg;
+ goto err_free_inst;
inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
inst->alg.base.cra_priority = alg->base.cra_priority;
@@ -909,17 +904,11 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl,
inst->free = crypto_rfc4106_free;
err = aead_register_instance(tmpl, inst);
- if (err)
- goto out_drop_alg;
-
-out:
+ if (err) {
+err_free_inst:
+ crypto_rfc4106_free(inst);
+ }
return err;
-
-out_drop_alg:
- crypto_drop_aead(spawn);
-out_free_inst:
- kfree(inst);
- goto out;
}
static int crypto_rfc4543_setkey(struct crypto_aead *parent, const u8 *key,
@@ -1071,10 +1060,8 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl,
struct crypto_attr_type *algt;
u32 mask;
struct aead_instance *inst;
- struct crypto_aead_spawn *spawn;
struct aead_alg *alg;
struct crypto_rfc4543_instance_ctx *ctx;
- const char *ccm_name;
int err;
algt = crypto_get_attr_type(tb);
@@ -1086,32 +1073,27 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl,
mask = crypto_requires_sync(algt->type, algt->mask);
- ccm_name = crypto_attr_alg_name(tb[1]);
- if (IS_ERR(ccm_name))
- return PTR_ERR(ccm_name);
-
inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
if (!inst)
return -ENOMEM;
ctx = aead_instance_ctx(inst);
- spawn = &ctx->aead;
- err = crypto_grab_aead(spawn, aead_crypto_instance(inst),
- ccm_name, 0, mask);
+ err = crypto_grab_aead(&ctx->aead, aead_crypto_instance(inst),
+ crypto_attr_alg_name(tb[1]), 0, mask);
if (err)
- goto out_free_inst;
+ goto err_free_inst;
- alg = crypto_spawn_aead_alg(spawn);
+ alg = crypto_spawn_aead_alg(&ctx->aead);
err = -EINVAL;
/* Underlying IV size must be 12. */
if (crypto_aead_alg_ivsize(alg) != GCM_AES_IV_SIZE)
- goto out_drop_alg;
+ goto err_free_inst;
/* Not a stream cipher? */
if (alg->base.cra_blocksize != 1)
- goto out_drop_alg;
+ goto err_free_inst;
err = -ENAMETOOLONG;
if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
@@ -1120,7 +1102,7 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl,
snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
"rfc4543(%s)", alg->base.cra_driver_name) >=
CRYPTO_MAX_ALG_NAME)
- goto out_drop_alg;
+ goto err_free_inst;
inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
inst->alg.base.cra_priority = alg->base.cra_priority;
@@ -1141,20 +1123,14 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl,
inst->alg.encrypt = crypto_rfc4543_encrypt;
inst->alg.decrypt = crypto_rfc4543_decrypt;
- inst->free = crypto_rfc4543_free,
+ inst->free = crypto_rfc4543_free;
err = aead_register_instance(tmpl, inst);
- if (err)
- goto out_drop_alg;
-
-out:
+ if (err) {
+err_free_inst:
+ crypto_rfc4543_free(inst);
+ }
return err;
-
-out_drop_alg:
- crypto_drop_aead(spawn);
-out_free_inst:
- kfree(inst);
- goto out;
}
static struct crypto_template crypto_gcm_tmpls[] = {
diff --git a/crypto/geniv.c b/crypto/geniv.c
index dbcc640274cd..6a90c52d49ad 100644
--- a/crypto/geniv.c
+++ b/crypto/geniv.c
@@ -41,7 +41,6 @@ static void aead_geniv_free(struct aead_instance *inst)
struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl,
struct rtattr **tb, u32 type, u32 mask)
{
- const char *name;
struct crypto_aead_spawn *spawn;
struct crypto_attr_type *algt;
struct aead_instance *inst;
@@ -57,10 +56,6 @@ struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl,
if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
return ERR_PTR(-EINVAL);
- name = crypto_attr_alg_name(tb[1]);
- if (IS_ERR(name))
- return ERR_CAST(name);
-
inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
if (!inst)
return ERR_PTR(-ENOMEM);
@@ -71,7 +66,7 @@ struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl,
mask |= crypto_requires_sync(algt->type, algt->mask);
err = crypto_grab_aead(spawn, aead_crypto_instance(inst),
- name, type, mask);
+ crypto_attr_alg_name(tb[1]), type, mask);
if (err)
goto err_free_inst;
@@ -82,17 +77,17 @@ struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl,
err = -EINVAL;
if (ivsize < sizeof(u64))
- goto err_drop_alg;
+ goto err_free_inst;
err = -ENAMETOOLONG;
if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
"%s(%s)", tmpl->name, alg->base.cra_name) >=
CRYPTO_MAX_ALG_NAME)
- goto err_drop_alg;
+ goto err_free_inst;
if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
"%s(%s)", tmpl->name, alg->base.cra_driver_name) >=
CRYPTO_MAX_ALG_NAME)
- goto err_drop_alg;
+ goto err_free_inst;
inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
inst->alg.base.cra_priority = alg->base.cra_priority;
@@ -111,10 +106,8 @@ struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl,
out:
return inst;
-err_drop_alg:
- crypto_drop_aead(spawn);
err_free_inst:
- kfree(inst);
+ aead_geniv_free(inst);
inst = ERR_PTR(err);
goto out;
}
diff --git a/crypto/lrw.c b/crypto/lrw.c
index 63c485c0d8a6..376d7ed3f1f8 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -343,15 +343,15 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
err = -EINVAL;
if (alg->base.cra_blocksize != LRW_BLOCK_SIZE)
- goto err_drop_spawn;
+ goto err_free_inst;
if (crypto_skcipher_alg_ivsize(alg))
- goto err_drop_spawn;
+ goto err_free_inst;
err = crypto_inst_setname(skcipher_crypto_instance(inst), "lrw",
&alg->base);
if (err)
- goto err_drop_spawn;
+ goto err_free_inst;
err = -EINVAL;
cipher_name = alg->base.cra_name;
@@ -364,20 +364,20 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
len = strlcpy(ecb_name, cipher_name + 4, sizeof(ecb_name));
if (len < 2 || len >= sizeof(ecb_name))
- goto err_drop_spawn;
+ goto err_free_inst;
if (ecb_name[len - 1] != ')')
- goto err_drop_spawn;
+ goto err_free_inst;
ecb_name[len - 1] = 0;
if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
"lrw(%s)", ecb_name) >= CRYPTO_MAX_ALG_NAME) {
err = -ENAMETOOLONG;
- goto err_drop_spawn;
+ goto err_free_inst;
}
} else
- goto err_drop_spawn;
+ goto err_free_inst;
inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
inst->alg.base.cra_priority = alg->base.cra_priority;
@@ -403,17 +403,11 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
inst->free = free;
err = skcipher_register_instance(tmpl, inst);
- if (err)
- goto err_drop_spawn;
-
-out:
- return err;
-
-err_drop_spawn:
- crypto_drop_skcipher(spawn);
+ if (err) {
err_free_inst:
- kfree(inst);
- goto out;
+ free(inst);
+ }
+ return err;
}
static struct crypto_template crypto_tmpl = {
diff --git a/crypto/md5.c b/crypto/md5.c
index 22dc60bc0437..72c0c46fb5ee 100644
--- a/crypto/md5.c
+++ b/crypto/md5.c
@@ -23,9 +23,6 @@
#include <linux/types.h>
#include <asm/byteorder.h>
-#define MD5_DIGEST_WORDS 4
-#define MD5_MESSAGE_BYTES 64
-
const u8 md5_zero_message_hash[MD5_DIGEST_SIZE] = {
0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04,
0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e,
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index 1b632139a8c1..8bddc65cd509 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -232,17 +232,12 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb,
struct crypto_attr_type *algt;
struct aead_instance *inst;
struct aead_alg *alg;
- const char *name;
int err;
algt = crypto_get_attr_type(tb);
if (IS_ERR(algt))
return PTR_ERR(algt);
- name = crypto_attr_alg_name(tb[1]);
- if (IS_ERR(name))
- return PTR_ERR(name);
-
inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
if (!inst)
return -ENOMEM;
@@ -252,21 +247,21 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb,
ctx = aead_instance_ctx(inst);
ctx->psenc = padata_alloc_shell(pencrypt);
if (!ctx->psenc)
- goto out_free_inst;
+ goto err_free_inst;
ctx->psdec = padata_alloc_shell(pdecrypt);
if (!ctx->psdec)
- goto out_free_psenc;
+ goto err_free_inst;
err = crypto_grab_aead(&ctx->spawn, aead_crypto_instance(inst),
- name, 0, 0);
+ crypto_attr_alg_name(tb[1]), 0, 0);
if (err)
- goto out_free_psdec;
+ goto err_free_inst;
alg = crypto_spawn_aead_alg(&ctx->spawn);
err = pcrypt_init_instance(aead_crypto_instance(inst), &alg->base);
if (err)
- goto out_drop_aead;
+ goto err_free_inst;
inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC;
@@ -286,21 +281,11 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb,
inst->free = pcrypt_free;
err = aead_register_instance(tmpl, inst);
- if (err)
- goto out_drop_aead;
-
-out:
+ if (err) {
+err_free_inst:
+ pcrypt_free(inst);
+ }
return err;
-
-out_drop_aead:
- crypto_drop_aead(&ctx->spawn);
-out_free_psdec:
- padata_free_shell(ctx->psdec);
-out_free_psenc:
- padata_free_shell(ctx->psenc);
-out_free_inst:
- kfree(inst);
- goto out;
}
static int pcrypt_create(struct crypto_template *tmpl, struct rtattr **tb)
diff --git a/crypto/proc.c b/crypto/proc.c
index 7b91557adccb..08d8c2bc7e62 100644
--- a/crypto/proc.c
+++ b/crypto/proc.c
@@ -60,7 +60,7 @@ static int c_show(struct seq_file *m, void *p)
goto out;
}
- switch (alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL)) {
+ switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) {
case CRYPTO_ALG_TYPE_CIPHER:
seq_printf(m, "type : cipher\n");
seq_printf(m, "blocksize : %u\n", alg->cra_blocksize);
diff --git a/crypto/rng.c b/crypto/rng.c
index 1e21231f71c9..1490d210f1a1 100644
--- a/crypto/rng.c
+++ b/crypto/rng.c
@@ -37,12 +37,16 @@ int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen)
crypto_stats_get(alg);
if (!seed && slen) {
buf = kmalloc(slen, GFP_KERNEL);
- if (!buf)
+ if (!buf) {
+ crypto_alg_put(alg);
return -ENOMEM;
+ }
err = get_random_bytes_wait(buf, slen);
- if (err)
+ if (err) {
+ crypto_alg_put(alg);
goto out;
+ }
seed = buf;
}
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 176b63afec8d..d31031de51bc 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -596,14 +596,11 @@ static void pkcs1pad_free(struct akcipher_instance *inst)
static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
{
- const struct rsa_asn1_template *digest_info;
struct crypto_attr_type *algt;
u32 mask;
struct akcipher_instance *inst;
struct pkcs1pad_inst_ctx *ctx;
- struct crypto_akcipher_spawn *spawn;
struct akcipher_alg *rsa_alg;
- const char *rsa_alg_name;
const char *hash_name;
int err;
@@ -616,60 +613,49 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
mask = crypto_requires_sync(algt->type, algt->mask);
- rsa_alg_name = crypto_attr_alg_name(tb[1]);
- if (IS_ERR(rsa_alg_name))
- return PTR_ERR(rsa_alg_name);
-
- hash_name = crypto_attr_alg_name(tb[2]);
- if (IS_ERR(hash_name))
- hash_name = NULL;
-
- if (hash_name) {
- digest_info = rsa_lookup_asn1(hash_name);
- if (!digest_info)
- return -EINVAL;
- } else
- digest_info = NULL;
-
inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
if (!inst)
return -ENOMEM;
ctx = akcipher_instance_ctx(inst);
- spawn = &ctx->spawn;
- ctx->digest_info = digest_info;
- err = crypto_grab_akcipher(spawn, akcipher_crypto_instance(inst),
- rsa_alg_name, 0, mask);
+ err = crypto_grab_akcipher(&ctx->spawn, akcipher_crypto_instance(inst),
+ crypto_attr_alg_name(tb[1]), 0, mask);
if (err)
- goto out_free_inst;
+ goto err_free_inst;
- rsa_alg = crypto_spawn_akcipher_alg(spawn);
+ rsa_alg = crypto_spawn_akcipher_alg(&ctx->spawn);
err = -ENAMETOOLONG;
-
- if (!hash_name) {
+ hash_name = crypto_attr_alg_name(tb[2]);
+ if (IS_ERR(hash_name)) {
if (snprintf(inst->alg.base.cra_name,
CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)",
rsa_alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME)
- goto out_drop_alg;
+ goto err_free_inst;
if (snprintf(inst->alg.base.cra_driver_name,
CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)",
rsa_alg->base.cra_driver_name) >=
CRYPTO_MAX_ALG_NAME)
- goto out_drop_alg;
+ goto err_free_inst;
} else {
+ ctx->digest_info = rsa_lookup_asn1(hash_name);
+ if (!ctx->digest_info) {
+ err = -EINVAL;
+ goto err_free_inst;
+ }
+
if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
"pkcs1pad(%s,%s)", rsa_alg->base.cra_name,
hash_name) >= CRYPTO_MAX_ALG_NAME)
- goto out_drop_alg;
+ goto err_free_inst;
if (snprintf(inst->alg.base.cra_driver_name,
CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s,%s)",
rsa_alg->base.cra_driver_name,
hash_name) >= CRYPTO_MAX_ALG_NAME)
- goto out_drop_alg;
+ goto err_free_inst;
}
inst->alg.base.cra_flags = rsa_alg->base.cra_flags & CRYPTO_ALG_ASYNC;
@@ -691,15 +677,10 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
inst->free = pkcs1pad_free;
err = akcipher_register_instance(tmpl, inst);
- if (err)
- goto out_drop_alg;
-
- return 0;
-
-out_drop_alg:
- crypto_drop_akcipher(spawn);
-out_free_inst:
- kfree(inst);
+ if (err) {
+err_free_inst:
+ pkcs1pad_free(inst);
+ }
return err;
}
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index f42f486e90e8..ba0b7702f2e9 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1514,8 +1514,8 @@ static void test_skcipher_speed(const char *algo, int enc, unsigned int secs,
return;
}
- pr_info("\ntesting speed of async %s (%s) %s\n", algo,
- get_driver_name(crypto_skcipher, tfm), e);
+ pr_info("\ntesting speed of %s %s (%s) %s\n", async ? "async" : "sync",
+ algo, get_driver_name(crypto_skcipher, tfm), e);
req = skcipher_request_alloc(tfm, GFP_KERNEL);
if (!req) {
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index ccb3d60729fc..6863f911fcee 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -91,10 +91,11 @@ struct aead_test_suite {
unsigned int einval_allowed : 1;
/*
- * Set if the algorithm intentionally ignores the last 8 bytes of the
- * AAD buffer during decryption.
+ * Set if this algorithm requires that the IV be located at the end of
+ * the AAD buffer, in addition to being given in the normal way. The
+ * behavior when the two IV copies differ is implementation-defined.
*/
- unsigned int esp_aad : 1;
+ unsigned int aad_iv : 1;
};
struct cipher_test_suite {
@@ -2167,9 +2168,10 @@ struct aead_extra_tests_ctx {
* here means the full ciphertext including the authentication tag. The
* authentication tag (and hence also the ciphertext) is assumed to be nonempty.
*/
-static void mutate_aead_message(struct aead_testvec *vec, bool esp_aad)
+static void mutate_aead_message(struct aead_testvec *vec, bool aad_iv,
+ unsigned int ivsize)
{
- const unsigned int aad_tail_size = esp_aad ? 8 : 0;
+ const unsigned int aad_tail_size = aad_iv ? ivsize : 0;
const unsigned int authsize = vec->clen - vec->plen;
if (prandom_u32() % 2 == 0 && vec->alen > aad_tail_size) {
@@ -2207,6 +2209,9 @@ static void generate_aead_message(struct aead_request *req,
/* Generate the AAD. */
generate_random_bytes((u8 *)vec->assoc, vec->alen);
+ if (suite->aad_iv && vec->alen >= ivsize)
+ /* Avoid implementation-defined behavior. */
+ memcpy((u8 *)vec->assoc + vec->alen - ivsize, vec->iv, ivsize);
if (inauthentic && prandom_u32() % 2 == 0) {
/* Generate a random ciphertext. */
@@ -2242,7 +2247,7 @@ static void generate_aead_message(struct aead_request *req,
* Mutate the authentic (ciphertext, AAD) pair to get an
* inauthentic one.
*/
- mutate_aead_message(vec, suite->esp_aad);
+ mutate_aead_message(vec, suite->aad_iv, ivsize);
}
vec->novrfy = 1;
if (suite->einval_allowed)
@@ -2507,11 +2512,11 @@ static int test_aead_extra(const char *driver,
goto out;
}
- err = test_aead_inauthentic_inputs(ctx);
+ err = test_aead_vs_generic_impl(ctx);
if (err)
goto out;
- err = test_aead_vs_generic_impl(ctx);
+ err = test_aead_inauthentic_inputs(ctx);
out:
kfree(ctx->vec.key);
kfree(ctx->vec.iv);
@@ -5229,7 +5234,7 @@ static const struct alg_test_desc alg_test_descs[] = {
.aead = {
____VECS(aes_gcm_rfc4106_tv_template),
.einval_allowed = 1,
- .esp_aad = 1,
+ .aad_iv = 1,
}
}
}, {
@@ -5241,7 +5246,7 @@ static const struct alg_test_desc alg_test_descs[] = {
.aead = {
____VECS(aes_ccm_rfc4309_tv_template),
.einval_allowed = 1,
- .esp_aad = 1,
+ .aad_iv = 1,
}
}
}, {
@@ -5252,6 +5257,7 @@ static const struct alg_test_desc alg_test_descs[] = {
.aead = {
____VECS(aes_gcm_rfc4543_tv_template),
.einval_allowed = 1,
+ .aad_iv = 1,
}
}
}, {
@@ -5267,7 +5273,7 @@ static const struct alg_test_desc alg_test_descs[] = {
.aead = {
____VECS(rfc7539esp_tv_template),
.einval_allowed = 1,
- .esp_aad = 1,
+ .aad_iv = 1,
}
}
}, {
diff --git a/crypto/xts.c b/crypto/xts.c
index 29efa15f1495..dbdd8af629e6 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -379,15 +379,15 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
err = -EINVAL;
if (alg->base.cra_blocksize != XTS_BLOCK_SIZE)
- goto err_drop_spawn;
+ goto err_free_inst;
if (crypto_skcipher_alg_ivsize(alg))
- goto err_drop_spawn;
+ goto err_free_inst;
err = crypto_inst_setname(skcipher_crypto_instance(inst), "xts",
&alg->base);
if (err)
- goto err_drop_spawn;
+ goto err_free_inst;
err = -EINVAL;
cipher_name = alg->base.cra_name;
@@ -400,20 +400,20 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
len = strlcpy(ctx->name, cipher_name + 4, sizeof(ctx->name));
if (len < 2 || len >= sizeof(ctx->name))
- goto err_drop_spawn;
+ goto err_free_inst;
if (ctx->name[len - 1] != ')')
- goto err_drop_spawn;
+ goto err_free_inst;
ctx->name[len - 1] = 0;
if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
"xts(%s)", ctx->name) >= CRYPTO_MAX_ALG_NAME) {
err = -ENAMETOOLONG;
- goto err_drop_spawn;
+ goto err_free_inst;
}
} else
- goto err_drop_spawn;
+ goto err_free_inst;
inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
inst->alg.base.cra_priority = alg->base.cra_priority;
@@ -437,17 +437,11 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
inst->free = free;
err = skcipher_register_instance(tmpl, inst);
- if (err)
- goto err_drop_spawn;
-
-out:
- return err;
-
-err_drop_spawn:
- crypto_drop_skcipher(&ctx->spawn);
+ if (err) {
err_free_inst:
- kfree(inst);
- goto out;
+ free(inst);
+ }
+ return err;
}
static struct crypto_template crypto_tmpl = {
diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c
index c78d10ea641f..40526da5c6a6 100644
--- a/drivers/bus/fsl-mc/fsl-mc-bus.c
+++ b/drivers/bus/fsl-mc/fsl-mc-bus.c
@@ -26,6 +26,8 @@
*/
#define FSL_MC_DEFAULT_DMA_MASK (~0ULL)
+static struct fsl_mc_version mc_version;
+
/**
* struct fsl_mc - Private data of a "fsl,qoriq-mc" platform device
* @root_mc_bus_dev: fsl-mc device representing the root DPRC
@@ -55,20 +57,6 @@ struct fsl_mc_addr_translation_range {
};
/**
- * struct mc_version
- * @major: Major version number: incremented on API compatibility changes
- * @minor: Minor version number: incremented on API additions (that are
- * backward compatible); reset when major version is incremented
- * @revision: Internal revision number: incremented on implementation changes
- * and/or bug fixes that have no impact on API
- */
-struct mc_version {
- u32 major;
- u32 minor;
- u32 revision;
-};
-
-/**
* fsl_mc_bus_match - device to driver matching callback
* @dev: the fsl-mc device to match against
* @drv: the device driver to search for matching fsl-mc object type
@@ -338,7 +326,7 @@ EXPORT_SYMBOL_GPL(fsl_mc_driver_unregister);
*/
static int mc_get_version(struct fsl_mc_io *mc_io,
u32 cmd_flags,
- struct mc_version *mc_ver_info)
+ struct fsl_mc_version *mc_ver_info)
{
struct fsl_mc_command cmd = { 0 };
struct dpmng_rsp_get_version *rsp_params;
@@ -364,6 +352,20 @@ static int mc_get_version(struct fsl_mc_io *mc_io,
}
/**
+ * fsl_mc_get_version - function to retrieve the MC f/w version information
+ *
+ * Return: mc version when called after fsl-mc-bus probe; NULL otherwise.
+ */
+struct fsl_mc_version *fsl_mc_get_version(void)
+{
+ if (mc_version.major)
+ return &mc_version;
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(fsl_mc_get_version);
+
+/**
* fsl_mc_get_root_dprc - function to traverse to the root dprc
*/
static void fsl_mc_get_root_dprc(struct device *dev,
@@ -862,7 +864,6 @@ static int fsl_mc_bus_probe(struct platform_device *pdev)
int container_id;
phys_addr_t mc_portal_phys_addr;
u32 mc_portal_size;
- struct mc_version mc_version;
struct resource res;
mc = devm_kzalloc(&pdev->dev, sizeof(*mc), GFP_KERNEL);
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 914e293ba62b..9bc46da8d77a 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -244,7 +244,8 @@ config HW_RANDOM_MXC_RNGA
config HW_RANDOM_IMX_RNGC
tristate "Freescale i.MX RNGC Random Number Generator"
- depends on ARCH_MXC
+ depends on HAS_IOMEM && HAVE_CLK
+ depends on SOC_IMX25 || COMPILE_TEST
default HW_RANDOM
---help---
This driver provides kernel-side support for the Random Number
@@ -466,6 +467,13 @@ config HW_RANDOM_NPCM
If unsure, say Y.
+config HW_RANDOM_KEYSTONE
+ depends on ARCH_KEYSTONE || COMPILE_TEST
+ default HW_RANDOM
+ tristate "TI Keystone NETCP SA Hardware random number generator"
+ help
+ This option enables Keystone's hardware random generator.
+
endif # HW_RANDOM
config UML_RANDOM
@@ -482,10 +490,3 @@ config UML_RANDOM
(check your distro, or download from
http://sourceforge.net/projects/gkernel/). rngd periodically reads
/dev/hwrng and injects the entropy into /dev/random.
-
-config HW_RANDOM_KEYSTONE
- depends on ARCH_KEYSTONE || COMPILE_TEST
- default HW_RANDOM
- tristate "TI Keystone NETCP SA Hardware random number generator"
- help
- This option enables Keystone's hardware random generator.
diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c
index 30cf00f8e9a0..9c47e431ce90 100644
--- a/drivers/char/hw_random/imx-rngc.c
+++ b/drivers/char/hw_random/imx-rngc.c
@@ -18,12 +18,22 @@
#include <linux/completion.h>
#include <linux/io.h>
+#define RNGC_VER_ID 0x0000
#define RNGC_COMMAND 0x0004
#define RNGC_CONTROL 0x0008
#define RNGC_STATUS 0x000C
#define RNGC_ERROR 0x0010
#define RNGC_FIFO 0x0014
+/* the fields in the ver id register */
+#define RNGC_TYPE_SHIFT 28
+#define RNGC_VER_MAJ_SHIFT 8
+
+/* the rng_type field */
+#define RNGC_TYPE_RNGB 0x1
+#define RNGC_TYPE_RNGC 0x2
+
+
#define RNGC_CMD_CLR_ERR 0x00000020
#define RNGC_CMD_CLR_INT 0x00000010
#define RNGC_CMD_SEED 0x00000002
@@ -31,6 +41,7 @@
#define RNGC_CTRL_MASK_ERROR 0x00000040
#define RNGC_CTRL_MASK_DONE 0x00000020
+#define RNGC_CTRL_AUTO_SEED 0x00000010
#define RNGC_STATUS_ERROR 0x00010000
#define RNGC_STATUS_FIFO_LEVEL_MASK 0x00000f00
@@ -100,15 +111,11 @@ static int imx_rngc_self_test(struct imx_rngc *rngc)
writel(cmd | RNGC_CMD_SELF_TEST, rngc->base + RNGC_COMMAND);
ret = wait_for_completion_timeout(&rngc->rng_op_done, RNGC_TIMEOUT);
- if (!ret) {
- imx_rngc_irq_mask_clear(rngc);
+ imx_rngc_irq_mask_clear(rngc);
+ if (!ret)
return -ETIMEDOUT;
- }
- if (rngc->err_reg != 0)
- return -EIO;
-
- return 0;
+ return rngc->err_reg ? -EIO : 0;
}
static int imx_rngc_read(struct hwrng *rng, void *data, size_t max, bool wait)
@@ -165,17 +172,17 @@ static irqreturn_t imx_rngc_irq(int irq, void *priv)
static int imx_rngc_init(struct hwrng *rng)
{
struct imx_rngc *rngc = container_of(rng, struct imx_rngc, rng);
- u32 cmd;
+ u32 cmd, ctrl;
int ret;
/* clear error */
cmd = readl(rngc->base + RNGC_COMMAND);
writel(cmd | RNGC_CMD_CLR_ERR, rngc->base + RNGC_COMMAND);
+ imx_rngc_irq_unmask(rngc);
+
/* create seed, repeat while there is some statistical error */
do {
- imx_rngc_irq_unmask(rngc);
-
/* seed creation */
cmd = readl(rngc->base + RNGC_COMMAND);
writel(cmd | RNGC_CMD_SEED, rngc->base + RNGC_COMMAND);
@@ -184,13 +191,42 @@ static int imx_rngc_init(struct hwrng *rng)
RNGC_TIMEOUT);
if (!ret) {
- imx_rngc_irq_mask_clear(rngc);
- return -ETIMEDOUT;
+ ret = -ETIMEDOUT;
+ goto err;
}
} while (rngc->err_reg == RNGC_ERROR_STATUS_STAT_ERR);
- return rngc->err_reg ? -EIO : 0;
+ if (rngc->err_reg) {
+ ret = -EIO;
+ goto err;
+ }
+
+ /*
+ * enable automatic seeding, the rngc creates a new seed automatically
+ * after serving 2^20 random 160-bit words
+ */
+ ctrl = readl(rngc->base + RNGC_CONTROL);
+ ctrl |= RNGC_CTRL_AUTO_SEED;
+ writel(ctrl, rngc->base + RNGC_CONTROL);
+
+ /*
+ * if initialisation was successful, we keep the interrupt
+ * unmasked until imx_rngc_cleanup is called
+ * we mask the interrupt ourselves if we return an error
+ */
+ return 0;
+
+err:
+ imx_rngc_irq_mask_clear(rngc);
+ return ret;
+}
+
+static void imx_rngc_cleanup(struct hwrng *rng)
+{
+ struct imx_rngc *rngc = container_of(rng, struct imx_rngc, rng);
+
+ imx_rngc_irq_mask_clear(rngc);
}
static int imx_rngc_probe(struct platform_device *pdev)
@@ -198,6 +234,8 @@ static int imx_rngc_probe(struct platform_device *pdev)
struct imx_rngc *rngc;
int ret;
int irq;
+ u32 ver_id;
+ u8 rng_type;
rngc = devm_kzalloc(&pdev->dev, sizeof(*rngc), GFP_KERNEL);
if (!rngc)
@@ -223,6 +261,17 @@ static int imx_rngc_probe(struct platform_device *pdev)
if (ret)
return ret;
+ ver_id = readl(rngc->base + RNGC_VER_ID);
+ rng_type = ver_id >> RNGC_TYPE_SHIFT;
+ /*
+ * This driver supports only RNGC and RNGB. (There's a different
+ * driver for RNGA.)
+ */
+ if (rng_type != RNGC_TYPE_RNGC && rng_type != RNGC_TYPE_RNGB) {
+ ret = -ENODEV;
+ goto err;
+ }
+
ret = devm_request_irq(&pdev->dev,
irq, imx_rngc_irq, 0, pdev->name, (void *)rngc);
if (ret) {
@@ -235,6 +284,7 @@ static int imx_rngc_probe(struct platform_device *pdev)
rngc->rng.name = pdev->name;
rngc->rng.init = imx_rngc_init;
rngc->rng.read = imx_rngc_read;
+ rngc->rng.cleanup = imx_rngc_cleanup;
rngc->dev = &pdev->dev;
platform_set_drvdata(pdev, rngc);
@@ -244,18 +294,21 @@ static int imx_rngc_probe(struct platform_device *pdev)
if (self_test) {
ret = imx_rngc_self_test(rngc);
if (ret) {
- dev_err(rngc->dev, "FSL RNGC self test failed.\n");
+ dev_err(rngc->dev, "self test failed\n");
goto err;
}
}
ret = hwrng_register(&rngc->rng);
if (ret) {
- dev_err(&pdev->dev, "FSL RNGC registering failed (%d)\n", ret);
+ dev_err(&pdev->dev, "hwrng registration failed\n");
goto err;
}
- dev_info(&pdev->dev, "Freescale RNGC registered.\n");
+ dev_info(&pdev->dev,
+ "Freescale RNG%c registered (HW revision %d.%02d)\n",
+ rng_type == RNGC_TYPE_RNGB ? 'B' : 'C',
+ (ver_id >> RNGC_VER_MAJ_SHIFT) & 0xff, ver_id & 0xff);
return 0;
err:
diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c
index e08a8887e718..a431c5cbe2be 100644
--- a/drivers/char/hw_random/omap3-rom-rng.c
+++ b/drivers/char/hw_random/omap3-rom-rng.c
@@ -18,6 +18,7 @@
#include <linux/workqueue.h>
#include <linux/clk.h>
#include <linux/err.h>
+#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index c2767ed54dfe..2c887e4d005a 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -233,20 +233,6 @@ config CRYPTO_CRC32_S390
It is available with IBM z13 or later.
-config CRYPTO_DEV_MARVELL_CESA
- tristate "Marvell's Cryptographic Engine driver"
- depends on PLAT_ORION || ARCH_MVEBU
- select CRYPTO_LIB_AES
- select CRYPTO_LIB_DES
- select CRYPTO_SKCIPHER
- select CRYPTO_HASH
- select SRAM
- help
- This driver allows you to utilize the Cryptographic Engines and
- Security Accelerator (CESA) which can be found on MVEBU and ORION
- platforms.
- This driver supports CPU offload through DMA transfers.
-
config CRYPTO_DEV_NIAGARA2
tristate "Niagara2 Stream Processing Unit driver"
select CRYPTO_LIB_DES
@@ -606,6 +592,7 @@ config CRYPTO_DEV_MXS_DCP
source "drivers/crypto/qat/Kconfig"
source "drivers/crypto/cavium/cpt/Kconfig"
source "drivers/crypto/cavium/nitrox/Kconfig"
+source "drivers/crypto/marvell/Kconfig"
config CRYPTO_DEV_CAVIUM_ZIP
tristate "Cavium ZIP driver"
@@ -685,6 +672,29 @@ choice
endchoice
+config CRYPTO_DEV_QCE_SW_MAX_LEN
+ int "Default maximum request size to use software for AES"
+ depends on CRYPTO_DEV_QCE && CRYPTO_DEV_QCE_SKCIPHER
+ default 512
+ help
+ This sets the default maximum request size to perform AES requests
+ using software instead of the crypto engine. It can be changed by
+ setting the aes_sw_max_len parameter.
+
+ Small blocks are processed faster in software than hardware.
+ Considering the 256-bit ciphers, software is 2-3 times faster than
+ qce at 256-bytes, 30% faster at 512, and about even at 768-bytes.
+ With 128-bit keys, the break-even point would be around 1024-bytes.
+
+ The default is set a little lower, to 512 bytes, to balance the
+ cost in CPU usage. The minimum recommended setting is 16-bytes
+ (1 AES block), since AES-GCM will fail if you set it lower.
+ Setting this to zero will send all requests to the hardware.
+
+ Note that 192-bit keys are not supported by the hardware and are
+ always processed by the software fallback, and all DES requests
+ are done by the hardware.
+
config CRYPTO_DEV_QCOM_RNG
tristate "Qualcomm Random Number Generator Driver"
depends on ARCH_QCOM || COMPILE_TEST
@@ -731,6 +741,18 @@ config CRYPTO_DEV_ROCKCHIP
This driver interfaces with the hardware crypto accelerator.
Supporting cbc/ecb chainmode, and aes/des/des3_ede cipher mode.
+config CRYPTO_DEV_ZYNQMP_AES
+ tristate "Support for Xilinx ZynqMP AES hw accelerator"
+ depends on ZYNQMP_FIRMWARE || COMPILE_TEST
+ select CRYPTO_AES
+ select CRYPTO_ENGINE
+ select CRYPTO_AEAD
+ help
+ Xilinx ZynqMP has AES-GCM engine used for symmetric key
+ encryption and decryption. This driver interfaces with AES hw
+ accelerator. Select this if you want to use the ZynqMP module
+ for AES algorithms.
+
config CRYPTO_DEV_MEDIATEK
tristate "MediaTek's EIP97 Cryptographic Engine driver"
depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 40229d499476..944ed7226e37 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o
obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
-obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell/
+obj-$(CONFIG_CRYPTO_DEV_MARVELL) += marvell/
obj-$(CONFIG_CRYPTO_DEV_MEDIATEK) += mediatek/
obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o
obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o
@@ -47,5 +47,6 @@ obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/
obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/
obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) += axis/
+obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_AES) += xilinx/
obj-y += hisilicon/
obj-$(CONFIG_CRYPTO_DEV_AMLOGIC_GXL) += amlogic/
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
index f72346a44e69..3e4e4bbda34c 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
@@ -565,10 +565,8 @@ static int sun8i_ce_probe(struct platform_device *pdev)
/* Get Non Secure IRQ */
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(ce->dev, "Cannot get CryptoEngine Non-secure IRQ\n");
+ if (irq < 0)
return irq;
- }
ce->reset = devm_reset_control_get(&pdev->dev, NULL);
if (IS_ERR(ce->reset)) {
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h
index 8f8404c84a4d..0e9eac397e1b 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h
@@ -214,7 +214,7 @@ struct sun8i_cipher_tfm_ctx {
* this template
* @alg: one of sub struct must be used
* @stat_req: number of request done on this template
- * @stat_fb: total of all data len done on this template
+ * @stat_fb: number of request which has fallbacked
*/
struct sun8i_ce_alg_template {
u32 type;
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h
index b5f855f3de10..29c44f279112 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h
@@ -186,7 +186,7 @@ struct sun8i_cipher_tfm_ctx {
* this template
* @alg: one of sub struct must be used
* @stat_req: number of request done on this template
- * @stat_fb: total of all data len done on this template
+ * @stat_fb: number of request which has fallbacked
*/
struct sun8i_ss_alg_template {
u32 type;
diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c
index 1d3355913b40..e8e8281e027d 100644
--- a/drivers/crypto/atmel-i2c.c
+++ b/drivers/crypto/atmel-i2c.c
@@ -176,7 +176,8 @@ static int atmel_i2c_wakeup(struct i2c_client *client)
* device is idle, asleep or during waking up. Don't check for error
* when waking up the device.
*/
- i2c_master_send(client, i2c_priv->wake_token, i2c_priv->wake_token_sz);
+ i2c_transfer_buffer_flags(client, i2c_priv->wake_token,
+ i2c_priv->wake_token_sz, I2C_M_IGNORE_NAK);
/*
* Wait to wake the device. Typical execution times for ecdh and genkey
diff --git a/drivers/crypto/bcm/util.c b/drivers/crypto/bcm/util.c
index cd7504101acd..2b304fc78059 100644
--- a/drivers/crypto/bcm/util.c
+++ b/drivers/crypto/bcm/util.c
@@ -366,88 +366,88 @@ static ssize_t spu_debugfs_read(struct file *filp, char __user *ubuf,
ipriv = filp->private_data;
out_offset = 0;
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"Number of SPUs.........%u\n",
ipriv->spu.num_spu);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"Current sessions.......%u\n",
atomic_read(&ipriv->session_count));
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"Session count..........%u\n",
atomic_read(&ipriv->stream_count));
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"Cipher setkey..........%u\n",
atomic_read(&ipriv->setkey_cnt[SPU_OP_CIPHER]));
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"Cipher Ops.............%u\n",
atomic_read(&ipriv->op_counts[SPU_OP_CIPHER]));
for (alg = 0; alg < CIPHER_ALG_LAST; alg++) {
for (mode = 0; mode < CIPHER_MODE_LAST; mode++) {
op_cnt = atomic_read(&ipriv->cipher_cnt[alg][mode]);
if (op_cnt) {
- out_offset += snprintf(buf + out_offset,
+ out_offset += scnprintf(buf + out_offset,
out_count - out_offset,
" %-13s%11u\n",
spu_alg_name(alg, mode), op_cnt);
}
}
}
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"Hash Ops...............%u\n",
atomic_read(&ipriv->op_counts[SPU_OP_HASH]));
for (alg = 0; alg < HASH_ALG_LAST; alg++) {
op_cnt = atomic_read(&ipriv->hash_cnt[alg]);
if (op_cnt) {
- out_offset += snprintf(buf + out_offset,
+ out_offset += scnprintf(buf + out_offset,
out_count - out_offset,
" %-13s%11u\n",
hash_alg_name[alg], op_cnt);
}
}
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"HMAC setkey............%u\n",
atomic_read(&ipriv->setkey_cnt[SPU_OP_HMAC]));
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"HMAC Ops...............%u\n",
atomic_read(&ipriv->op_counts[SPU_OP_HMAC]));
for (alg = 0; alg < HASH_ALG_LAST; alg++) {
op_cnt = atomic_read(&ipriv->hmac_cnt[alg]);
if (op_cnt) {
- out_offset += snprintf(buf + out_offset,
+ out_offset += scnprintf(buf + out_offset,
out_count - out_offset,
" %-13s%11u\n",
hash_alg_name[alg], op_cnt);
}
}
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"AEAD setkey............%u\n",
atomic_read(&ipriv->setkey_cnt[SPU_OP_AEAD]));
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"AEAD Ops...............%u\n",
atomic_read(&ipriv->op_counts[SPU_OP_AEAD]));
for (alg = 0; alg < AEAD_TYPE_LAST; alg++) {
op_cnt = atomic_read(&ipriv->aead_cnt[alg]);
if (op_cnt) {
- out_offset += snprintf(buf + out_offset,
+ out_offset += scnprintf(buf + out_offset,
out_count - out_offset,
" %-13s%11u\n",
aead_alg_name[alg], op_cnt);
}
}
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"Bytes of req data......%llu\n",
(u64)atomic64_read(&ipriv->bytes_out));
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"Bytes of resp data.....%llu\n",
(u64)atomic64_read(&ipriv->bytes_in));
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"Mailbox full...........%u\n",
atomic_read(&ipriv->mb_no_spc));
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"Mailbox send failures..%u\n",
atomic_read(&ipriv->mb_send_fail));
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ out_offset += scnprintf(buf + out_offset, out_count - out_offset,
"Check ICV errors.......%u\n",
atomic_read(&ipriv->bad_icv));
if (ipriv->spu.spu_type == SPU_TYPE_SPUM)
@@ -455,7 +455,7 @@ static ssize_t spu_debugfs_read(struct file *filp, char __user *ubuf,
spu_ofifo_ctrl = ioread32(ipriv->spu.reg_vbase[i] +
SPU_OFIFO_CTRL);
fifo_len = spu_ofifo_ctrl & SPU_FIFO_WATERMARK;
- out_offset += snprintf(buf + out_offset,
+ out_offset += scnprintf(buf + out_offset,
out_count - out_offset,
"SPU %d output FIFO high water.....%u\n",
i, fifo_len);
diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig
index fac5b2e26610..a62f228be6da 100644
--- a/drivers/crypto/caam/Kconfig
+++ b/drivers/crypto/caam/Kconfig
@@ -13,6 +13,7 @@ config CRYPTO_DEV_FSL_CAAM
depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE
select SOC_BUS
select CRYPTO_DEV_FSL_CAAM_COMMON
+ imply FSL_MC_BUS
help
Enables the driver module for Freescale's Cryptographic Accelerator
and Assurance Module (CAAM), also known as the SEC version 4 (SEC4).
@@ -33,6 +34,7 @@ config CRYPTO_DEV_FSL_CAAM_DEBUG
menuconfig CRYPTO_DEV_FSL_CAAM_JR
tristate "Freescale CAAM Job Ring driver backend"
+ select CRYPTO_ENGINE
default y
help
Enables the driver module for Job Rings which are part of
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index ef1a65f4fc92..b7bb7c30adeb 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -56,6 +56,7 @@
#include "sg_sw_sec4.h"
#include "key_gen.h"
#include "caamalg_desc.h"
+#include <crypto/engine.h>
/*
* crypto alg
@@ -101,6 +102,7 @@ struct caam_skcipher_alg {
* per-session context
*/
struct caam_ctx {
+ struct crypto_engine_ctx enginectx;
u32 sh_desc_enc[DESC_MAX_USED_LEN];
u32 sh_desc_dec[DESC_MAX_USED_LEN];
u8 key[CAAM_MAX_KEY_SIZE];
@@ -114,6 +116,14 @@ struct caam_ctx {
unsigned int authsize;
};
+struct caam_skcipher_req_ctx {
+ struct skcipher_edesc *edesc;
+};
+
+struct caam_aead_req_ctx {
+ struct aead_edesc *edesc;
+};
+
static int aead_null_set_sh_desc(struct crypto_aead *aead)
{
struct caam_ctx *ctx = crypto_aead_ctx(aead);
@@ -858,6 +868,7 @@ static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
* @mapped_src_nents: number of segments in input h/w link table
* @mapped_dst_nents: number of segments in output h/w link table
* @sec4_sg_bytes: length of dma mapped sec4_sg space
+ * @bklog: stored to determine if the request needs backlog
* @sec4_sg_dma: bus physical mapped address of h/w link table
* @sec4_sg: pointer to h/w link table
* @hw_desc: the h/w job descriptor followed by any referenced link tables
@@ -868,6 +879,7 @@ struct aead_edesc {
int mapped_src_nents;
int mapped_dst_nents;
int sec4_sg_bytes;
+ bool bklog;
dma_addr_t sec4_sg_dma;
struct sec4_sg_entry *sec4_sg;
u32 hw_desc[];
@@ -881,6 +893,7 @@ struct aead_edesc {
* @mapped_dst_nents: number of segments in output h/w link table
* @iv_dma: dma address of iv for checking continuity and link table
* @sec4_sg_bytes: length of dma mapped sec4_sg space
+ * @bklog: stored to determine if the request needs backlog
* @sec4_sg_dma: bus physical mapped address of h/w link table
* @sec4_sg: pointer to h/w link table
* @hw_desc: the h/w job descriptor followed by any referenced link tables
@@ -893,9 +906,10 @@ struct skcipher_edesc {
int mapped_dst_nents;
dma_addr_t iv_dma;
int sec4_sg_bytes;
+ bool bklog;
dma_addr_t sec4_sg_dma;
struct sec4_sg_entry *sec4_sg;
- u32 hw_desc[0];
+ u32 hw_desc[];
};
static void caam_unmap(struct device *dev, struct scatterlist *src,
@@ -941,37 +955,18 @@ static void skcipher_unmap(struct device *dev, struct skcipher_edesc *edesc,
edesc->sec4_sg_dma, edesc->sec4_sg_bytes);
}
-static void aead_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
- void *context)
-{
- struct aead_request *req = context;
- struct aead_edesc *edesc;
- int ecode = 0;
-
- dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
-
- edesc = container_of(desc, struct aead_edesc, hw_desc[0]);
-
- if (err)
- ecode = caam_jr_strstatus(jrdev, err);
-
- aead_unmap(jrdev, edesc, req);
-
- kfree(edesc);
-
- aead_request_complete(req, ecode);
-}
-
-static void aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
- void *context)
+static void aead_crypt_done(struct device *jrdev, u32 *desc, u32 err,
+ void *context)
{
struct aead_request *req = context;
+ struct caam_aead_req_ctx *rctx = aead_request_ctx(req);
+ struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev);
struct aead_edesc *edesc;
int ecode = 0;
dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
- edesc = container_of(desc, struct aead_edesc, hw_desc[0]);
+ edesc = rctx->edesc;
if (err)
ecode = caam_jr_strstatus(jrdev, err);
@@ -980,61 +975,30 @@ static void aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
kfree(edesc);
- aead_request_complete(req, ecode);
-}
-
-static void skcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
- void *context)
-{
- struct skcipher_request *req = context;
- struct skcipher_edesc *edesc;
- struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
- int ivsize = crypto_skcipher_ivsize(skcipher);
- int ecode = 0;
-
- dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
-
- edesc = container_of(desc, struct skcipher_edesc, hw_desc[0]);
-
- if (err)
- ecode = caam_jr_strstatus(jrdev, err);
-
- skcipher_unmap(jrdev, edesc, req);
-
/*
- * The crypto API expects us to set the IV (req->iv) to the last
- * ciphertext block (CBC mode) or last counter (CTR mode).
- * This is used e.g. by the CTS mode.
+ * If no backlog flag, the completion of the request is done
+ * by CAAM, not crypto engine.
*/
- if (ivsize && !ecode) {
- memcpy(req->iv, (u8 *)edesc->sec4_sg + edesc->sec4_sg_bytes,
- ivsize);
- print_hex_dump_debug("dstiv @"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, req->iv,
- edesc->src_nents > 1 ? 100 : ivsize, 1);
- }
-
- caam_dump_sg("dst @" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
- edesc->dst_nents > 1 ? 100 : req->cryptlen, 1);
-
- kfree(edesc);
-
- skcipher_request_complete(req, ecode);
+ if (!edesc->bklog)
+ aead_request_complete(req, ecode);
+ else
+ crypto_finalize_aead_request(jrp->engine, req, ecode);
}
-static void skcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
- void *context)
+static void skcipher_crypt_done(struct device *jrdev, u32 *desc, u32 err,
+ void *context)
{
struct skcipher_request *req = context;
struct skcipher_edesc *edesc;
+ struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req);
struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+ struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev);
int ivsize = crypto_skcipher_ivsize(skcipher);
int ecode = 0;
dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
- edesc = container_of(desc, struct skcipher_edesc, hw_desc[0]);
+ edesc = rctx->edesc;
if (err)
ecode = caam_jr_strstatus(jrdev, err);
@@ -1060,7 +1024,14 @@ static void skcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
kfree(edesc);
- skcipher_request_complete(req, ecode);
+ /*
+ * If no backlog flag, the completion of the request is done
+ * by CAAM, not crypto engine.
+ */
+ if (!edesc->bklog)
+ skcipher_request_complete(req, ecode);
+ else
+ crypto_finalize_skcipher_request(jrp->engine, req, ecode);
}
/*
@@ -1306,6 +1277,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct caam_ctx *ctx = crypto_aead_ctx(aead);
struct device *jrdev = ctx->jrdev;
+ struct caam_aead_req_ctx *rctx = aead_request_ctx(req);
gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
GFP_KERNEL : GFP_ATOMIC;
int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
@@ -1406,6 +1378,9 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
edesc->mapped_dst_nents = mapped_dst_nents;
edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) +
desc_bytes;
+
+ rctx->edesc = edesc;
+
*all_contig_ptr = !(mapped_src_nents > 1);
sec4_sg_index = 0;
@@ -1436,41 +1411,34 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
return edesc;
}
-static int gcm_encrypt(struct aead_request *req)
+static int aead_enqueue_req(struct device *jrdev, struct aead_request *req)
{
- struct aead_edesc *edesc;
- struct crypto_aead *aead = crypto_aead_reqtfm(req);
- struct caam_ctx *ctx = crypto_aead_ctx(aead);
- struct device *jrdev = ctx->jrdev;
- bool all_contig;
- u32 *desc;
- int ret = 0;
-
- /* allocate extended descriptor */
- edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig, true);
- if (IS_ERR(edesc))
- return PTR_ERR(edesc);
-
- /* Create and submit job descriptor */
- init_gcm_job(req, edesc, all_contig, true);
+ struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev);
+ struct caam_aead_req_ctx *rctx = aead_request_ctx(req);
+ struct aead_edesc *edesc = rctx->edesc;
+ u32 *desc = edesc->hw_desc;
+ int ret;
- print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
- desc_bytes(edesc->hw_desc), 1);
+ /*
+ * Only the backlog request are sent to crypto-engine since the others
+ * can be handled by CAAM, if free, especially since JR has up to 1024
+ * entries (more than the 10 entries from crypto-engine).
+ */
+ if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)
+ ret = crypto_transfer_aead_request_to_engine(jrpriv->engine,
+ req);
+ else
+ ret = caam_jr_enqueue(jrdev, desc, aead_crypt_done, req);
- desc = edesc->hw_desc;
- ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req);
- if (!ret) {
- ret = -EINPROGRESS;
- } else {
+ if ((ret != -EINPROGRESS) && (ret != -EBUSY)) {
aead_unmap(jrdev, edesc, req);
- kfree(edesc);
+ kfree(rctx->edesc);
}
return ret;
}
-static int chachapoly_encrypt(struct aead_request *req)
+static inline int chachapoly_crypt(struct aead_request *req, bool encrypt)
{
struct aead_edesc *edesc;
struct crypto_aead *aead = crypto_aead_reqtfm(req);
@@ -1478,180 +1446,130 @@ static int chachapoly_encrypt(struct aead_request *req)
struct device *jrdev = ctx->jrdev;
bool all_contig;
u32 *desc;
- int ret;
edesc = aead_edesc_alloc(req, CHACHAPOLY_DESC_JOB_IO_LEN, &all_contig,
- true);
+ encrypt);
if (IS_ERR(edesc))
return PTR_ERR(edesc);
desc = edesc->hw_desc;
- init_chachapoly_job(req, edesc, all_contig, true);
+ init_chachapoly_job(req, edesc, all_contig, encrypt);
print_hex_dump_debug("chachapoly jobdesc@" __stringify(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
1);
- ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req);
- if (!ret) {
- ret = -EINPROGRESS;
- } else {
- aead_unmap(jrdev, edesc, req);
- kfree(edesc);
- }
-
- return ret;
+ return aead_enqueue_req(jrdev, req);
}
-static int chachapoly_decrypt(struct aead_request *req)
+static int chachapoly_encrypt(struct aead_request *req)
{
- struct aead_edesc *edesc;
- struct crypto_aead *aead = crypto_aead_reqtfm(req);
- struct caam_ctx *ctx = crypto_aead_ctx(aead);
- struct device *jrdev = ctx->jrdev;
- bool all_contig;
- u32 *desc;
- int ret;
-
- edesc = aead_edesc_alloc(req, CHACHAPOLY_DESC_JOB_IO_LEN, &all_contig,
- false);
- if (IS_ERR(edesc))
- return PTR_ERR(edesc);
-
- desc = edesc->hw_desc;
-
- init_chachapoly_job(req, edesc, all_contig, false);
- print_hex_dump_debug("chachapoly jobdesc@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
- 1);
-
- ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req);
- if (!ret) {
- ret = -EINPROGRESS;
- } else {
- aead_unmap(jrdev, edesc, req);
- kfree(edesc);
- }
-
- return ret;
+ return chachapoly_crypt(req, true);
}
-static int ipsec_gcm_encrypt(struct aead_request *req)
+static int chachapoly_decrypt(struct aead_request *req)
{
- return crypto_ipsec_check_assoclen(req->assoclen) ? : gcm_encrypt(req);
+ return chachapoly_crypt(req, false);
}
-static int aead_encrypt(struct aead_request *req)
+static inline int aead_crypt(struct aead_request *req, bool encrypt)
{
struct aead_edesc *edesc;
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct caam_ctx *ctx = crypto_aead_ctx(aead);
struct device *jrdev = ctx->jrdev;
bool all_contig;
- u32 *desc;
- int ret = 0;
/* allocate extended descriptor */
edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN,
- &all_contig, true);
+ &all_contig, encrypt);
if (IS_ERR(edesc))
return PTR_ERR(edesc);
/* Create and submit job descriptor */
- init_authenc_job(req, edesc, all_contig, true);
+ init_authenc_job(req, edesc, all_contig, encrypt);
print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
desc_bytes(edesc->hw_desc), 1);
- desc = edesc->hw_desc;
- ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req);
- if (!ret) {
- ret = -EINPROGRESS;
- } else {
- aead_unmap(jrdev, edesc, req);
- kfree(edesc);
- }
+ return aead_enqueue_req(jrdev, req);
+}
- return ret;
+static int aead_encrypt(struct aead_request *req)
+{
+ return aead_crypt(req, true);
}
-static int gcm_decrypt(struct aead_request *req)
+static int aead_decrypt(struct aead_request *req)
{
- struct aead_edesc *edesc;
- struct crypto_aead *aead = crypto_aead_reqtfm(req);
- struct caam_ctx *ctx = crypto_aead_ctx(aead);
- struct device *jrdev = ctx->jrdev;
- bool all_contig;
- u32 *desc;
- int ret = 0;
+ return aead_crypt(req, false);
+}
- /* allocate extended descriptor */
- edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig, false);
- if (IS_ERR(edesc))
- return PTR_ERR(edesc);
+static int aead_do_one_req(struct crypto_engine *engine, void *areq)
+{
+ struct aead_request *req = aead_request_cast(areq);
+ struct caam_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+ struct caam_aead_req_ctx *rctx = aead_request_ctx(req);
+ u32 *desc = rctx->edesc->hw_desc;
+ int ret;
- /* Create and submit job descriptor*/
- init_gcm_job(req, edesc, all_contig, false);
+ rctx->edesc->bklog = true;
- print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
- desc_bytes(edesc->hw_desc), 1);
+ ret = caam_jr_enqueue(ctx->jrdev, desc, aead_crypt_done, req);
- desc = edesc->hw_desc;
- ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req);
- if (!ret) {
- ret = -EINPROGRESS;
+ if (ret != -EINPROGRESS) {
+ aead_unmap(ctx->jrdev, rctx->edesc, req);
+ kfree(rctx->edesc);
} else {
- aead_unmap(jrdev, edesc, req);
- kfree(edesc);
+ ret = 0;
}
return ret;
}
-static int ipsec_gcm_decrypt(struct aead_request *req)
-{
- return crypto_ipsec_check_assoclen(req->assoclen) ? : gcm_decrypt(req);
-}
-
-static int aead_decrypt(struct aead_request *req)
+static inline int gcm_crypt(struct aead_request *req, bool encrypt)
{
struct aead_edesc *edesc;
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct caam_ctx *ctx = crypto_aead_ctx(aead);
struct device *jrdev = ctx->jrdev;
bool all_contig;
- u32 *desc;
- int ret = 0;
-
- caam_dump_sg("dec src@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, req->src,
- req->assoclen + req->cryptlen, 1);
/* allocate extended descriptor */
- edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN,
- &all_contig, false);
+ edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig,
+ encrypt);
if (IS_ERR(edesc))
return PTR_ERR(edesc);
- /* Create and submit job descriptor*/
- init_authenc_job(req, edesc, all_contig, false);
+ /* Create and submit job descriptor */
+ init_gcm_job(req, edesc, all_contig, encrypt);
print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
desc_bytes(edesc->hw_desc), 1);
- desc = edesc->hw_desc;
- ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req);
- if (!ret) {
- ret = -EINPROGRESS;
- } else {
- aead_unmap(jrdev, edesc, req);
- kfree(edesc);
- }
+ return aead_enqueue_req(jrdev, req);
+}
- return ret;
+static int gcm_encrypt(struct aead_request *req)
+{
+ return gcm_crypt(req, true);
+}
+
+static int gcm_decrypt(struct aead_request *req)
+{
+ return gcm_crypt(req, false);
+}
+
+static int ipsec_gcm_encrypt(struct aead_request *req)
+{
+ return crypto_ipsec_check_assoclen(req->assoclen) ? : gcm_encrypt(req);
+}
+
+static int ipsec_gcm_decrypt(struct aead_request *req)
+{
+ return crypto_ipsec_check_assoclen(req->assoclen) ? : gcm_decrypt(req);
}
/*
@@ -1662,6 +1580,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
{
struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+ struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req);
struct device *jrdev = ctx->jrdev;
gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
GFP_KERNEL : GFP_ATOMIC;
@@ -1760,6 +1679,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
edesc->sec4_sg_bytes = sec4_sg_bytes;
edesc->sec4_sg = (struct sec4_sg_entry *)((u8 *)edesc->hw_desc +
desc_bytes);
+ rctx->edesc = edesc;
/* Make sure IV is located in a DMAable area */
if (ivsize) {
@@ -1815,49 +1735,35 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
return edesc;
}
-static int skcipher_encrypt(struct skcipher_request *req)
+static int skcipher_do_one_req(struct crypto_engine *engine, void *areq)
{
- struct skcipher_edesc *edesc;
- struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
- struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
- struct device *jrdev = ctx->jrdev;
- u32 *desc;
- int ret = 0;
-
- if (!req->cryptlen)
- return 0;
-
- /* allocate extended descriptor */
- edesc = skcipher_edesc_alloc(req, DESC_JOB_IO_LEN * CAAM_CMD_SZ);
- if (IS_ERR(edesc))
- return PTR_ERR(edesc);
-
- /* Create and submit job descriptor*/
- init_skcipher_job(req, edesc, true);
+ struct skcipher_request *req = skcipher_request_cast(areq);
+ struct caam_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+ struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req);
+ u32 *desc = rctx->edesc->hw_desc;
+ int ret;
- print_hex_dump_debug("skcipher jobdesc@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
- desc_bytes(edesc->hw_desc), 1);
+ rctx->edesc->bklog = true;
- desc = edesc->hw_desc;
- ret = caam_jr_enqueue(jrdev, desc, skcipher_encrypt_done, req);
+ ret = caam_jr_enqueue(ctx->jrdev, desc, skcipher_crypt_done, req);
- if (!ret) {
- ret = -EINPROGRESS;
+ if (ret != -EINPROGRESS) {
+ skcipher_unmap(ctx->jrdev, rctx->edesc, req);
+ kfree(rctx->edesc);
} else {
- skcipher_unmap(jrdev, edesc, req);
- kfree(edesc);
+ ret = 0;
}
return ret;
}
-static int skcipher_decrypt(struct skcipher_request *req)
+static inline int skcipher_crypt(struct skcipher_request *req, bool encrypt)
{
struct skcipher_edesc *edesc;
struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
struct device *jrdev = ctx->jrdev;
+ struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev);
u32 *desc;
int ret = 0;
@@ -1870,17 +1776,25 @@ static int skcipher_decrypt(struct skcipher_request *req)
return PTR_ERR(edesc);
/* Create and submit job descriptor*/
- init_skcipher_job(req, edesc, false);
- desc = edesc->hw_desc;
+ init_skcipher_job(req, edesc, encrypt);
print_hex_dump_debug("skcipher jobdesc@" __stringify(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
desc_bytes(edesc->hw_desc), 1);
- ret = caam_jr_enqueue(jrdev, desc, skcipher_decrypt_done, req);
- if (!ret) {
- ret = -EINPROGRESS;
- } else {
+ desc = edesc->hw_desc;
+ /*
+ * Only the backlog request are sent to crypto-engine since the others
+ * can be handled by CAAM, if free, especially since JR has up to 1024
+ * entries (more than the 10 entries from crypto-engine).
+ */
+ if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)
+ ret = crypto_transfer_skcipher_request_to_engine(jrpriv->engine,
+ req);
+ else
+ ret = caam_jr_enqueue(jrdev, desc, skcipher_crypt_done, req);
+
+ if ((ret != -EINPROGRESS) && (ret != -EBUSY)) {
skcipher_unmap(jrdev, edesc, req);
kfree(edesc);
}
@@ -1888,6 +1802,16 @@ static int skcipher_decrypt(struct skcipher_request *req)
return ret;
}
+static int skcipher_encrypt(struct skcipher_request *req)
+{
+ return skcipher_crypt(req, true);
+}
+
+static int skcipher_decrypt(struct skcipher_request *req)
+{
+ return skcipher_crypt(req, false);
+}
+
static struct caam_skcipher_alg driver_algs[] = {
{
.skcipher = {
@@ -3391,6 +3315,8 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam,
{
dma_addr_t dma_addr;
struct caam_drv_private *priv;
+ const size_t sh_desc_enc_offset = offsetof(struct caam_ctx,
+ sh_desc_enc);
ctx->jrdev = caam_jr_alloc();
if (IS_ERR(ctx->jrdev)) {
@@ -3406,7 +3332,8 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam,
dma_addr = dma_map_single_attrs(ctx->jrdev, ctx->sh_desc_enc,
offsetof(struct caam_ctx,
- sh_desc_enc_dma),
+ sh_desc_enc_dma) -
+ sh_desc_enc_offset,
ctx->dir, DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(ctx->jrdev, dma_addr)) {
dev_err(ctx->jrdev, "unable to map key, shared descriptors\n");
@@ -3416,8 +3343,10 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam,
ctx->sh_desc_enc_dma = dma_addr;
ctx->sh_desc_dec_dma = dma_addr + offsetof(struct caam_ctx,
- sh_desc_dec);
- ctx->key_dma = dma_addr + offsetof(struct caam_ctx, key);
+ sh_desc_dec) -
+ sh_desc_enc_offset;
+ ctx->key_dma = dma_addr + offsetof(struct caam_ctx, key) -
+ sh_desc_enc_offset;
/* copy descriptor header template value */
ctx->cdata.algtype = OP_TYPE_CLASS1_ALG | caam->class1_alg_type;
@@ -3431,6 +3360,11 @@ static int caam_cra_init(struct crypto_skcipher *tfm)
struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
struct caam_skcipher_alg *caam_alg =
container_of(alg, typeof(*caam_alg), skcipher);
+ struct caam_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct caam_skcipher_req_ctx));
+
+ ctx->enginectx.op.do_one_request = skcipher_do_one_req;
return caam_init_common(crypto_skcipher_ctx(tfm), &caam_alg->caam,
false);
@@ -3443,13 +3377,18 @@ static int caam_aead_init(struct crypto_aead *tfm)
container_of(alg, struct caam_aead_alg, aead);
struct caam_ctx *ctx = crypto_aead_ctx(tfm);
+ crypto_aead_set_reqsize(tfm, sizeof(struct caam_aead_req_ctx));
+
+ ctx->enginectx.op.do_one_request = aead_do_one_req;
+
return caam_init_common(ctx, &caam_alg->caam, !caam_alg->caam.nodkp);
}
static void caam_exit_common(struct caam_ctx *ctx)
{
dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_enc_dma,
- offsetof(struct caam_ctx, sh_desc_enc_dma),
+ offsetof(struct caam_ctx, sh_desc_enc_dma) -
+ offsetof(struct caam_ctx, sh_desc_enc),
ctx->dir, DMA_ATTR_SKIP_CPU_SYNC);
caam_jr_free(ctx->jrdev);
}
diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c
index aa9ccca67045..d6c58184bb57 100644
--- a/drivers/crypto/caam/caamalg_desc.c
+++ b/drivers/crypto/caam/caamalg_desc.c
@@ -1379,6 +1379,9 @@ void cnstr_shdsc_skcipher_encap(u32 * const desc, struct alginfo *cdata,
const u32 ctx1_iv_off)
{
u32 *key_jump_cmd;
+ u32 options = cdata->algtype | OP_ALG_AS_INIT | OP_ALG_ENCRYPT;
+ bool is_chacha20 = ((cdata->algtype & OP_ALG_ALGSEL_MASK) ==
+ OP_ALG_ALGSEL_CHACHA20);
init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
/* Skip if already shared */
@@ -1417,14 +1420,15 @@ void cnstr_shdsc_skcipher_encap(u32 * const desc, struct alginfo *cdata,
LDST_OFFSET_SHIFT));
/* Load operation */
- append_operation(desc, cdata->algtype | OP_ALG_AS_INIT |
- OP_ALG_ENCRYPT);
+ if (is_chacha20)
+ options |= OP_ALG_AS_FINALIZE;
+ append_operation(desc, options);
/* Perform operation */
skcipher_append_src_dst(desc);
/* Store IV */
- if (ivsize)
+ if (!is_chacha20 && ivsize)
append_seq_store(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT |
LDST_CLASS_1_CCB | (ctx1_iv_off <<
LDST_OFFSET_SHIFT));
@@ -1451,6 +1455,8 @@ void cnstr_shdsc_skcipher_decap(u32 * const desc, struct alginfo *cdata,
const u32 ctx1_iv_off)
{
u32 *key_jump_cmd;
+ bool is_chacha20 = ((cdata->algtype & OP_ALG_ALGSEL_MASK) ==
+ OP_ALG_ALGSEL_CHACHA20);
init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
/* Skip if already shared */
@@ -1499,7 +1505,7 @@ void cnstr_shdsc_skcipher_decap(u32 * const desc, struct alginfo *cdata,
skcipher_append_src_dst(desc);
/* Store IV */
- if (ivsize)
+ if (!is_chacha20 && ivsize)
append_seq_store(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT |
LDST_CLASS_1_CCB | (ctx1_iv_off <<
LDST_OFFSET_SHIFT));
@@ -1518,7 +1524,13 @@ EXPORT_SYMBOL(cnstr_shdsc_skcipher_decap);
*/
void cnstr_shdsc_xts_skcipher_encap(u32 * const desc, struct alginfo *cdata)
{
- __be64 sector_size = cpu_to_be64(512);
+ /*
+ * Set sector size to a big value, practically disabling
+ * sector size segmentation in xts implementation. We cannot
+ * take full advantage of this HW feature with existing
+ * crypto API / dm-crypt SW architecture.
+ */
+ __be64 sector_size = cpu_to_be64(BIT(15));
u32 *key_jump_cmd;
init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
@@ -1571,7 +1583,13 @@ EXPORT_SYMBOL(cnstr_shdsc_xts_skcipher_encap);
*/
void cnstr_shdsc_xts_skcipher_decap(u32 * const desc, struct alginfo *cdata)
{
- __be64 sector_size = cpu_to_be64(512);
+ /*
+ * Set sector size to a big value, practically disabling
+ * sector size segmentation in xts implementation. We cannot
+ * take full advantage of this HW feature with existing
+ * crypto API / dm-crypt SW architecture.
+ */
+ __be64 sector_size = cpu_to_be64(BIT(15));
u32 *key_jump_cmd;
init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index 4a29e0ef9d63..27e36bdf6163 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -783,7 +783,7 @@ struct aead_edesc {
unsigned int assoclen;
dma_addr_t assoclen_dma;
struct caam_drv_req drv_req;
- struct qm_sg_entry sgt[0];
+ struct qm_sg_entry sgt[];
};
/*
@@ -803,7 +803,7 @@ struct skcipher_edesc {
int qm_sg_bytes;
dma_addr_t qm_sg_dma;
struct caam_drv_req drv_req;
- struct qm_sg_entry sgt[0];
+ struct qm_sg_entry sgt[];
};
static struct caam_drv_ctx *get_drv_ctx(struct caam_ctx *ctx,
diff --git a/drivers/crypto/caam/caamalg_qi2.h b/drivers/crypto/caam/caamalg_qi2.h
index 706736776b47..f29cb7bd7dd3 100644
--- a/drivers/crypto/caam/caamalg_qi2.h
+++ b/drivers/crypto/caam/caamalg_qi2.h
@@ -114,7 +114,7 @@ struct aead_edesc {
dma_addr_t qm_sg_dma;
unsigned int assoclen;
dma_addr_t assoclen_dma;
- struct dpaa2_sg_entry sgt[0];
+ struct dpaa2_sg_entry sgt[];
};
/*
@@ -132,7 +132,7 @@ struct skcipher_edesc {
dma_addr_t iv_dma;
int qm_sg_bytes;
dma_addr_t qm_sg_dma;
- struct dpaa2_sg_entry sgt[0];
+ struct dpaa2_sg_entry sgt[];
};
/*
@@ -146,7 +146,7 @@ struct ahash_edesc {
dma_addr_t qm_sg_dma;
int src_nents;
int qm_sg_bytes;
- struct dpaa2_sg_entry sgt[0];
+ struct dpaa2_sg_entry sgt[];
};
/**
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 8d9143407fc5..943bc0296267 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -65,6 +65,7 @@
#include "sg_sw_sec4.h"
#include "key_gen.h"
#include "caamhash_desc.h"
+#include <crypto/engine.h>
#define CAAM_CRA_PRIORITY 3000
@@ -86,6 +87,7 @@ static struct list_head hash_list;
/* ahash per-session context */
struct caam_hash_ctx {
+ struct crypto_engine_ctx enginectx;
u32 sh_desc_update[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
u32 sh_desc_update_first[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
u32 sh_desc_fin[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
@@ -111,9 +113,12 @@ struct caam_hash_state {
int buflen;
int next_buflen;
u8 caam_ctx[MAX_CTX_LEN] ____cacheline_aligned;
- int (*update)(struct ahash_request *req);
+ int (*update)(struct ahash_request *req) ____cacheline_aligned;
int (*final)(struct ahash_request *req);
int (*finup)(struct ahash_request *req);
+ struct ahash_edesc *edesc;
+ void (*ahash_op_done)(struct device *jrdev, u32 *desc, u32 err,
+ void *context);
};
struct caam_export_state {
@@ -395,7 +400,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key,
init_completion(&result.completion);
ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result);
- if (!ret) {
+ if (ret == -EINPROGRESS) {
/* in progress */
wait_for_completion(&result.completion);
ret = result.err;
@@ -521,6 +526,7 @@ static int acmac_setkey(struct crypto_ahash *ahash, const u8 *key,
* @sec4_sg_dma: physical mapped address of h/w link table
* @src_nents: number of segments in input scatterlist
* @sec4_sg_bytes: length of dma mapped sec4_sg space
+ * @bklog: stored to determine if the request needs backlog
* @hw_desc: the h/w job descriptor followed by any referenced link tables
* @sec4_sg: h/w link table
*/
@@ -528,8 +534,9 @@ struct ahash_edesc {
dma_addr_t sec4_sg_dma;
int src_nents;
int sec4_sg_bytes;
+ bool bklog;
u32 hw_desc[DESC_JOB_IO_LEN_MAX / sizeof(u32)] ____cacheline_aligned;
- struct sec4_sg_entry sec4_sg[0];
+ struct sec4_sg_entry sec4_sg[];
};
static inline void ahash_unmap(struct device *dev,
@@ -565,10 +572,11 @@ static inline void ahash_unmap_ctx(struct device *dev,
ahash_unmap(dev, edesc, req, dst_len);
}
-static void ahash_done(struct device *jrdev, u32 *desc, u32 err,
- void *context)
+static inline void ahash_done_cpy(struct device *jrdev, u32 *desc, u32 err,
+ void *context, enum dma_data_direction dir)
{
struct ahash_request *req = context;
+ struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev);
struct ahash_edesc *edesc;
struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
int digestsize = crypto_ahash_digestsize(ahash);
@@ -578,11 +586,12 @@ static void ahash_done(struct device *jrdev, u32 *desc, u32 err,
dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
- edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
+ edesc = state->edesc;
+
if (err)
ecode = caam_jr_strstatus(jrdev, err);
- ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
+ ahash_unmap_ctx(jrdev, edesc, req, digestsize, dir);
memcpy(req->result, state->caam_ctx, digestsize);
kfree(edesc);
@@ -590,81 +599,33 @@ static void ahash_done(struct device *jrdev, u32 *desc, u32 err,
DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
ctx->ctx_len, 1);
- req->base.complete(&req->base, ecode);
+ /*
+ * If no backlog flag, the completion of the request is done
+ * by CAAM, not crypto engine.
+ */
+ if (!edesc->bklog)
+ req->base.complete(&req->base, ecode);
+ else
+ crypto_finalize_hash_request(jrp->engine, req, ecode);
}
-static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err,
- void *context)
+static void ahash_done(struct device *jrdev, u32 *desc, u32 err,
+ void *context)
{
- struct ahash_request *req = context;
- struct ahash_edesc *edesc;
- struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
- struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
- struct caam_hash_state *state = ahash_request_ctx(req);
- int digestsize = crypto_ahash_digestsize(ahash);
- int ecode = 0;
-
- dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
-
- edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
- if (err)
- ecode = caam_jr_strstatus(jrdev, err);
-
- ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL);
- kfree(edesc);
-
- scatterwalk_map_and_copy(state->buf, req->src,
- req->nbytes - state->next_buflen,
- state->next_buflen, 0);
- state->buflen = state->next_buflen;
-
- print_hex_dump_debug("buf@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, state->buf,
- state->buflen, 1);
-
- print_hex_dump_debug("ctx@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
- ctx->ctx_len, 1);
- if (req->result)
- print_hex_dump_debug("result@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, req->result,
- digestsize, 1);
-
- req->base.complete(&req->base, ecode);
+ ahash_done_cpy(jrdev, desc, err, context, DMA_FROM_DEVICE);
}
static void ahash_done_ctx_src(struct device *jrdev, u32 *desc, u32 err,
void *context)
{
- struct ahash_request *req = context;
- struct ahash_edesc *edesc;
- struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
- int digestsize = crypto_ahash_digestsize(ahash);
- struct caam_hash_state *state = ahash_request_ctx(req);
- struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
- int ecode = 0;
-
- dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
-
- edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
- if (err)
- ecode = caam_jr_strstatus(jrdev, err);
-
- ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_BIDIRECTIONAL);
- memcpy(req->result, state->caam_ctx, digestsize);
- kfree(edesc);
-
- print_hex_dump_debug("ctx@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
- ctx->ctx_len, 1);
-
- req->base.complete(&req->base, ecode);
+ ahash_done_cpy(jrdev, desc, err, context, DMA_BIDIRECTIONAL);
}
-static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err,
- void *context)
+static inline void ahash_done_switch(struct device *jrdev, u32 *desc, u32 err,
+ void *context, enum dma_data_direction dir)
{
struct ahash_request *req = context;
+ struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev);
struct ahash_edesc *edesc;
struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
@@ -674,11 +635,11 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err,
dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
- edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
+ edesc = state->edesc;
if (err)
ecode = caam_jr_strstatus(jrdev, err);
- ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_FROM_DEVICE);
+ ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, dir);
kfree(edesc);
scatterwalk_map_and_copy(state->buf, req->src,
@@ -698,18 +659,42 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err,
DUMP_PREFIX_ADDRESS, 16, 4, req->result,
digestsize, 1);
- req->base.complete(&req->base, ecode);
+ /*
+ * If no backlog flag, the completion of the request is done
+ * by CAAM, not crypto engine.
+ */
+ if (!edesc->bklog)
+ req->base.complete(&req->base, ecode);
+ else
+ crypto_finalize_hash_request(jrp->engine, req, ecode);
+
+}
+
+static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err,
+ void *context)
+{
+ ahash_done_switch(jrdev, desc, err, context, DMA_BIDIRECTIONAL);
+}
+
+static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err,
+ void *context)
+{
+ ahash_done_switch(jrdev, desc, err, context, DMA_FROM_DEVICE);
}
/*
* Allocate an enhanced descriptor, which contains the hardware descriptor
* and space for hardware scatter table containing sg_num entries.
*/
-static struct ahash_edesc *ahash_edesc_alloc(struct caam_hash_ctx *ctx,
+static struct ahash_edesc *ahash_edesc_alloc(struct ahash_request *req,
int sg_num, u32 *sh_desc,
- dma_addr_t sh_desc_dma,
- gfp_t flags)
+ dma_addr_t sh_desc_dma)
{
+ struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+ struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+ struct caam_hash_state *state = ahash_request_ctx(req);
+ gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+ GFP_KERNEL : GFP_ATOMIC;
struct ahash_edesc *edesc;
unsigned int sg_size = sg_num * sizeof(struct sec4_sg_entry);
@@ -719,6 +704,8 @@ static struct ahash_edesc *ahash_edesc_alloc(struct caam_hash_ctx *ctx,
return NULL;
}
+ state->edesc = edesc;
+
init_job_desc_shared(edesc->hw_desc, sh_desc_dma, desc_len(sh_desc),
HDR_SHARE_DEFER | HDR_REVERSE);
@@ -761,6 +748,62 @@ static int ahash_edesc_add_src(struct caam_hash_ctx *ctx,
return 0;
}
+static int ahash_do_one_req(struct crypto_engine *engine, void *areq)
+{
+ struct ahash_request *req = ahash_request_cast(areq);
+ struct caam_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+ struct caam_hash_state *state = ahash_request_ctx(req);
+ struct device *jrdev = ctx->jrdev;
+ u32 *desc = state->edesc->hw_desc;
+ int ret;
+
+ state->edesc->bklog = true;
+
+ ret = caam_jr_enqueue(jrdev, desc, state->ahash_op_done, req);
+
+ if (ret != -EINPROGRESS) {
+ ahash_unmap(jrdev, state->edesc, req, 0);
+ kfree(state->edesc);
+ } else {
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static int ahash_enqueue_req(struct device *jrdev,
+ void (*cbk)(struct device *jrdev, u32 *desc,
+ u32 err, void *context),
+ struct ahash_request *req,
+ int dst_len, enum dma_data_direction dir)
+{
+ struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev);
+ struct caam_hash_state *state = ahash_request_ctx(req);
+ struct ahash_edesc *edesc = state->edesc;
+ u32 *desc = edesc->hw_desc;
+ int ret;
+
+ state->ahash_op_done = cbk;
+
+ /*
+ * Only the backlog request are sent to crypto-engine since the others
+ * can be handled by CAAM, if free, especially since JR has up to 1024
+ * entries (more than the 10 entries from crypto-engine).
+ */
+ if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)
+ ret = crypto_transfer_hash_request_to_engine(jrpriv->engine,
+ req);
+ else
+ ret = caam_jr_enqueue(jrdev, desc, cbk, req);
+
+ if ((ret != -EINPROGRESS) && (ret != -EBUSY)) {
+ ahash_unmap_ctx(jrdev, edesc, req, dst_len, dir);
+ kfree(edesc);
+ }
+
+ return ret;
+}
+
/* submit update job descriptor */
static int ahash_update_ctx(struct ahash_request *req)
{
@@ -768,8 +811,6 @@ static int ahash_update_ctx(struct ahash_request *req)
struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
struct caam_hash_state *state = ahash_request_ctx(req);
struct device *jrdev = ctx->jrdev;
- gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
- GFP_KERNEL : GFP_ATOMIC;
u8 *buf = state->buf;
int *buflen = &state->buflen;
int *next_buflen = &state->next_buflen;
@@ -823,8 +864,8 @@ static int ahash_update_ctx(struct ahash_request *req)
* allocate space for base edesc and hw desc commands,
* link tables
*/
- edesc = ahash_edesc_alloc(ctx, pad_nents, ctx->sh_desc_update,
- ctx->sh_desc_update_dma, flags);
+ edesc = ahash_edesc_alloc(req, pad_nents, ctx->sh_desc_update,
+ ctx->sh_desc_update_dma);
if (!edesc) {
dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
return -ENOMEM;
@@ -870,11 +911,8 @@ static int ahash_update_ctx(struct ahash_request *req)
DUMP_PREFIX_ADDRESS, 16, 4, desc,
desc_bytes(desc), 1);
- ret = caam_jr_enqueue(jrdev, desc, ahash_done_bi, req);
- if (ret)
- goto unmap_ctx;
-
- ret = -EINPROGRESS;
+ ret = ahash_enqueue_req(jrdev, ahash_done_bi, req,
+ ctx->ctx_len, DMA_BIDIRECTIONAL);
} else if (*next_buflen) {
scatterwalk_map_and_copy(buf + *buflen, req->src, 0,
req->nbytes, 0);
@@ -898,8 +936,6 @@ static int ahash_final_ctx(struct ahash_request *req)
struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
struct caam_hash_state *state = ahash_request_ctx(req);
struct device *jrdev = ctx->jrdev;
- gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
- GFP_KERNEL : GFP_ATOMIC;
int buflen = state->buflen;
u32 *desc;
int sec4_sg_bytes;
@@ -911,8 +947,8 @@ static int ahash_final_ctx(struct ahash_request *req)
sizeof(struct sec4_sg_entry);
/* allocate space for base edesc and hw desc commands, link tables */
- edesc = ahash_edesc_alloc(ctx, 4, ctx->sh_desc_fin,
- ctx->sh_desc_fin_dma, flags);
+ edesc = ahash_edesc_alloc(req, 4, ctx->sh_desc_fin,
+ ctx->sh_desc_fin_dma);
if (!edesc)
return -ENOMEM;
@@ -947,11 +983,8 @@ static int ahash_final_ctx(struct ahash_request *req)
DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
1);
- ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req);
- if (ret)
- goto unmap_ctx;
-
- return -EINPROGRESS;
+ return ahash_enqueue_req(jrdev, ahash_done_ctx_src, req,
+ digestsize, DMA_BIDIRECTIONAL);
unmap_ctx:
ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_BIDIRECTIONAL);
kfree(edesc);
@@ -964,8 +997,6 @@ static int ahash_finup_ctx(struct ahash_request *req)
struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
struct caam_hash_state *state = ahash_request_ctx(req);
struct device *jrdev = ctx->jrdev;
- gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
- GFP_KERNEL : GFP_ATOMIC;
int buflen = state->buflen;
u32 *desc;
int sec4_sg_src_index;
@@ -994,9 +1025,8 @@ static int ahash_finup_ctx(struct ahash_request *req)
sec4_sg_src_index = 1 + (buflen ? 1 : 0);
/* allocate space for base edesc and hw desc commands, link tables */
- edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents,
- ctx->sh_desc_fin, ctx->sh_desc_fin_dma,
- flags);
+ edesc = ahash_edesc_alloc(req, sec4_sg_src_index + mapped_nents,
+ ctx->sh_desc_fin, ctx->sh_desc_fin_dma);
if (!edesc) {
dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
return -ENOMEM;
@@ -1027,11 +1057,8 @@ static int ahash_finup_ctx(struct ahash_request *req)
DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
1);
- ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req);
- if (ret)
- goto unmap_ctx;
-
- return -EINPROGRESS;
+ return ahash_enqueue_req(jrdev, ahash_done_ctx_src, req,
+ digestsize, DMA_BIDIRECTIONAL);
unmap_ctx:
ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_BIDIRECTIONAL);
kfree(edesc);
@@ -1044,8 +1071,6 @@ static int ahash_digest(struct ahash_request *req)
struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
struct caam_hash_state *state = ahash_request_ctx(req);
struct device *jrdev = ctx->jrdev;
- gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
- GFP_KERNEL : GFP_ATOMIC;
u32 *desc;
int digestsize = crypto_ahash_digestsize(ahash);
int src_nents, mapped_nents;
@@ -1072,9 +1097,8 @@ static int ahash_digest(struct ahash_request *req)
}
/* allocate space for base edesc and hw desc commands, link tables */
- edesc = ahash_edesc_alloc(ctx, mapped_nents > 1 ? mapped_nents : 0,
- ctx->sh_desc_digest, ctx->sh_desc_digest_dma,
- flags);
+ edesc = ahash_edesc_alloc(req, mapped_nents > 1 ? mapped_nents : 0,
+ ctx->sh_desc_digest, ctx->sh_desc_digest_dma);
if (!edesc) {
dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
return -ENOMEM;
@@ -1103,15 +1127,8 @@ static int ahash_digest(struct ahash_request *req)
DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
1);
- ret = caam_jr_enqueue(jrdev, desc, ahash_done, req);
- if (!ret) {
- ret = -EINPROGRESS;
- } else {
- ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
- kfree(edesc);
- }
-
- return ret;
+ return ahash_enqueue_req(jrdev, ahash_done, req, digestsize,
+ DMA_FROM_DEVICE);
}
/* submit ahash final if it the first job descriptor */
@@ -1121,8 +1138,6 @@ static int ahash_final_no_ctx(struct ahash_request *req)
struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
struct caam_hash_state *state = ahash_request_ctx(req);
struct device *jrdev = ctx->jrdev;
- gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
- GFP_KERNEL : GFP_ATOMIC;
u8 *buf = state->buf;
int buflen = state->buflen;
u32 *desc;
@@ -1131,8 +1146,8 @@ static int ahash_final_no_ctx(struct ahash_request *req)
int ret;
/* allocate space for base edesc and hw desc commands, link tables */
- edesc = ahash_edesc_alloc(ctx, 0, ctx->sh_desc_digest,
- ctx->sh_desc_digest_dma, flags);
+ edesc = ahash_edesc_alloc(req, 0, ctx->sh_desc_digest,
+ ctx->sh_desc_digest_dma);
if (!edesc)
return -ENOMEM;
@@ -1157,20 +1172,12 @@ static int ahash_final_no_ctx(struct ahash_request *req)
DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
1);
- ret = caam_jr_enqueue(jrdev, desc, ahash_done, req);
- if (!ret) {
- ret = -EINPROGRESS;
- } else {
- ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
- kfree(edesc);
- }
-
- return ret;
+ return ahash_enqueue_req(jrdev, ahash_done, req,
+ digestsize, DMA_FROM_DEVICE);
unmap:
ahash_unmap(jrdev, edesc, req, digestsize);
kfree(edesc);
return -ENOMEM;
-
}
/* submit ahash update if it the first job descriptor after update */
@@ -1180,8 +1187,6 @@ static int ahash_update_no_ctx(struct ahash_request *req)
struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
struct caam_hash_state *state = ahash_request_ctx(req);
struct device *jrdev = ctx->jrdev;
- gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
- GFP_KERNEL : GFP_ATOMIC;
u8 *buf = state->buf;
int *buflen = &state->buflen;
int *next_buflen = &state->next_buflen;
@@ -1234,10 +1239,9 @@ static int ahash_update_no_ctx(struct ahash_request *req)
* allocate space for base edesc and hw desc commands,
* link tables
*/
- edesc = ahash_edesc_alloc(ctx, pad_nents,
+ edesc = ahash_edesc_alloc(req, pad_nents,
ctx->sh_desc_update_first,
- ctx->sh_desc_update_first_dma,
- flags);
+ ctx->sh_desc_update_first_dma);
if (!edesc) {
dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
return -ENOMEM;
@@ -1273,11 +1277,10 @@ static int ahash_update_no_ctx(struct ahash_request *req)
DUMP_PREFIX_ADDRESS, 16, 4, desc,
desc_bytes(desc), 1);
- ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req);
- if (ret)
- goto unmap_ctx;
-
- ret = -EINPROGRESS;
+ ret = ahash_enqueue_req(jrdev, ahash_done_ctx_dst, req,
+ ctx->ctx_len, DMA_TO_DEVICE);
+ if ((ret != -EINPROGRESS) && (ret != -EBUSY))
+ return ret;
state->update = ahash_update_ctx;
state->finup = ahash_finup_ctx;
state->final = ahash_final_ctx;
@@ -1305,8 +1308,6 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
struct caam_hash_state *state = ahash_request_ctx(req);
struct device *jrdev = ctx->jrdev;
- gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
- GFP_KERNEL : GFP_ATOMIC;
int buflen = state->buflen;
u32 *desc;
int sec4_sg_bytes, sec4_sg_src_index, src_nents, mapped_nents;
@@ -1336,9 +1337,8 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
sizeof(struct sec4_sg_entry);
/* allocate space for base edesc and hw desc commands, link tables */
- edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents,
- ctx->sh_desc_digest, ctx->sh_desc_digest_dma,
- flags);
+ edesc = ahash_edesc_alloc(req, sec4_sg_src_index + mapped_nents,
+ ctx->sh_desc_digest, ctx->sh_desc_digest_dma);
if (!edesc) {
dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
return -ENOMEM;
@@ -1368,15 +1368,8 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
1);
- ret = caam_jr_enqueue(jrdev, desc, ahash_done, req);
- if (!ret) {
- ret = -EINPROGRESS;
- } else {
- ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
- kfree(edesc);
- }
-
- return ret;
+ return ahash_enqueue_req(jrdev, ahash_done, req,
+ digestsize, DMA_FROM_DEVICE);
unmap:
ahash_unmap(jrdev, edesc, req, digestsize);
kfree(edesc);
@@ -1391,8 +1384,6 @@ static int ahash_update_first(struct ahash_request *req)
struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
struct caam_hash_state *state = ahash_request_ctx(req);
struct device *jrdev = ctx->jrdev;
- gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
- GFP_KERNEL : GFP_ATOMIC;
u8 *buf = state->buf;
int *buflen = &state->buflen;
int *next_buflen = &state->next_buflen;
@@ -1440,11 +1431,10 @@ static int ahash_update_first(struct ahash_request *req)
* allocate space for base edesc and hw desc commands,
* link tables
*/
- edesc = ahash_edesc_alloc(ctx, mapped_nents > 1 ?
+ edesc = ahash_edesc_alloc(req, mapped_nents > 1 ?
mapped_nents : 0,
ctx->sh_desc_update_first,
- ctx->sh_desc_update_first_dma,
- flags);
+ ctx->sh_desc_update_first_dma);
if (!edesc) {
dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
return -ENOMEM;
@@ -1467,11 +1457,10 @@ static int ahash_update_first(struct ahash_request *req)
DUMP_PREFIX_ADDRESS, 16, 4, desc,
desc_bytes(desc), 1);
- ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req);
- if (ret)
- goto unmap_ctx;
-
- ret = -EINPROGRESS;
+ ret = ahash_enqueue_req(jrdev, ahash_done_ctx_dst, req,
+ ctx->ctx_len, DMA_TO_DEVICE);
+ if ((ret != -EINPROGRESS) && (ret != -EBUSY))
+ return ret;
state->update = ahash_update_ctx;
state->finup = ahash_finup_ctx;
state->final = ahash_final_ctx;
@@ -1774,6 +1763,8 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
HASH_MSG_LEN + SHA256_DIGEST_SIZE,
HASH_MSG_LEN + 64,
HASH_MSG_LEN + SHA512_DIGEST_SIZE };
+ const size_t sh_desc_update_offset = offsetof(struct caam_hash_ctx,
+ sh_desc_update);
dma_addr_t dma_addr;
struct caam_drv_private *priv;
@@ -1826,7 +1817,8 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
}
dma_addr = dma_map_single_attrs(ctx->jrdev, ctx->sh_desc_update,
- offsetof(struct caam_hash_ctx, key),
+ offsetof(struct caam_hash_ctx, key) -
+ sh_desc_update_offset,
ctx->dir, DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(ctx->jrdev, dma_addr)) {
dev_err(ctx->jrdev, "unable to map shared descriptors\n");
@@ -1844,11 +1836,16 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
ctx->sh_desc_update_dma = dma_addr;
ctx->sh_desc_update_first_dma = dma_addr +
offsetof(struct caam_hash_ctx,
- sh_desc_update_first);
+ sh_desc_update_first) -
+ sh_desc_update_offset;
ctx->sh_desc_fin_dma = dma_addr + offsetof(struct caam_hash_ctx,
- sh_desc_fin);
+ sh_desc_fin) -
+ sh_desc_update_offset;
ctx->sh_desc_digest_dma = dma_addr + offsetof(struct caam_hash_ctx,
- sh_desc_digest);
+ sh_desc_digest) -
+ sh_desc_update_offset;
+
+ ctx->enginectx.op.do_one_request = ahash_do_one_req;
crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
sizeof(struct caam_hash_state));
@@ -1865,7 +1862,8 @@ static void caam_hash_cra_exit(struct crypto_tfm *tfm)
struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm);
dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_update_dma,
- offsetof(struct caam_hash_ctx, key),
+ offsetof(struct caam_hash_ctx, key) -
+ offsetof(struct caam_hash_ctx, sh_desc_update),
ctx->dir, DMA_ATTR_SKIP_CPU_SYNC);
if (ctx->key_dir != DMA_NONE)
dma_unmap_single_attrs(ctx->jrdev, ctx->adata.key_dma,
diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c
index 6619c512ef1a..4fcae37a2e33 100644
--- a/drivers/crypto/caam/caampkc.c
+++ b/drivers/crypto/caam/caampkc.c
@@ -117,76 +117,69 @@ static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc,
static void rsa_pub_done(struct device *dev, u32 *desc, u32 err, void *context)
{
struct akcipher_request *req = context;
+ struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
+ struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
struct rsa_edesc *edesc;
int ecode = 0;
if (err)
ecode = caam_jr_strstatus(dev, err);
- edesc = container_of(desc, struct rsa_edesc, hw_desc[0]);
+ edesc = req_ctx->edesc;
rsa_pub_unmap(dev, edesc, req);
rsa_io_unmap(dev, edesc, req);
kfree(edesc);
- akcipher_request_complete(req, ecode);
-}
-
-static void rsa_priv_f1_done(struct device *dev, u32 *desc, u32 err,
- void *context)
-{
- struct akcipher_request *req = context;
- struct rsa_edesc *edesc;
- int ecode = 0;
-
- if (err)
- ecode = caam_jr_strstatus(dev, err);
-
- edesc = container_of(desc, struct rsa_edesc, hw_desc[0]);
-
- rsa_priv_f1_unmap(dev, edesc, req);
- rsa_io_unmap(dev, edesc, req);
- kfree(edesc);
-
- akcipher_request_complete(req, ecode);
-}
-
-static void rsa_priv_f2_done(struct device *dev, u32 *desc, u32 err,
- void *context)
-{
- struct akcipher_request *req = context;
- struct rsa_edesc *edesc;
- int ecode = 0;
-
- if (err)
- ecode = caam_jr_strstatus(dev, err);
-
- edesc = container_of(desc, struct rsa_edesc, hw_desc[0]);
-
- rsa_priv_f2_unmap(dev, edesc, req);
- rsa_io_unmap(dev, edesc, req);
- kfree(edesc);
-
- akcipher_request_complete(req, ecode);
+ /*
+ * If no backlog flag, the completion of the request is done
+ * by CAAM, not crypto engine.
+ */
+ if (!edesc->bklog)
+ akcipher_request_complete(req, ecode);
+ else
+ crypto_finalize_akcipher_request(jrp->engine, req, ecode);
}
-static void rsa_priv_f3_done(struct device *dev, u32 *desc, u32 err,
- void *context)
+static void rsa_priv_f_done(struct device *dev, u32 *desc, u32 err,
+ void *context)
{
struct akcipher_request *req = context;
+ struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+ struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
+ struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+ struct caam_rsa_key *key = &ctx->key;
+ struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
struct rsa_edesc *edesc;
int ecode = 0;
if (err)
ecode = caam_jr_strstatus(dev, err);
- edesc = container_of(desc, struct rsa_edesc, hw_desc[0]);
+ edesc = req_ctx->edesc;
+
+ switch (key->priv_form) {
+ case FORM1:
+ rsa_priv_f1_unmap(dev, edesc, req);
+ break;
+ case FORM2:
+ rsa_priv_f2_unmap(dev, edesc, req);
+ break;
+ case FORM3:
+ rsa_priv_f3_unmap(dev, edesc, req);
+ }
- rsa_priv_f3_unmap(dev, edesc, req);
rsa_io_unmap(dev, edesc, req);
kfree(edesc);
- akcipher_request_complete(req, ecode);
+ /*
+ * If no backlog flag, the completion of the request is done
+ * by CAAM, not crypto engine.
+ */
+ if (!edesc->bklog)
+ akcipher_request_complete(req, ecode);
+ else
+ crypto_finalize_akcipher_request(jrp->engine, req, ecode);
}
/**
@@ -334,6 +327,8 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
edesc->src_nents = src_nents;
edesc->dst_nents = dst_nents;
+ req_ctx->edesc = edesc;
+
if (!sec4_sg_bytes)
return edesc;
@@ -364,6 +359,33 @@ src_fail:
return ERR_PTR(-ENOMEM);
}
+static int akcipher_do_one_req(struct crypto_engine *engine, void *areq)
+{
+ struct akcipher_request *req = container_of(areq,
+ struct akcipher_request,
+ base);
+ struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+ struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
+ struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+ struct device *jrdev = ctx->dev;
+ u32 *desc = req_ctx->edesc->hw_desc;
+ int ret;
+
+ req_ctx->edesc->bklog = true;
+
+ ret = caam_jr_enqueue(jrdev, desc, req_ctx->akcipher_op_done, req);
+
+ if (ret != -EINPROGRESS) {
+ rsa_pub_unmap(jrdev, req_ctx->edesc, req);
+ rsa_io_unmap(jrdev, req_ctx->edesc, req);
+ kfree(req_ctx->edesc);
+ } else {
+ ret = 0;
+ }
+
+ return ret;
+}
+
static int set_rsa_pub_pdb(struct akcipher_request *req,
struct rsa_edesc *edesc)
{
@@ -627,6 +649,53 @@ unmap_p:
return -ENOMEM;
}
+static int akcipher_enqueue_req(struct device *jrdev,
+ void (*cbk)(struct device *jrdev, u32 *desc,
+ u32 err, void *context),
+ struct akcipher_request *req)
+{
+ struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev);
+ struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+ struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+ struct caam_rsa_key *key = &ctx->key;
+ struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
+ struct rsa_edesc *edesc = req_ctx->edesc;
+ u32 *desc = edesc->hw_desc;
+ int ret;
+
+ req_ctx->akcipher_op_done = cbk;
+ /*
+ * Only the backlog request are sent to crypto-engine since the others
+ * can be handled by CAAM, if free, especially since JR has up to 1024
+ * entries (more than the 10 entries from crypto-engine).
+ */
+ if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)
+ ret = crypto_transfer_akcipher_request_to_engine(jrpriv->engine,
+ req);
+ else
+ ret = caam_jr_enqueue(jrdev, desc, cbk, req);
+
+ if ((ret != -EINPROGRESS) && (ret != -EBUSY)) {
+ switch (key->priv_form) {
+ case FORM1:
+ rsa_priv_f1_unmap(jrdev, edesc, req);
+ break;
+ case FORM2:
+ rsa_priv_f2_unmap(jrdev, edesc, req);
+ break;
+ case FORM3:
+ rsa_priv_f3_unmap(jrdev, edesc, req);
+ break;
+ default:
+ rsa_pub_unmap(jrdev, edesc, req);
+ }
+ rsa_io_unmap(jrdev, edesc, req);
+ kfree(edesc);
+ }
+
+ return ret;
+}
+
static int caam_rsa_enc(struct akcipher_request *req)
{
struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
@@ -658,11 +727,7 @@ static int caam_rsa_enc(struct akcipher_request *req)
/* Initialize Job Descriptor */
init_rsa_pub_desc(edesc->hw_desc, &edesc->pdb.pub);
- ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_pub_done, req);
- if (!ret)
- return -EINPROGRESS;
-
- rsa_pub_unmap(jrdev, edesc, req);
+ return akcipher_enqueue_req(jrdev, rsa_pub_done, req);
init_fail:
rsa_io_unmap(jrdev, edesc, req);
@@ -691,11 +756,7 @@ static int caam_rsa_dec_priv_f1(struct akcipher_request *req)
/* Initialize Job Descriptor */
init_rsa_priv_f1_desc(edesc->hw_desc, &edesc->pdb.priv_f1);
- ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f1_done, req);
- if (!ret)
- return -EINPROGRESS;
-
- rsa_priv_f1_unmap(jrdev, edesc, req);
+ return akcipher_enqueue_req(jrdev, rsa_priv_f_done, req);
init_fail:
rsa_io_unmap(jrdev, edesc, req);
@@ -724,11 +785,7 @@ static int caam_rsa_dec_priv_f2(struct akcipher_request *req)
/* Initialize Job Descriptor */
init_rsa_priv_f2_desc(edesc->hw_desc, &edesc->pdb.priv_f2);
- ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f2_done, req);
- if (!ret)
- return -EINPROGRESS;
-
- rsa_priv_f2_unmap(jrdev, edesc, req);
+ return akcipher_enqueue_req(jrdev, rsa_priv_f_done, req);
init_fail:
rsa_io_unmap(jrdev, edesc, req);
@@ -757,11 +814,7 @@ static int caam_rsa_dec_priv_f3(struct akcipher_request *req)
/* Initialize Job Descriptor */
init_rsa_priv_f3_desc(edesc->hw_desc, &edesc->pdb.priv_f3);
- ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f3_done, req);
- if (!ret)
- return -EINPROGRESS;
-
- rsa_priv_f3_unmap(jrdev, edesc, req);
+ return akcipher_enqueue_req(jrdev, rsa_priv_f_done, req);
init_fail:
rsa_io_unmap(jrdev, edesc, req);
@@ -1054,6 +1107,8 @@ static int caam_rsa_init_tfm(struct crypto_akcipher *tfm)
return -ENOMEM;
}
+ ctx->enginectx.op.do_one_request = akcipher_do_one_req;
+
return 0;
}
diff --git a/drivers/crypto/caam/caampkc.h b/drivers/crypto/caam/caampkc.h
index c68fb4c03ee6..cc889a525e2f 100644
--- a/drivers/crypto/caam/caampkc.h
+++ b/drivers/crypto/caam/caampkc.h
@@ -12,6 +12,7 @@
#define _PKC_DESC_H_
#include "compat.h"
#include "pdb.h"
+#include <crypto/engine.h>
/**
* caam_priv_key_form - CAAM RSA private key representation
@@ -87,11 +88,13 @@ struct caam_rsa_key {
/**
* caam_rsa_ctx - per session context.
+ * @enginectx : crypto engine context
* @key : RSA key in DMA zone
* @dev : device structure
* @padding_dma : dma address of padding, for adding it to the input
*/
struct caam_rsa_ctx {
+ struct crypto_engine_ctx enginectx;
struct caam_rsa_key key;
struct device *dev;
dma_addr_t padding_dma;
@@ -103,11 +106,16 @@ struct caam_rsa_ctx {
* @src : input scatterlist (stripped of leading zeros)
* @fixup_src : input scatterlist (that might be stripped of leading zeros)
* @fixup_src_len : length of the fixup_src input scatterlist
+ * @edesc : s/w-extended rsa descriptor
+ * @akcipher_op_done : callback used when operation is done
*/
struct caam_rsa_req_ctx {
struct scatterlist src[2];
struct scatterlist *fixup_src;
unsigned int fixup_src_len;
+ struct rsa_edesc *edesc;
+ void (*akcipher_op_done)(struct device *jrdev, u32 *desc, u32 err,
+ void *context);
};
/**
@@ -117,6 +125,7 @@ struct caam_rsa_req_ctx {
* @mapped_src_nents: number of segments in input h/w link table
* @mapped_dst_nents: number of segments in output h/w link table
* @sec4_sg_bytes : length of h/w link table
+ * @bklog : stored to determine if the request needs backlog
* @sec4_sg_dma : dma address of h/w link table
* @sec4_sg : pointer to h/w link table
* @pdb : specific RSA Protocol Data Block (PDB)
@@ -128,6 +137,7 @@ struct rsa_edesc {
int mapped_src_nents;
int mapped_dst_nents;
int sec4_sg_bytes;
+ bool bklog;
dma_addr_t sec4_sg_dma;
struct sec4_sg_entry *sec4_sg;
union {
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index e8baacaabe07..77d048dfe5d0 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -7,35 +7,12 @@
*
* Based on caamalg.c crypto API driver.
*
- * relationship between job descriptors to shared descriptors:
- *
- * --------------- --------------
- * | JobDesc #0 |-------------------->| ShareDesc |
- * | *(buffer 0) | |------------->| (generate) |
- * --------------- | | (move) |
- * | | (store) |
- * --------------- | --------------
- * | JobDesc #1 |------|
- * | *(buffer 1) |
- * ---------------
- *
- * A job desc looks like this:
- *
- * ---------------------
- * | Header |
- * | ShareDesc Pointer |
- * | SEQ_OUT_PTR |
- * | (output buffer) |
- * ---------------------
- *
- * The SharedDesc never changes, and each job descriptor points to one of two
- * buffers for each device, from which the data will be copied into the
- * requested destination
*/
#include <linux/hw_random.h>
#include <linux/completion.h>
#include <linux/atomic.h>
+#include <linux/kfifo.h>
#include "compat.h"
@@ -45,278 +22,205 @@
#include "jr.h"
#include "error.h"
+#define CAAM_RNG_MAX_FIFO_STORE_SIZE 16
+
/*
- * Maximum buffer size: maximum number of random, cache-aligned bytes that
- * will be generated and moved to seq out ptr (extlen not allowed)
+ * Length of used descriptors, see caam_init_desc()
*/
-#define RN_BUF_SIZE (0xffff / L1_CACHE_BYTES * \
- L1_CACHE_BYTES)
-
-/* length of descriptors */
-#define DESC_JOB_O_LEN (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ_MAX * 2)
-#define DESC_RNG_LEN (3 * CAAM_CMD_SZ)
-
-/* Buffer, its dma address and lock */
-struct buf_data {
- u8 buf[RN_BUF_SIZE] ____cacheline_aligned;
- dma_addr_t addr;
- struct completion filled;
- u32 hw_desc[DESC_JOB_O_LEN];
-#define BUF_NOT_EMPTY 0
-#define BUF_EMPTY 1
-#define BUF_PENDING 2 /* Empty, but with job pending --don't submit another */
- atomic_t empty;
-};
+#define CAAM_RNG_DESC_LEN (CAAM_CMD_SZ + \
+ CAAM_CMD_SZ + \
+ CAAM_CMD_SZ + CAAM_PTR_SZ_MAX)
/* rng per-device context */
struct caam_rng_ctx {
+ struct hwrng rng;
struct device *jrdev;
- dma_addr_t sh_desc_dma;
- u32 sh_desc[DESC_RNG_LEN];
- unsigned int cur_buf_idx;
- int current_buf;
- struct buf_data bufs[2];
+ struct device *ctrldev;
+ void *desc_async;
+ void *desc_sync;
+ struct work_struct worker;
+ struct kfifo fifo;
};
-static struct caam_rng_ctx *rng_ctx;
-
-/*
- * Variable used to avoid double free of resources in case
- * algorithm registration was unsuccessful
- */
-static bool init_done;
-
-static inline void rng_unmap_buf(struct device *jrdev, struct buf_data *bd)
-{
- if (bd->addr)
- dma_unmap_single(jrdev, bd->addr, RN_BUF_SIZE,
- DMA_FROM_DEVICE);
-}
+struct caam_rng_job_ctx {
+ struct completion *done;
+ int *err;
+};
-static inline void rng_unmap_ctx(struct caam_rng_ctx *ctx)
+static struct caam_rng_ctx *to_caam_rng_ctx(struct hwrng *r)
{
- struct device *jrdev = ctx->jrdev;
-
- if (ctx->sh_desc_dma)
- dma_unmap_single(jrdev, ctx->sh_desc_dma,
- desc_bytes(ctx->sh_desc), DMA_TO_DEVICE);
- rng_unmap_buf(jrdev, &ctx->bufs[0]);
- rng_unmap_buf(jrdev, &ctx->bufs[1]);
+ return (struct caam_rng_ctx *)r->priv;
}
-static void rng_done(struct device *jrdev, u32 *desc, u32 err, void *context)
+static void caam_rng_done(struct device *jrdev, u32 *desc, u32 err,
+ void *context)
{
- struct buf_data *bd;
-
- bd = container_of(desc, struct buf_data, hw_desc[0]);
+ struct caam_rng_job_ctx *jctx = context;
if (err)
- caam_jr_strstatus(jrdev, err);
+ *jctx->err = caam_jr_strstatus(jrdev, err);
- atomic_set(&bd->empty, BUF_NOT_EMPTY);
- complete(&bd->filled);
-
- /* Buffer refilled, invalidate cache */
- dma_sync_single_for_cpu(jrdev, bd->addr, RN_BUF_SIZE, DMA_FROM_DEVICE);
-
- print_hex_dump_debug("rng refreshed buf@: ", DUMP_PREFIX_ADDRESS, 16, 4,
- bd->buf, RN_BUF_SIZE, 1);
+ complete(jctx->done);
}
-static inline int submit_job(struct caam_rng_ctx *ctx, int to_current)
+static u32 *caam_init_desc(u32 *desc, dma_addr_t dst_dma)
{
- struct buf_data *bd = &ctx->bufs[!(to_current ^ ctx->current_buf)];
- struct device *jrdev = ctx->jrdev;
- u32 *desc = bd->hw_desc;
- int err;
-
- dev_dbg(jrdev, "submitting job %d\n", !(to_current ^ ctx->current_buf));
- init_completion(&bd->filled);
- err = caam_jr_enqueue(jrdev, desc, rng_done, ctx);
- if (err)
- complete(&bd->filled); /* don't wait on failed job*/
- else
- atomic_inc(&bd->empty); /* note if pending */
-
- return err;
+ init_job_desc(desc, 0); /* + 1 cmd_sz */
+ /* Generate random bytes: + 1 cmd_sz */
+ append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG |
+ OP_ALG_PR_ON);
+ /* Store bytes: + 1 cmd_sz + caam_ptr_sz */
+ append_fifo_store(desc, dst_dma,
+ CAAM_RNG_MAX_FIFO_STORE_SIZE, FIFOST_TYPE_RNGSTORE);
+
+ print_hex_dump_debug("rng job desc@: ", DUMP_PREFIX_ADDRESS,
+ 16, 4, desc, desc_bytes(desc), 1);
+
+ return desc;
}
-static int caam_read(struct hwrng *rng, void *data, size_t max, bool wait)
+static int caam_rng_read_one(struct device *jrdev,
+ void *dst, int len,
+ void *desc,
+ struct completion *done)
{
- struct caam_rng_ctx *ctx = rng_ctx;
- struct buf_data *bd = &ctx->bufs[ctx->current_buf];
- int next_buf_idx, copied_idx;
- int err;
-
- if (atomic_read(&bd->empty)) {
- /* try to submit job if there wasn't one */
- if (atomic_read(&bd->empty) == BUF_EMPTY) {
- err = submit_job(ctx, 1);
- /* if can't submit job, can't even wait */
- if (err)
- return 0;
- }
- /* no immediate data, so exit if not waiting */
- if (!wait)
- return 0;
-
- /* waiting for pending job */
- if (atomic_read(&bd->empty))
- wait_for_completion(&bd->filled);
+ dma_addr_t dst_dma;
+ int err, ret = 0;
+ struct caam_rng_job_ctx jctx = {
+ .done = done,
+ .err = &ret,
+ };
+
+ len = CAAM_RNG_MAX_FIFO_STORE_SIZE;
+
+ dst_dma = dma_map_single(jrdev, dst, len, DMA_FROM_DEVICE);
+ if (dma_mapping_error(jrdev, dst_dma)) {
+ dev_err(jrdev, "unable to map destination memory\n");
+ return -ENOMEM;
}
- next_buf_idx = ctx->cur_buf_idx + max;
- dev_dbg(ctx->jrdev, "%s: start reading at buffer %d, idx %d\n",
- __func__, ctx->current_buf, ctx->cur_buf_idx);
-
- /* if enough data in current buffer */
- if (next_buf_idx < RN_BUF_SIZE) {
- memcpy(data, bd->buf + ctx->cur_buf_idx, max);
- ctx->cur_buf_idx = next_buf_idx;
- return max;
+ init_completion(done);
+ err = caam_jr_enqueue(jrdev,
+ caam_init_desc(desc, dst_dma),
+ caam_rng_done, &jctx);
+ if (err == -EINPROGRESS) {
+ wait_for_completion(done);
+ err = 0;
}
- /* else, copy what's left... */
- copied_idx = RN_BUF_SIZE - ctx->cur_buf_idx;
- memcpy(data, bd->buf + ctx->cur_buf_idx, copied_idx);
- ctx->cur_buf_idx = 0;
- atomic_set(&bd->empty, BUF_EMPTY);
-
- /* ...refill... */
- submit_job(ctx, 1);
+ dma_unmap_single(jrdev, dst_dma, len, DMA_FROM_DEVICE);
- /* and use next buffer */
- ctx->current_buf = !ctx->current_buf;
- dev_dbg(ctx->jrdev, "switched to buffer %d\n", ctx->current_buf);
-
- /* since there already is some data read, don't wait */
- return copied_idx + caam_read(rng, data + copied_idx,
- max - copied_idx, false);
+ return err ?: (ret ?: len);
}
-static inline int rng_create_sh_desc(struct caam_rng_ctx *ctx)
+static void caam_rng_fill_async(struct caam_rng_ctx *ctx)
{
- struct device *jrdev = ctx->jrdev;
- u32 *desc = ctx->sh_desc;
-
- init_sh_desc(desc, HDR_SHARE_SERIAL);
-
- /* Generate random bytes */
- append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG);
-
- /* Store bytes */
- append_seq_fifo_store(desc, RN_BUF_SIZE, FIFOST_TYPE_RNGSTORE);
+ struct scatterlist sg[1];
+ struct completion done;
+ int len, nents;
+
+ sg_init_table(sg, ARRAY_SIZE(sg));
+ nents = kfifo_dma_in_prepare(&ctx->fifo, sg, ARRAY_SIZE(sg),
+ CAAM_RNG_MAX_FIFO_STORE_SIZE);
+ if (!nents)
+ return;
- ctx->sh_desc_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
- DMA_TO_DEVICE);
- if (dma_mapping_error(jrdev, ctx->sh_desc_dma)) {
- dev_err(jrdev, "unable to map shared descriptor\n");
- return -ENOMEM;
- }
+ len = caam_rng_read_one(ctx->jrdev, sg_virt(&sg[0]),
+ sg[0].length,
+ ctx->desc_async,
+ &done);
+ if (len < 0)
+ return;
- print_hex_dump_debug("rng shdesc@: ", DUMP_PREFIX_ADDRESS, 16, 4,
- desc, desc_bytes(desc), 1);
+ kfifo_dma_in_finish(&ctx->fifo, len);
+}
- return 0;
+static void caam_rng_worker(struct work_struct *work)
+{
+ struct caam_rng_ctx *ctx = container_of(work, struct caam_rng_ctx,
+ worker);
+ caam_rng_fill_async(ctx);
}
-static inline int rng_create_job_desc(struct caam_rng_ctx *ctx, int buf_id)
+static int caam_read(struct hwrng *rng, void *dst, size_t max, bool wait)
{
- struct device *jrdev = ctx->jrdev;
- struct buf_data *bd = &ctx->bufs[buf_id];
- u32 *desc = bd->hw_desc;
- int sh_len = desc_len(ctx->sh_desc);
+ struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng);
+ int out;
- init_job_desc_shared(desc, ctx->sh_desc_dma, sh_len, HDR_SHARE_DEFER |
- HDR_REVERSE);
+ if (wait) {
+ struct completion done;
- bd->addr = dma_map_single(jrdev, bd->buf, RN_BUF_SIZE, DMA_FROM_DEVICE);
- if (dma_mapping_error(jrdev, bd->addr)) {
- dev_err(jrdev, "unable to map dst\n");
- return -ENOMEM;
+ return caam_rng_read_one(ctx->jrdev, dst, max,
+ ctx->desc_sync, &done);
}
- append_seq_out_ptr_intlen(desc, bd->addr, RN_BUF_SIZE, 0);
-
- print_hex_dump_debug("rng job desc@: ", DUMP_PREFIX_ADDRESS, 16, 4,
- desc, desc_bytes(desc), 1);
+ out = kfifo_out(&ctx->fifo, dst, max);
+ if (kfifo_is_empty(&ctx->fifo))
+ schedule_work(&ctx->worker);
- return 0;
+ return out;
}
static void caam_cleanup(struct hwrng *rng)
{
- int i;
- struct buf_data *bd;
-
- for (i = 0; i < 2; i++) {
- bd = &rng_ctx->bufs[i];
- if (atomic_read(&bd->empty) == BUF_PENDING)
- wait_for_completion(&bd->filled);
- }
+ struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng);
- rng_unmap_ctx(rng_ctx);
+ flush_work(&ctx->worker);
+ caam_jr_free(ctx->jrdev);
+ kfifo_free(&ctx->fifo);
}
-static int caam_init_buf(struct caam_rng_ctx *ctx, int buf_id)
+static int caam_init(struct hwrng *rng)
{
- struct buf_data *bd = &ctx->bufs[buf_id];
+ struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng);
int err;
- err = rng_create_job_desc(ctx, buf_id);
- if (err)
- return err;
-
- atomic_set(&bd->empty, BUF_EMPTY);
- submit_job(ctx, buf_id == ctx->current_buf);
- wait_for_completion(&bd->filled);
+ ctx->desc_sync = devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN,
+ GFP_DMA | GFP_KERNEL);
+ if (!ctx->desc_sync)
+ return -ENOMEM;
- return 0;
-}
+ ctx->desc_async = devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN,
+ GFP_DMA | GFP_KERNEL);
+ if (!ctx->desc_async)
+ return -ENOMEM;
-static int caam_init_rng(struct caam_rng_ctx *ctx, struct device *jrdev)
-{
- int err;
+ if (kfifo_alloc(&ctx->fifo, CAAM_RNG_MAX_FIFO_STORE_SIZE,
+ GFP_DMA | GFP_KERNEL))
+ return -ENOMEM;
- ctx->jrdev = jrdev;
+ INIT_WORK(&ctx->worker, caam_rng_worker);
- err = rng_create_sh_desc(ctx);
- if (err)
+ ctx->jrdev = caam_jr_alloc();
+ err = PTR_ERR_OR_ZERO(ctx->jrdev);
+ if (err) {
+ kfifo_free(&ctx->fifo);
+ pr_err("Job Ring Device allocation for transform failed\n");
return err;
+ }
- ctx->current_buf = 0;
- ctx->cur_buf_idx = 0;
+ /*
+ * Fill async buffer to have early randomness data for
+ * hw_random
+ */
+ caam_rng_fill_async(ctx);
- err = caam_init_buf(ctx, 0);
- if (err)
- return err;
-
- return caam_init_buf(ctx, 1);
+ return 0;
}
-static struct hwrng caam_rng = {
- .name = "rng-caam",
- .cleanup = caam_cleanup,
- .read = caam_read,
-};
+int caam_rng_init(struct device *ctrldev);
-void caam_rng_exit(void)
+void caam_rng_exit(struct device *ctrldev)
{
- if (!init_done)
- return;
-
- caam_jr_free(rng_ctx->jrdev);
- hwrng_unregister(&caam_rng);
- kfree(rng_ctx);
+ devres_release_group(ctrldev, caam_rng_init);
}
int caam_rng_init(struct device *ctrldev)
{
- struct device *dev;
+ struct caam_rng_ctx *ctx;
u32 rng_inst;
struct caam_drv_private *priv = dev_get_drvdata(ctrldev);
- int err;
- init_done = false;
+ int ret;
/* Check for an instantiated RNG before registration */
if (priv->era < 10)
@@ -328,31 +232,30 @@ int caam_rng_init(struct device *ctrldev)
if (!rng_inst)
return 0;
- dev = caam_jr_alloc();
- if (IS_ERR(dev)) {
- pr_err("Job Ring Device allocation for transform failed\n");
- return PTR_ERR(dev);
- }
- rng_ctx = kmalloc(sizeof(*rng_ctx), GFP_DMA | GFP_KERNEL);
- if (!rng_ctx) {
- err = -ENOMEM;
- goto free_caam_alloc;
- }
- err = caam_init_rng(rng_ctx, dev);
- if (err)
- goto free_rng_ctx;
+ if (!devres_open_group(ctrldev, caam_rng_init, GFP_KERNEL))
+ return -ENOMEM;
- dev_info(dev, "registering rng-caam\n");
+ ctx = devm_kzalloc(ctrldev, sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
- err = hwrng_register(&caam_rng);
- if (!err) {
- init_done = true;
- return err;
+ ctx->ctrldev = ctrldev;
+
+ ctx->rng.name = "rng-caam";
+ ctx->rng.init = caam_init;
+ ctx->rng.cleanup = caam_cleanup;
+ ctx->rng.read = caam_read;
+ ctx->rng.priv = (unsigned long)ctx;
+ ctx->rng.quality = 1024;
+
+ dev_info(ctrldev, "registering rng-caam\n");
+
+ ret = devm_hwrng_register(ctrldev, &ctx->rng);
+ if (ret) {
+ caam_rng_exit(ctrldev);
+ return ret;
}
-free_rng_ctx:
- kfree(rng_ctx);
-free_caam_alloc:
- caam_jr_free(dev);
- return err;
+ devres_close_group(ctrldev, caam_rng_init);
+ return 0;
}
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 7139366da016..4fcdd262e581 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -10,6 +10,7 @@
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/sys_soc.h>
+#include <linux/fsl/mc.h>
#include "compat.h"
#include "regs.h"
@@ -36,7 +37,8 @@ static void build_instantiation_desc(u32 *desc, int handle, int do_sk)
init_job_desc(desc, 0);
op_flags = OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG |
- (handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INIT;
+ (handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INIT |
+ OP_ALG_PR_ON;
/* INIT RNG in non-test mode */
append_operation(desc, op_flags);
@@ -196,7 +198,7 @@ static int deinstantiate_rng(struct device *ctrldev, int state_handle_mask)
u32 *desc, status;
int sh_idx, ret = 0;
- desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL);
+ desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL | GFP_DMA);
if (!desc)
return -ENOMEM;
@@ -273,17 +275,30 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask,
int ret = 0, sh_idx;
ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl;
- desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL);
+ desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL | GFP_DMA);
if (!desc)
return -ENOMEM;
for (sh_idx = 0; sh_idx < RNG4_MAX_HANDLES; sh_idx++) {
+ const u32 rdsta_if = RDSTA_IF0 << sh_idx;
+ const u32 rdsta_pr = RDSTA_PR0 << sh_idx;
+ const u32 rdsta_mask = rdsta_if | rdsta_pr;
/*
* If the corresponding bit is set, this state handle
* was initialized by somebody else, so it's left alone.
*/
- if ((1 << sh_idx) & state_handle_mask)
- continue;
+ if (rdsta_if & state_handle_mask) {
+ if (rdsta_pr & state_handle_mask)
+ continue;
+
+ dev_info(ctrldev,
+ "RNG4 SH%d was previously instantiated without prediction resistance. Tearing it down\n",
+ sh_idx);
+
+ ret = deinstantiate_rng(ctrldev, rdsta_if);
+ if (ret)
+ break;
+ }
/* Create the descriptor for instantiating RNG State Handle */
build_instantiation_desc(desc, sh_idx, gen_sk);
@@ -303,9 +318,9 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask,
if (ret)
break;
- rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK;
+ rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_MASK;
if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) ||
- !(rdsta_val & (1 << sh_idx))) {
+ (rdsta_val & rdsta_mask) != rdsta_mask) {
ret = -EAGAIN;
break;
}
@@ -341,8 +356,12 @@ static void kick_trng(struct platform_device *pdev, int ent_delay)
ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl;
r4tst = &ctrl->r4tst[0];
- /* put RNG4 into program mode */
- clrsetbits_32(&r4tst->rtmctl, 0, RTMCTL_PRGM);
+ /*
+ * Setting both RTMCTL:PRGM and RTMCTL:TRNG_ACC causes TRNG to
+ * properly invalidate the entropy in the entropy register and
+ * force re-generation.
+ */
+ clrsetbits_32(&r4tst->rtmctl, 0, RTMCTL_PRGM | RTMCTL_ACC);
/*
* Performance-wise, it does not make sense to
@@ -372,7 +391,8 @@ start_rng:
* select raw sampling in both entropy shifter
* and statistical checker; ; put RNG4 into run mode
*/
- clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, RTMCTL_SAMP_MODE_RAW_ES_SC);
+ clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM | RTMCTL_ACC,
+ RTMCTL_SAMP_MODE_RAW_ES_SC);
}
static int caam_get_era_from_hw(struct caam_ctrl __iomem *ctrl)
@@ -559,6 +579,26 @@ static void caam_remove_debugfs(void *root)
}
#endif
+#ifdef CONFIG_FSL_MC_BUS
+static bool check_version(struct fsl_mc_version *mc_version, u32 major,
+ u32 minor, u32 revision)
+{
+ if (mc_version->major > major)
+ return true;
+
+ if (mc_version->major == major) {
+ if (mc_version->minor > minor)
+ return true;
+
+ if (mc_version->minor == minor &&
+ mc_version->revision > revision)
+ return true;
+ }
+
+ return false;
+}
+#endif
+
/* Probe routine for CAAM top (controller) level */
static int caam_probe(struct platform_device *pdev)
{
@@ -577,6 +617,7 @@ static int caam_probe(struct platform_device *pdev)
u8 rng_vid;
int pg_size;
int BLOCK_OFFSET = 0;
+ bool pr_support = false;
ctrlpriv = devm_kzalloc(&pdev->dev, sizeof(*ctrlpriv), GFP_KERNEL);
if (!ctrlpriv)
@@ -662,6 +703,21 @@ static int caam_probe(struct platform_device *pdev)
/* Get the IRQ of the controller (for security violations only) */
ctrlpriv->secvio_irq = irq_of_parse_and_map(nprop, 0);
+ np = of_find_compatible_node(NULL, NULL, "fsl,qoriq-mc");
+ ctrlpriv->mc_en = !!np;
+ of_node_put(np);
+
+#ifdef CONFIG_FSL_MC_BUS
+ if (ctrlpriv->mc_en) {
+ struct fsl_mc_version *mc_version;
+
+ mc_version = fsl_mc_get_version();
+ if (mc_version)
+ pr_support = check_version(mc_version, 10, 20, 0);
+ else
+ return -EPROBE_DEFER;
+ }
+#endif
/*
* Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel,
@@ -669,10 +725,6 @@ static int caam_probe(struct platform_device *pdev)
* In case of SoCs with Management Complex, MC f/w performs
* the configuration.
*/
- np = of_find_compatible_node(NULL, NULL, "fsl,qoriq-mc");
- ctrlpriv->mc_en = !!np;
- of_node_put(np);
-
if (!ctrlpriv->mc_en)
clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK,
MCFGR_AWCACHE_CACH | MCFGR_AWCACHE_BUFF |
@@ -779,7 +831,7 @@ static int caam_probe(struct platform_device *pdev)
* already instantiated, do RNG instantiation
* In case of SoCs with Management Complex, RNG is managed by MC f/w.
*/
- if (!ctrlpriv->mc_en && rng_vid >= 4) {
+ if (!(ctrlpriv->mc_en && pr_support) && rng_vid >= 4) {
ctrlpriv->rng4_sh_init =
rd_reg32(&ctrl->r4tst[0].rdsta);
/*
@@ -789,11 +841,11 @@ static int caam_probe(struct platform_device *pdev)
* to regenerate these keys before the next POR.
*/
gen_sk = ctrlpriv->rng4_sh_init & RDSTA_SKVN ? 0 : 1;
- ctrlpriv->rng4_sh_init &= RDSTA_IFMASK;
+ ctrlpriv->rng4_sh_init &= RDSTA_MASK;
do {
int inst_handles =
rd_reg32(&ctrl->r4tst[0].rdsta) &
- RDSTA_IFMASK;
+ RDSTA_MASK;
/*
* If either SH were instantiated by somebody else
* (e.g. u-boot) then it is assumed that the entropy
@@ -833,7 +885,7 @@ static int caam_probe(struct platform_device *pdev)
* Set handles init'ed by this module as the complement of the
* already initialized ones
*/
- ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_IFMASK;
+ ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_MASK;
/* Enable RDB bit so that RNG works faster */
clrsetbits_32(&ctrl->scfgr, 0, SCFGR_RDBENABLE);
diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h
index 4b6854bf896a..e796d3cb9be8 100644
--- a/drivers/crypto/caam/desc.h
+++ b/drivers/crypto/caam/desc.h
@@ -1254,6 +1254,8 @@
#define OP_ALG_ICV_OFF (0 << OP_ALG_ICV_SHIFT)
#define OP_ALG_ICV_ON (1 << OP_ALG_ICV_SHIFT)
+#define OP_ALG_PR_ON BIT(1)
+
#define OP_ALG_DIR_SHIFT 0
#define OP_ALG_DIR_MASK 1
#define OP_ALG_DECRYPT 0
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h
index c7c10c90464b..402d6a362e8c 100644
--- a/drivers/crypto/caam/intern.h
+++ b/drivers/crypto/caam/intern.h
@@ -11,6 +11,7 @@
#define INTERN_H
#include "ctrl.h"
+#include <crypto/engine.h>
/* Currently comes from Kconfig param as a ^2 (driver-required) */
#define JOBR_DEPTH (1 << CONFIG_CRYPTO_DEV_FSL_CAAM_RINGSIZE)
@@ -46,6 +47,7 @@ struct caam_drv_private_jr {
struct caam_job_ring __iomem *rregs; /* JobR's register space */
struct tasklet_struct irqtask;
int irq; /* One per queue */
+ bool hwrng;
/* Number of scatterlist crypt transforms active on the JobR */
atomic_t tfm_count ____cacheline_aligned;
@@ -60,6 +62,7 @@ struct caam_drv_private_jr {
int out_ring_read_index; /* Output index "tail" */
int tail; /* entinfo (s/w ring) tail index */
void *outring; /* Base of output ring, DMA-safe */
+ struct crypto_engine *engine;
};
/*
@@ -161,7 +164,7 @@ static inline void caam_pkc_exit(void)
#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API
int caam_rng_init(struct device *dev);
-void caam_rng_exit(void);
+void caam_rng_exit(struct device *dev);
#else
@@ -170,9 +173,7 @@ static inline int caam_rng_init(struct device *dev)
return 0;
}
-static inline void caam_rng_exit(void)
-{
-}
+static inline void caam_rng_exit(struct device *dev) {}
#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API */
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index fc97cde27059..4af22e7ceb4f 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -27,7 +27,8 @@ static struct jr_driver_data driver_data;
static DEFINE_MUTEX(algs_lock);
static unsigned int active_devs;
-static void register_algs(struct device *dev)
+static void register_algs(struct caam_drv_private_jr *jrpriv,
+ struct device *dev)
{
mutex_lock(&algs_lock);
@@ -37,7 +38,7 @@ static void register_algs(struct device *dev)
caam_algapi_init(dev);
caam_algapi_hash_init(dev);
caam_pkc_init(dev);
- caam_rng_init(dev);
+ jrpriv->hwrng = !caam_rng_init(dev);
caam_qi_algapi_init(dev);
algs_unlock:
@@ -53,7 +54,6 @@ static void unregister_algs(void)
caam_qi_algapi_exit();
- caam_rng_exit();
caam_pkc_exit();
caam_algapi_hash_exit();
caam_algapi_exit();
@@ -62,6 +62,15 @@ algs_unlock:
mutex_unlock(&algs_lock);
}
+static void caam_jr_crypto_engine_exit(void *data)
+{
+ struct device *jrdev = data;
+ struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev);
+
+ /* Free the resources of crypto-engine */
+ crypto_engine_exit(jrpriv->engine);
+}
+
static int caam_reset_hw_jr(struct device *dev)
{
struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
@@ -126,6 +135,9 @@ static int caam_jr_remove(struct platform_device *pdev)
jrdev = &pdev->dev;
jrpriv = dev_get_drvdata(jrdev);
+ if (jrpriv->hwrng)
+ caam_rng_exit(jrdev->parent);
+
/*
* Return EBUSY if job ring already allocated.
*/
@@ -324,8 +336,8 @@ void caam_jr_free(struct device *rdev)
EXPORT_SYMBOL(caam_jr_free);
/**
- * caam_jr_enqueue() - Enqueue a job descriptor head. Returns 0 if OK,
- * -EBUSY if the queue is full, -EIO if it cannot map the caller's
+ * caam_jr_enqueue() - Enqueue a job descriptor head. Returns -EINPROGRESS
+ * if OK, -ENOSPC if the queue is full, -EIO if it cannot map the caller's
* descriptor.
* @dev: device of the job ring to be used. This device should have
* been assigned prior by caam_jr_register().
@@ -377,7 +389,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc,
CIRC_SPACE(head, tail, JOBR_DEPTH) <= 0) {
spin_unlock_bh(&jrp->inplock);
dma_unmap_single(dev, desc_dma, desc_size, DMA_TO_DEVICE);
- return -EBUSY;
+ return -ENOSPC;
}
head_entry = &jrp->entinfo[head];
@@ -414,7 +426,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc,
spin_unlock_bh(&jrp->inplock);
- return 0;
+ return -EINPROGRESS;
}
EXPORT_SYMBOL(caam_jr_enqueue);
@@ -505,7 +517,7 @@ static int caam_jr_probe(struct platform_device *pdev)
int error;
jrdev = &pdev->dev;
- jrpriv = devm_kmalloc(jrdev, sizeof(*jrpriv), GFP_KERNEL);
+ jrpriv = devm_kzalloc(jrdev, sizeof(*jrpriv), GFP_KERNEL);
if (!jrpriv)
return -ENOMEM;
@@ -538,6 +550,25 @@ static int caam_jr_probe(struct platform_device *pdev)
return error;
}
+ /* Initialize crypto engine */
+ jrpriv->engine = crypto_engine_alloc_init(jrdev, false);
+ if (!jrpriv->engine) {
+ dev_err(jrdev, "Could not init crypto-engine\n");
+ return -ENOMEM;
+ }
+
+ error = devm_add_action_or_reset(jrdev, caam_jr_crypto_engine_exit,
+ jrdev);
+ if (error)
+ return error;
+
+ /* Start crypto engine */
+ error = crypto_engine_start(jrpriv->engine);
+ if (error) {
+ dev_err(jrdev, "Could not start crypto-engine\n");
+ return error;
+ }
+
/* Identify the interrupt */
jrpriv->irq = irq_of_parse_and_map(nprop, 0);
if (!jrpriv->irq) {
@@ -562,7 +593,7 @@ static int caam_jr_probe(struct platform_device *pdev)
atomic_set(&jrpriv->tfm_count, 0);
- register_algs(jrdev->parent);
+ register_algs(jrpriv, jrdev->parent);
return 0;
}
diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c
index 5a851ddc48fb..b0e8a4939b4f 100644
--- a/drivers/crypto/caam/key_gen.c
+++ b/drivers/crypto/caam/key_gen.c
@@ -108,7 +108,7 @@ int gen_split_key(struct device *jrdev, u8 *key_out,
init_completion(&result.completion);
ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result);
- if (!ret) {
+ if (ret == -EINPROGRESS) {
/* in progress */
wait_for_completion(&result.completion);
ret = result.err;
diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index dacf2fa4aa8e..b390b935db6d 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -4,7 +4,7 @@
* Queue Interface backend functionality
*
* Copyright 2013-2016 Freescale Semiconductor, Inc.
- * Copyright 2016-2017, 2019 NXP
+ * Copyright 2016-2017, 2019-2020 NXP
*/
#include <linux/cpumask.h>
@@ -124,8 +124,10 @@ int caam_qi_enqueue(struct device *qidev, struct caam_drv_req *req)
do {
ret = qman_enqueue(req->drv_ctx->req_fq, &fd);
- if (likely(!ret))
+ if (likely(!ret)) {
+ refcount_inc(&req->drv_ctx->refcnt);
return 0;
+ }
if (ret != -EBUSY)
break;
@@ -148,11 +150,6 @@ static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq,
fd = &msg->ern.fd;
- if (qm_fd_get_format(fd) != qm_fd_compound) {
- dev_err(qidev, "Non-compound FD from CAAM\n");
- return;
- }
-
drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd));
if (!drv_req) {
dev_err(qidev,
@@ -160,6 +157,13 @@ static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq,
return;
}
+ refcount_dec(&drv_req->drv_ctx->refcnt);
+
+ if (qm_fd_get_format(fd) != qm_fd_compound) {
+ dev_err(qidev, "Non-compound FD from CAAM\n");
+ return;
+ }
+
dma_unmap_single(drv_req->drv_ctx->qidev, qm_fd_addr(fd),
sizeof(drv_req->fd_sgt), DMA_BIDIRECTIONAL);
@@ -287,9 +291,10 @@ empty_fq:
return ret;
}
-static int empty_caam_fq(struct qman_fq *fq)
+static int empty_caam_fq(struct qman_fq *fq, struct caam_drv_ctx *drv_ctx)
{
int ret;
+ int retries = 10;
struct qm_mcr_queryfq_np np;
/* Wait till the older CAAM FQ get empty */
@@ -304,11 +309,18 @@ static int empty_caam_fq(struct qman_fq *fq)
msleep(20);
} while (1);
- /*
- * Give extra time for pending jobs from this FQ in holding tanks
- * to get processed
- */
- msleep(20);
+ /* Wait until pending jobs from this FQ are processed by CAAM */
+ do {
+ if (refcount_read(&drv_ctx->refcnt) == 1)
+ break;
+
+ msleep(20);
+ } while (--retries);
+
+ if (!retries)
+ dev_warn_once(drv_ctx->qidev, "%d frames from FQID %u still pending in CAAM\n",
+ refcount_read(&drv_ctx->refcnt), fq->fqid);
+
return 0;
}
@@ -340,7 +352,7 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc)
drv_ctx->req_fq = new_fq;
/* Empty and remove the older FQ */
- ret = empty_caam_fq(old_fq);
+ ret = empty_caam_fq(old_fq, drv_ctx);
if (ret) {
dev_err(qidev, "Old CAAM FQ empty failed: %d\n", ret);
@@ -453,6 +465,9 @@ struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev,
return ERR_PTR(-ENOMEM);
}
+ /* init reference counter used to track references to request FQ */
+ refcount_set(&drv_ctx->refcnt, 1);
+
drv_ctx->qidev = qidev;
return drv_ctx;
}
@@ -571,6 +586,16 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p,
return qman_cb_dqrr_stop;
fd = &dqrr->fd;
+
+ drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd));
+ if (unlikely(!drv_req)) {
+ dev_err(qidev,
+ "Can't find original request for caam response\n");
+ return qman_cb_dqrr_consume;
+ }
+
+ refcount_dec(&drv_req->drv_ctx->refcnt);
+
status = be32_to_cpu(fd->status);
if (unlikely(status)) {
u32 ssrc = status & JRSTA_SSRC_MASK;
@@ -588,13 +613,6 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p,
return qman_cb_dqrr_consume;
}
- drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd));
- if (unlikely(!drv_req)) {
- dev_err(qidev,
- "Can't find original request for caam response\n");
- return qman_cb_dqrr_consume;
- }
-
dma_unmap_single(drv_req->drv_ctx->qidev, qm_fd_addr(fd),
sizeof(drv_req->fd_sgt), DMA_BIDIRECTIONAL);
diff --git a/drivers/crypto/caam/qi.h b/drivers/crypto/caam/qi.h
index 848958951f68..5894f16f8fe3 100644
--- a/drivers/crypto/caam/qi.h
+++ b/drivers/crypto/caam/qi.h
@@ -3,7 +3,7 @@
* Public definitions for the CAAM/QI (Queue Interface) backend.
*
* Copyright 2013-2016 Freescale Semiconductor, Inc.
- * Copyright 2016-2017 NXP
+ * Copyright 2016-2017, 2020 NXP
*/
#ifndef __QI_H__
@@ -52,6 +52,7 @@ enum optype {
* @context_a: shared descriptor dma address
* @req_fq: to-CAAM request frame queue
* @rsp_fq: from-CAAM response frame queue
+ * @refcnt: reference counter incremented for each frame enqueued in to-CAAM FQ
* @cpu: cpu on which to receive CAAM response
* @op_type: operation type
* @qidev: device pointer for CAAM/QI backend
@@ -62,6 +63,7 @@ struct caam_drv_ctx {
dma_addr_t context_a;
struct qman_fq *req_fq;
struct qman_fq *rsp_fq;
+ refcount_t refcnt;
int cpu;
enum optype op_type;
struct device *qidev;
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index 05127b70527d..0f810bc13b2b 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -487,7 +487,8 @@ struct rngtst {
/* RNG4 TRNG test registers */
struct rng4tst {
-#define RTMCTL_PRGM 0x00010000 /* 1 -> program mode, 0 -> run mode */
+#define RTMCTL_ACC BIT(5) /* TRNG access mode */
+#define RTMCTL_PRGM BIT(16) /* 1 -> program mode, 0 -> run mode */
#define RTMCTL_SAMP_MODE_VON_NEUMANN_ES_SC 0 /* use von Neumann data in
both entropy shifter and
statistical checker */
@@ -523,9 +524,11 @@ struct rng4tst {
u32 rsvd1[40];
#define RDSTA_SKVT 0x80000000
#define RDSTA_SKVN 0x40000000
+#define RDSTA_PR0 BIT(4)
+#define RDSTA_PR1 BIT(5)
#define RDSTA_IF0 0x00000001
#define RDSTA_IF1 0x00000002
-#define RDSTA_IFMASK (RDSTA_IF1 | RDSTA_IF0)
+#define RDSTA_MASK (RDSTA_PR1 | RDSTA_PR0 | RDSTA_IF1 | RDSTA_IF0)
u32 rdsta;
u32 rsvd2[15];
};
diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c
index c4632d84c9a1..e91be9b8b083 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_main.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_main.c
@@ -71,7 +71,7 @@ struct ucode {
char version[VERSION_LEN - 1];
__be32 code_size;
u8 raz[12];
- u64 code[0];
+ u64 code[];
};
/**
diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
index e95e7aa5dbf1..ae7b44599914 100644
--- a/drivers/crypto/ccp/psp-dev.c
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -215,6 +215,9 @@ void psp_dev_destroy(struct sp_device *sp)
tee_dev_destroy(psp);
sp_free_psp_irq(sp, psp);
+
+ if (sp->clear_psp_master_device)
+ sp->clear_psp_master_device(sp);
}
void psp_set_sev_irq_handler(struct psp_device *psp, psp_irq_handler_t handler,
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index e467860f797d..896f190b9a50 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -283,11 +283,11 @@ static int sev_get_platform_state(int *state, int *error)
return rc;
}
-static int sev_ioctl_do_reset(struct sev_issue_cmd *argp)
+static int sev_ioctl_do_reset(struct sev_issue_cmd *argp, bool writable)
{
int state, rc;
- if (!capable(CAP_SYS_ADMIN))
+ if (!writable)
return -EPERM;
/*
@@ -331,12 +331,12 @@ static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp)
return ret;
}
-static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp)
+static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp, bool writable)
{
struct sev_device *sev = psp_master->sev_data;
int rc;
- if (!capable(CAP_SYS_ADMIN))
+ if (!writable)
return -EPERM;
if (sev->state == SEV_STATE_UNINIT) {
@@ -348,7 +348,7 @@ static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp)
return __sev_do_cmd_locked(cmd, NULL, &argp->error);
}
-static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp)
+static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable)
{
struct sev_device *sev = psp_master->sev_data;
struct sev_user_data_pek_csr input;
@@ -356,7 +356,7 @@ static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp)
void *blob = NULL;
int ret;
- if (!capable(CAP_SYS_ADMIN))
+ if (!writable)
return -EPERM;
if (copy_from_user(&input, (void __user *)argp->data, sizeof(input)))
@@ -539,7 +539,7 @@ fw_err:
return ret;
}
-static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp)
+static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp, bool writable)
{
struct sev_device *sev = psp_master->sev_data;
struct sev_user_data_pek_cert_import input;
@@ -547,7 +547,7 @@ static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp)
void *pek_blob, *oca_blob;
int ret;
- if (!capable(CAP_SYS_ADMIN))
+ if (!writable)
return -EPERM;
if (copy_from_user(&input, (void __user *)argp->data, sizeof(input)))
@@ -698,7 +698,7 @@ static int sev_ioctl_do_get_id(struct sev_issue_cmd *argp)
return ret;
}
-static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp)
+static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable)
{
struct sev_device *sev = psp_master->sev_data;
struct sev_user_data_pdh_cert_export input;
@@ -708,7 +708,7 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp)
/* If platform is not in INIT state then transition it to INIT. */
if (sev->state != SEV_STATE_INIT) {
- if (!capable(CAP_SYS_ADMIN))
+ if (!writable)
return -EPERM;
ret = __sev_platform_init_locked(&argp->error);
@@ -801,6 +801,7 @@ static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg)
void __user *argp = (void __user *)arg;
struct sev_issue_cmd input;
int ret = -EFAULT;
+ bool writable = file->f_mode & FMODE_WRITE;
if (!psp_master || !psp_master->sev_data)
return -ENODEV;
@@ -819,25 +820,25 @@ static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg)
switch (input.cmd) {
case SEV_FACTORY_RESET:
- ret = sev_ioctl_do_reset(&input);
+ ret = sev_ioctl_do_reset(&input, writable);
break;
case SEV_PLATFORM_STATUS:
ret = sev_ioctl_do_platform_status(&input);
break;
case SEV_PEK_GEN:
- ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PEK_GEN, &input);
+ ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PEK_GEN, &input, writable);
break;
case SEV_PDH_GEN:
- ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PDH_GEN, &input);
+ ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PDH_GEN, &input, writable);
break;
case SEV_PEK_CSR:
- ret = sev_ioctl_do_pek_csr(&input);
+ ret = sev_ioctl_do_pek_csr(&input, writable);
break;
case SEV_PEK_CERT_IMPORT:
- ret = sev_ioctl_do_pek_import(&input);
+ ret = sev_ioctl_do_pek_import(&input, writable);
break;
case SEV_PDH_CERT_EXPORT:
- ret = sev_ioctl_do_pdh_export(&input);
+ ret = sev_ioctl_do_pdh_export(&input, writable);
break;
case SEV_GET_ID:
pr_warn_once("SEV_GET_ID command is deprecated, use SEV_GET_ID2\n");
@@ -896,9 +897,9 @@ EXPORT_SYMBOL_GPL(sev_guest_df_flush);
static void sev_exit(struct kref *ref)
{
- struct sev_misc_dev *misc_dev = container_of(ref, struct sev_misc_dev, refcount);
-
misc_deregister(&misc_dev->misc);
+ kfree(misc_dev);
+ misc_dev = NULL;
}
static int sev_misc_init(struct sev_device *sev)
@@ -916,7 +917,7 @@ static int sev_misc_init(struct sev_device *sev)
if (!misc_dev) {
struct miscdevice *misc;
- misc_dev = devm_kzalloc(dev, sizeof(*misc_dev), GFP_KERNEL);
+ misc_dev = kzalloc(sizeof(*misc_dev), GFP_KERNEL);
if (!misc_dev)
return -ENOMEM;
diff --git a/drivers/crypto/ccp/sp-dev.h b/drivers/crypto/ccp/sp-dev.h
index 423594608ad1..f913f1494af9 100644
--- a/drivers/crypto/ccp/sp-dev.h
+++ b/drivers/crypto/ccp/sp-dev.h
@@ -90,6 +90,7 @@ struct sp_device {
/* get and set master device */
struct sp_device*(*get_psp_master_device)(void);
void (*set_psp_master_device)(struct sp_device *);
+ void (*clear_psp_master_device)(struct sp_device *);
bool irq_registered;
bool use_tasklet;
diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c
index 56c1f61c0f84..cb6cb47053f4 100644
--- a/drivers/crypto/ccp/sp-pci.c
+++ b/drivers/crypto/ccp/sp-pci.c
@@ -146,6 +146,14 @@ static struct sp_device *psp_get_master(void)
return sp_dev_master;
}
+static void psp_clear_master(struct sp_device *sp)
+{
+ if (sp == sp_dev_master) {
+ sp_dev_master = NULL;
+ dev_dbg(sp->dev, "Cleared sp_dev_master\n");
+ }
+}
+
static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct sp_device *sp;
@@ -206,6 +214,7 @@ static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pci_set_master(pdev);
sp->set_psp_master_device = psp_set_master;
sp->get_psp_master_device = psp_get_master;
+ sp->clear_psp_master_device = psp_clear_master;
ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
if (ret) {
diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c
index 2fc0e0da790b..1cf51edbc4b9 100644
--- a/drivers/crypto/ccree/cc_aead.c
+++ b/drivers/crypto/ccree/cc_aead.c
@@ -6,8 +6,9 @@
#include <crypto/algapi.h>
#include <crypto/internal/aead.h>
#include <crypto/authenc.h>
-#include <crypto/internal/des.h>
+#include <crypto/gcm.h>
#include <linux/rtnetlink.h>
+#include <crypto/internal/des.h>
#include "cc_driver.h"
#include "cc_buffer_mgr.h"
#include "cc_aead.h"
@@ -26,7 +27,7 @@
#define MAX_NONCE_SIZE CTR_RFC3686_NONCE_SIZE
struct cc_aead_handle {
- cc_sram_addr_t sram_workspace_addr;
+ u32 sram_workspace_addr;
struct list_head aead_list;
};
@@ -60,11 +61,6 @@ struct cc_aead_ctx {
enum drv_hash_mode auth_mode;
};
-static inline bool valid_assoclen(struct aead_request *req)
-{
- return ((req->assoclen == 16) || (req->assoclen == 20));
-}
-
static void cc_aead_exit(struct crypto_aead *tfm)
{
struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
@@ -417,7 +413,7 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *authkey,
dma_addr_t key_dma_addr = 0;
struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
struct device *dev = drvdata_to_dev(ctx->drvdata);
- u32 larval_addr = cc_larval_digest_addr(ctx->drvdata, ctx->auth_mode);
+ u32 larval_addr;
struct cc_crypto_req cc_req = {};
unsigned int blocksize;
unsigned int digestsize;
@@ -448,8 +444,7 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *authkey,
if (!key)
return -ENOMEM;
- key_dma_addr = dma_map_single(dev, (void *)key, keylen,
- DMA_TO_DEVICE);
+ key_dma_addr = dma_map_single(dev, key, keylen, DMA_TO_DEVICE);
if (dma_mapping_error(dev, key_dma_addr)) {
dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n",
key, keylen);
@@ -460,6 +455,8 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *authkey,
/* Load hash initial state */
hw_desc_init(&desc[idx]);
set_cipher_mode(&desc[idx], hashmode);
+ larval_addr = cc_larval_digest_addr(ctx->drvdata,
+ ctx->auth_mode);
set_din_sram(&desc[idx], larval_addr, digestsize);
set_flow_mode(&desc[idx], S_DIN_to_HASH);
set_setup_mode(&desc[idx], SETUP_LOAD_STATE0);
@@ -796,7 +793,7 @@ static void cc_proc_authen_desc(struct aead_request *areq,
* assoc. + iv + data -compact in one table
* if assoclen is ZERO only IV perform
*/
- cc_sram_addr_t mlli_addr = areq_ctx->assoc.sram_addr;
+ u32 mlli_addr = areq_ctx->assoc.sram_addr;
u32 mlli_nents = areq_ctx->assoc.mlli_nents;
if (areq_ctx->is_single_pass) {
@@ -1170,7 +1167,7 @@ static void cc_mlli_to_sram(struct aead_request *req,
req_ctx->data_buff_type == CC_DMA_BUF_MLLI ||
!req_ctx->is_single_pass) && req_ctx->mlli_params.mlli_len) {
dev_dbg(dev, "Copy-to-sram: mlli_dma=%08x, mlli_size=%u\n",
- (unsigned int)ctx->drvdata->mlli_sram_addr,
+ ctx->drvdata->mlli_sram_addr,
req_ctx->mlli_params.mlli_len);
/* Copy MLLI table host-to-sram */
hw_desc_init(&desc[*seq_size]);
@@ -1222,7 +1219,7 @@ static void cc_hmac_authenc(struct aead_request *req, struct cc_hw_desc desc[],
req_ctx->is_single_pass);
if (req_ctx->is_single_pass) {
- /**
+ /*
* Single-pass flow
*/
cc_set_hmac_desc(req, desc, seq_size);
@@ -1234,7 +1231,7 @@ static void cc_hmac_authenc(struct aead_request *req, struct cc_hw_desc desc[],
return;
}
- /**
+ /*
* Double-pass flow
* Fallback for unsupported single-pass modes,
* i.e. using assoc. data of non-word-multiple
@@ -1275,7 +1272,7 @@ cc_xcbc_authenc(struct aead_request *req, struct cc_hw_desc desc[],
req_ctx->is_single_pass);
if (req_ctx->is_single_pass) {
- /**
+ /*
* Single-pass flow
*/
cc_set_xcbc_desc(req, desc, seq_size);
@@ -1286,7 +1283,7 @@ cc_xcbc_authenc(struct aead_request *req, struct cc_hw_desc desc[],
return;
}
- /**
+ /*
* Double-pass flow
* Fallback for unsupported single-pass modes,
* i.e. using assoc. data of non-word-multiple
@@ -1611,7 +1608,6 @@ static void cc_proc_rfc4309_ccm(struct aead_request *req)
memcpy(areq_ctx->ctr_iv + CCM_BLOCK_IV_OFFSET, req->iv,
CCM_BLOCK_IV_SIZE);
req->iv = areq_ctx->ctr_iv;
- areq_ctx->assoclen -= CCM_BLOCK_IV_SIZE;
}
static void cc_set_ghash_desc(struct aead_request *req,
@@ -1799,12 +1795,6 @@ static int cc_gcm(struct aead_request *req, struct cc_hw_desc desc[],
struct aead_req_ctx *req_ctx = aead_request_ctx(req);
unsigned int cipher_flow_mode;
- if (req_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT) {
- cipher_flow_mode = AES_and_HASH;
- } else { /* Encrypt */
- cipher_flow_mode = AES_to_HASH_and_DOUT;
- }
-
//in RFC4543 no data to encrypt. just copy data from src to dest.
if (req_ctx->plaintext_authenticate_only) {
cc_proc_cipher_desc(req, BYPASS, desc, seq_size);
@@ -1816,6 +1806,12 @@ static int cc_gcm(struct aead_request *req, struct cc_hw_desc desc[],
return 0;
}
+ if (req_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT) {
+ cipher_flow_mode = AES_and_HASH;
+ } else { /* Encrypt */
+ cipher_flow_mode = AES_to_HASH_and_DOUT;
+ }
+
// for gcm and rfc4106.
cc_set_ghash_desc(req, desc, seq_size);
/* process(ghash) assoc data */
@@ -1870,8 +1866,7 @@ static int config_gcm_context(struct aead_request *req)
*/
__be64 temp64;
- temp64 = cpu_to_be64((req_ctx->assoclen +
- GCM_BLOCK_RFC4_IV_SIZE + cryptlen) * 8);
+ temp64 = cpu_to_be64((req_ctx->assoclen + cryptlen) * 8);
memcpy(&req_ctx->gcm_len_block.len_a, &temp64, sizeof(temp64));
temp64 = 0;
memcpy(&req_ctx->gcm_len_block.len_c, &temp64, 8);
@@ -1891,7 +1886,6 @@ static void cc_proc_rfc4_gcm(struct aead_request *req)
memcpy(areq_ctx->ctr_iv + GCM_BLOCK_RFC4_IV_OFFSET, req->iv,
GCM_BLOCK_RFC4_IV_SIZE);
req->iv = areq_ctx->ctr_iv;
- areq_ctx->assoclen -= GCM_BLOCK_RFC4_IV_SIZE;
}
static int cc_proc_aead(struct aead_request *req,
@@ -1921,8 +1915,8 @@ static int cc_proc_aead(struct aead_request *req,
}
/* Setup request structure */
- cc_req.user_cb = (void *)cc_aead_complete;
- cc_req.user_arg = (void *)req;
+ cc_req.user_cb = cc_aead_complete;
+ cc_req.user_arg = req;
/* Setup request context */
areq_ctx->gen_ctx.op_type = direct;
@@ -1989,7 +1983,6 @@ static int cc_proc_aead(struct aead_request *req,
/* Load MLLI tables to SRAM if necessary */
cc_mlli_to_sram(req, desc, &seq_len);
- /*TODO: move seq len by reference */
switch (ctx->auth_mode) {
case DRV_HASH_SHA1:
case DRV_HASH_SHA256:
@@ -2034,9 +2027,6 @@ static int cc_aead_encrypt(struct aead_request *req)
/* No generated IV required */
areq_ctx->backup_iv = req->iv;
areq_ctx->assoclen = req->assoclen;
- areq_ctx->is_gcm4543 = false;
-
- areq_ctx->plaintext_authenticate_only = false;
rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT);
if (rc != -EINPROGRESS && rc != -EBUSY)
@@ -2050,22 +2040,17 @@ static int cc_rfc4309_ccm_encrypt(struct aead_request *req)
/* Very similar to cc_aead_encrypt() above. */
struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
- struct device *dev = drvdata_to_dev(ctx->drvdata);
- int rc = -EINVAL;
+ int rc;
- if (!valid_assoclen(req)) {
- dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen);
+ rc = crypto_ipsec_check_assoclen(req->assoclen);
+ if (rc)
goto out;
- }
memset(areq_ctx, 0, sizeof(*areq_ctx));
/* No generated IV required */
areq_ctx->backup_iv = req->iv;
- areq_ctx->assoclen = req->assoclen;
- areq_ctx->is_gcm4543 = true;
+ areq_ctx->assoclen = req->assoclen - CCM_BLOCK_IV_SIZE;
cc_proc_rfc4309_ccm(req);
@@ -2086,9 +2071,6 @@ static int cc_aead_decrypt(struct aead_request *req)
/* No generated IV required */
areq_ctx->backup_iv = req->iv;
areq_ctx->assoclen = req->assoclen;
- areq_ctx->is_gcm4543 = false;
-
- areq_ctx->plaintext_authenticate_only = false;
rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT);
if (rc != -EINPROGRESS && rc != -EBUSY)
@@ -2099,24 +2081,19 @@ static int cc_aead_decrypt(struct aead_request *req)
static int cc_rfc4309_ccm_decrypt(struct aead_request *req)
{
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
- struct device *dev = drvdata_to_dev(ctx->drvdata);
struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
- int rc = -EINVAL;
+ int rc;
- if (!valid_assoclen(req)) {
- dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen);
+ rc = crypto_ipsec_check_assoclen(req->assoclen);
+ if (rc)
goto out;
- }
memset(areq_ctx, 0, sizeof(*areq_ctx));
/* No generated IV required */
areq_ctx->backup_iv = req->iv;
- areq_ctx->assoclen = req->assoclen;
+ areq_ctx->assoclen = req->assoclen - CCM_BLOCK_IV_SIZE;
- areq_ctx->is_gcm4543 = true;
cc_proc_rfc4309_ccm(req);
rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT);
@@ -2216,28 +2193,20 @@ static int cc_rfc4543_gcm_setauthsize(struct crypto_aead *authenc,
static int cc_rfc4106_gcm_encrypt(struct aead_request *req)
{
- /* Very similar to cc_aead_encrypt() above. */
-
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
- struct device *dev = drvdata_to_dev(ctx->drvdata);
struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
- int rc = -EINVAL;
+ int rc;
- if (!valid_assoclen(req)) {
- dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen);
+ rc = crypto_ipsec_check_assoclen(req->assoclen);
+ if (rc)
goto out;
- }
memset(areq_ctx, 0, sizeof(*areq_ctx));
/* No generated IV required */
areq_ctx->backup_iv = req->iv;
- areq_ctx->assoclen = req->assoclen;
- areq_ctx->plaintext_authenticate_only = false;
+ areq_ctx->assoclen = req->assoclen - GCM_BLOCK_RFC4_IV_SIZE;
cc_proc_rfc4_gcm(req);
- areq_ctx->is_gcm4543 = true;
rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT);
if (rc != -EINPROGRESS && rc != -EBUSY)
@@ -2248,17 +2217,12 @@ out:
static int cc_rfc4543_gcm_encrypt(struct aead_request *req)
{
- /* Very similar to cc_aead_encrypt() above. */
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
- struct device *dev = drvdata_to_dev(ctx->drvdata);
struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
- int rc = -EINVAL;
+ int rc;
- if (!valid_assoclen(req)) {
- dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen);
+ rc = crypto_ipsec_check_assoclen(req->assoclen);
+ if (rc)
goto out;
- }
memset(areq_ctx, 0, sizeof(*areq_ctx));
@@ -2270,7 +2234,6 @@ static int cc_rfc4543_gcm_encrypt(struct aead_request *req)
areq_ctx->assoclen = req->assoclen;
cc_proc_rfc4_gcm(req);
- areq_ctx->is_gcm4543 = true;
rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT);
if (rc != -EINPROGRESS && rc != -EBUSY)
@@ -2281,28 +2244,20 @@ out:
static int cc_rfc4106_gcm_decrypt(struct aead_request *req)
{
- /* Very similar to cc_aead_decrypt() above. */
-
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
- struct device *dev = drvdata_to_dev(ctx->drvdata);
struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
- int rc = -EINVAL;
+ int rc;
- if (!valid_assoclen(req)) {
- dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen);
+ rc = crypto_ipsec_check_assoclen(req->assoclen);
+ if (rc)
goto out;
- }
memset(areq_ctx, 0, sizeof(*areq_ctx));
/* No generated IV required */
areq_ctx->backup_iv = req->iv;
- areq_ctx->assoclen = req->assoclen;
- areq_ctx->plaintext_authenticate_only = false;
+ areq_ctx->assoclen = req->assoclen - GCM_BLOCK_RFC4_IV_SIZE;
cc_proc_rfc4_gcm(req);
- areq_ctx->is_gcm4543 = true;
rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT);
if (rc != -EINPROGRESS && rc != -EBUSY)
@@ -2313,17 +2268,12 @@ out:
static int cc_rfc4543_gcm_decrypt(struct aead_request *req)
{
- /* Very similar to cc_aead_decrypt() above. */
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
- struct device *dev = drvdata_to_dev(ctx->drvdata);
struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
- int rc = -EINVAL;
+ int rc;
- if (!valid_assoclen(req)) {
- dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen);
+ rc = crypto_ipsec_check_assoclen(req->assoclen);
+ if (rc)
goto out;
- }
memset(areq_ctx, 0, sizeof(*areq_ctx));
@@ -2335,7 +2285,6 @@ static int cc_rfc4543_gcm_decrypt(struct aead_request *req)
areq_ctx->assoclen = req->assoclen;
cc_proc_rfc4_gcm(req);
- areq_ctx->is_gcm4543 = true;
rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT);
if (rc != -EINPROGRESS && rc != -EBUSY)
@@ -2614,7 +2563,7 @@ static struct cc_crypto_alg *cc_create_aead_alg(struct cc_alg_template *tmpl,
struct cc_crypto_alg *t_alg;
struct aead_alg *alg;
- t_alg = kzalloc(sizeof(*t_alg), GFP_KERNEL);
+ t_alg = devm_kzalloc(dev, sizeof(*t_alg), GFP_KERNEL);
if (!t_alg)
return ERR_PTR(-ENOMEM);
@@ -2628,6 +2577,7 @@ static struct cc_crypto_alg *cc_create_aead_alg(struct cc_alg_template *tmpl,
alg->base.cra_ctxsize = sizeof(struct cc_aead_ctx);
alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY;
+ alg->base.cra_blocksize = tmpl->blocksize;
alg->init = cc_aead_init;
alg->exit = cc_aead_exit;
@@ -2643,19 +2593,12 @@ static struct cc_crypto_alg *cc_create_aead_alg(struct cc_alg_template *tmpl,
int cc_aead_free(struct cc_drvdata *drvdata)
{
struct cc_crypto_alg *t_alg, *n;
- struct cc_aead_handle *aead_handle =
- (struct cc_aead_handle *)drvdata->aead_handle;
-
- if (aead_handle) {
- /* Remove registered algs */
- list_for_each_entry_safe(t_alg, n, &aead_handle->aead_list,
- entry) {
- crypto_unregister_aead(&t_alg->aead_alg);
- list_del(&t_alg->entry);
- kfree(t_alg);
- }
- kfree(aead_handle);
- drvdata->aead_handle = NULL;
+ struct cc_aead_handle *aead_handle = drvdata->aead_handle;
+
+ /* Remove registered algs */
+ list_for_each_entry_safe(t_alg, n, &aead_handle->aead_list, entry) {
+ crypto_unregister_aead(&t_alg->aead_alg);
+ list_del(&t_alg->entry);
}
return 0;
@@ -2669,7 +2612,7 @@ int cc_aead_alloc(struct cc_drvdata *drvdata)
int alg;
struct device *dev = drvdata_to_dev(drvdata);
- aead_handle = kmalloc(sizeof(*aead_handle), GFP_KERNEL);
+ aead_handle = devm_kmalloc(dev, sizeof(*aead_handle), GFP_KERNEL);
if (!aead_handle) {
rc = -ENOMEM;
goto fail0;
@@ -2682,7 +2625,6 @@ int cc_aead_alloc(struct cc_drvdata *drvdata)
MAX_HMAC_DIGEST_SIZE);
if (aead_handle->sram_workspace_addr == NULL_SRAM_ADDR) {
- dev_err(dev, "SRAM pool exhausted\n");
rc = -ENOMEM;
goto fail1;
}
@@ -2705,18 +2647,16 @@ int cc_aead_alloc(struct cc_drvdata *drvdata)
if (rc) {
dev_err(dev, "%s alg registration failed\n",
t_alg->aead_alg.base.cra_driver_name);
- goto fail2;
- } else {
- list_add_tail(&t_alg->entry, &aead_handle->aead_list);
- dev_dbg(dev, "Registered %s\n",
- t_alg->aead_alg.base.cra_driver_name);
+ goto fail1;
}
+
+ list_add_tail(&t_alg->entry, &aead_handle->aead_list);
+ dev_dbg(dev, "Registered %s\n",
+ t_alg->aead_alg.base.cra_driver_name);
}
return 0;
-fail2:
- kfree(t_alg);
fail1:
cc_aead_free(drvdata);
fail0:
diff --git a/drivers/crypto/ccree/cc_aead.h b/drivers/crypto/ccree/cc_aead.h
index f12169b57f9d..b69591550730 100644
--- a/drivers/crypto/ccree/cc_aead.h
+++ b/drivers/crypto/ccree/cc_aead.h
@@ -66,7 +66,7 @@ struct aead_req_ctx {
/* used to prevent cache coherence problem */
u8 backup_mac[MAX_MAC_SIZE];
u8 *backup_iv; /* store orig iv */
- u32 assoclen; /* internal assoclen */
+ u32 assoclen; /* size of AAD buffer to authenticate */
dma_addr_t mac_buf_dma_addr; /* internal ICV DMA buffer */
/* buffer for internal ccm configurations */
dma_addr_t ccm_iv0_dma_addr;
@@ -79,7 +79,6 @@ struct aead_req_ctx {
dma_addr_t gcm_iv_inc2_dma_addr;
dma_addr_t hkey_dma_addr; /* Phys. address of hkey */
dma_addr_t gcm_block_len_dma_addr; /* Phys. address of gcm block len */
- bool is_gcm4543;
u8 *icv_virt_addr; /* Virt. address of ICV */
struct async_gen_req_ctx gen_ctx;
diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c
index a72586eccd81..b2bd093e7013 100644
--- a/drivers/crypto/ccree/cc_buffer_mgr.c
+++ b/drivers/crypto/ccree/cc_buffer_mgr.c
@@ -13,16 +13,6 @@
#include "cc_hash.h"
#include "cc_aead.h"
-enum dma_buffer_type {
- DMA_NULL_TYPE = -1,
- DMA_SGL_TYPE = 1,
- DMA_BUFF_TYPE = 2,
-};
-
-struct buff_mgr_handle {
- struct dma_pool *mlli_buffs_pool;
-};
-
union buffer_array_entry {
struct scatterlist *sgl;
dma_addr_t buffer_dma;
@@ -34,7 +24,6 @@ struct buffer_array {
unsigned int offset[MAX_NUM_OF_BUFFERS_IN_MLLI];
int nents[MAX_NUM_OF_BUFFERS_IN_MLLI];
int total_data_len[MAX_NUM_OF_BUFFERS_IN_MLLI];
- enum dma_buffer_type type[MAX_NUM_OF_BUFFERS_IN_MLLI];
bool is_last[MAX_NUM_OF_BUFFERS_IN_MLLI];
u32 *mlli_nents[MAX_NUM_OF_BUFFERS_IN_MLLI];
};
@@ -64,11 +53,7 @@ static void cc_copy_mac(struct device *dev, struct aead_request *req,
enum cc_sg_cpy_direct dir)
{
struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- u32 skip = areq_ctx->assoclen + req->cryptlen;
-
- if (areq_ctx->is_gcm4543)
- skip += crypto_aead_ivsize(tfm);
+ u32 skip = req->assoclen + req->cryptlen;
cc_copy_sg_portion(dev, areq_ctx->backup_mac, req->src,
(skip - areq_ctx->req_authsize), skip, dir);
@@ -77,9 +62,13 @@ static void cc_copy_mac(struct device *dev, struct aead_request *req,
/**
* cc_get_sgl_nents() - Get scatterlist number of entries.
*
+ * @dev: Device object
* @sg_list: SG list
* @nbytes: [IN] Total SGL data bytes.
* @lbytes: [OUT] Returns the amount of bytes at the last entry
+ *
+ * Return:
+ * Number of entries in the scatterlist
*/
static unsigned int cc_get_sgl_nents(struct device *dev,
struct scatterlist *sg_list,
@@ -87,6 +76,8 @@ static unsigned int cc_get_sgl_nents(struct device *dev,
{
unsigned int nents = 0;
+ *lbytes = 0;
+
while (nbytes && sg_list) {
nents++;
/* get the number of bytes in the last entry */
@@ -95,6 +86,7 @@ static unsigned int cc_get_sgl_nents(struct device *dev,
nbytes : sg_list->length;
sg_list = sg_next(sg_list);
}
+
dev_dbg(dev, "nents %d last bytes %d\n", nents, *lbytes);
return nents;
}
@@ -103,11 +95,13 @@ static unsigned int cc_get_sgl_nents(struct device *dev,
* cc_copy_sg_portion() - Copy scatter list data,
* from to_skip to end, to dest and vice versa
*
- * @dest:
- * @sg:
- * @to_skip:
- * @end:
- * @direct:
+ * @dev: Device object
+ * @dest: Buffer to copy to/from
+ * @sg: SG list
+ * @to_skip: Number of bytes to skip before copying
+ * @end: Offset of last byte to copy
+ * @direct: Transfer direction (true == from SG list to buffer, false == from
+ * buffer to SG list)
*/
void cc_copy_sg_portion(struct device *dev, u8 *dest, struct scatterlist *sg,
u32 to_skip, u32 end, enum cc_sg_cpy_direct direct)
@@ -115,7 +109,7 @@ void cc_copy_sg_portion(struct device *dev, u8 *dest, struct scatterlist *sg,
u32 nents;
nents = sg_nents_for_len(sg, end);
- sg_copy_buffer(sg, nents, (void *)dest, (end - to_skip + 1), to_skip,
+ sg_copy_buffer(sg, nents, dest, (end - to_skip + 1), to_skip,
(direct == CC_SG_TO_BUF));
}
@@ -204,21 +198,15 @@ static int cc_generate_mlli(struct device *dev, struct buffer_array *sg_data,
goto build_mlli_exit;
}
/* Point to start of MLLI */
- mlli_p = (u32 *)mlli_params->mlli_virt_addr;
+ mlli_p = mlli_params->mlli_virt_addr;
/* go over all SG's and link it to one MLLI table */
for (i = 0; i < sg_data->num_of_buffers; i++) {
union buffer_array_entry *entry = &sg_data->entry[i];
u32 tot_len = sg_data->total_data_len[i];
u32 offset = sg_data->offset[i];
- if (sg_data->type[i] == DMA_SGL_TYPE)
- rc = cc_render_sg_to_mlli(dev, entry->sgl, tot_len,
- offset, &total_nents,
- &mlli_p);
- else /*DMA_BUFF_TYPE*/
- rc = cc_render_buff_to_mlli(dev, entry->buffer_dma,
- tot_len, &total_nents,
- &mlli_p);
+ rc = cc_render_sg_to_mlli(dev, entry->sgl, tot_len, offset,
+ &total_nents, &mlli_p);
if (rc)
return rc;
@@ -244,27 +232,6 @@ build_mlli_exit:
return rc;
}
-static void cc_add_buffer_entry(struct device *dev,
- struct buffer_array *sgl_data,
- dma_addr_t buffer_dma, unsigned int buffer_len,
- bool is_last_entry, u32 *mlli_nents)
-{
- unsigned int index = sgl_data->num_of_buffers;
-
- dev_dbg(dev, "index=%u single_buff=%pad buffer_len=0x%08X is_last=%d\n",
- index, &buffer_dma, buffer_len, is_last_entry);
- sgl_data->nents[index] = 1;
- sgl_data->entry[index].buffer_dma = buffer_dma;
- sgl_data->offset[index] = 0;
- sgl_data->total_data_len[index] = buffer_len;
- sgl_data->type[index] = DMA_BUFF_TYPE;
- sgl_data->is_last[index] = is_last_entry;
- sgl_data->mlli_nents[index] = mlli_nents;
- if (sgl_data->mlli_nents[index])
- *sgl_data->mlli_nents[index] = 0;
- sgl_data->num_of_buffers++;
-}
-
static void cc_add_sg_entry(struct device *dev, struct buffer_array *sgl_data,
unsigned int nents, struct scatterlist *sgl,
unsigned int data_len, unsigned int data_offset,
@@ -278,7 +245,6 @@ static void cc_add_sg_entry(struct device *dev, struct buffer_array *sgl_data,
sgl_data->entry[index].sgl = sgl;
sgl_data->offset[index] = data_offset;
sgl_data->total_data_len[index] = data_len;
- sgl_data->type[index] = DMA_SGL_TYPE;
sgl_data->is_last[index] = is_last_table;
sgl_data->mlli_nents[index] = mlli_nents;
if (sgl_data->mlli_nents[index])
@@ -290,37 +256,25 @@ static int cc_map_sg(struct device *dev, struct scatterlist *sg,
unsigned int nbytes, int direction, u32 *nents,
u32 max_sg_nents, u32 *lbytes, u32 *mapped_nents)
{
- if (sg_is_last(sg)) {
- /* One entry only case -set to DLLI */
- if (dma_map_sg(dev, sg, 1, direction) != 1) {
- dev_err(dev, "dma_map_sg() single buffer failed\n");
- return -ENOMEM;
- }
- dev_dbg(dev, "Mapped sg: dma_address=%pad page=%p addr=%pK offset=%u length=%u\n",
- &sg_dma_address(sg), sg_page(sg), sg_virt(sg),
- sg->offset, sg->length);
- *lbytes = nbytes;
- *nents = 1;
- *mapped_nents = 1;
- } else { /*sg_is_last*/
- *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes);
- if (*nents > max_sg_nents) {
- *nents = 0;
- dev_err(dev, "Too many fragments. current %d max %d\n",
- *nents, max_sg_nents);
- return -ENOMEM;
- }
- /* In case of mmu the number of mapped nents might
- * be changed from the original sgl nents
- */
- *mapped_nents = dma_map_sg(dev, sg, *nents, direction);
- if (*mapped_nents == 0) {
- *nents = 0;
- dev_err(dev, "dma_map_sg() sg buffer failed\n");
- return -ENOMEM;
- }
+ int ret = 0;
+
+ *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes);
+ if (*nents > max_sg_nents) {
+ *nents = 0;
+ dev_err(dev, "Too many fragments. current %d max %d\n",
+ *nents, max_sg_nents);
+ return -ENOMEM;
+ }
+
+ ret = dma_map_sg(dev, sg, *nents, direction);
+ if (dma_mapping_error(dev, ret)) {
+ *nents = 0;
+ dev_err(dev, "dma_map_sg() sg buffer failed %d\n", ret);
+ return -ENOMEM;
}
+ *mapped_nents = ret;
+
return 0;
}
@@ -411,7 +365,6 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx,
{
struct cipher_req_ctx *req_ctx = (struct cipher_req_ctx *)ctx;
struct mlli_params *mlli_params = &req_ctx->mlli_params;
- struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle;
struct device *dev = drvdata_to_dev(drvdata);
struct buffer_array sg_data;
u32 dummy = 0;
@@ -424,10 +377,9 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx,
/* Map IV buffer */
if (ivsize) {
- dump_byte_array("iv", (u8 *)info, ivsize);
+ dump_byte_array("iv", info, ivsize);
req_ctx->gen_ctx.iv_dma_addr =
- dma_map_single(dev, (void *)info,
- ivsize, DMA_BIDIRECTIONAL);
+ dma_map_single(dev, info, ivsize, DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, req_ctx->gen_ctx.iv_dma_addr)) {
dev_err(dev, "Mapping iv %u B at va=%pK for DMA failed\n",
ivsize, info);
@@ -476,7 +428,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx,
}
if (req_ctx->dma_buf_type == CC_DMA_BUF_MLLI) {
- mlli_params->curr_pool = buff_mgr->mlli_buffs_pool;
+ mlli_params->curr_pool = drvdata->mlli_buffs_pool;
rc = cc_generate_mlli(dev, &sg_data, mlli_params, flags);
if (rc)
goto cipher_exit;
@@ -555,11 +507,12 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req)
sg_virt(req->src), areq_ctx->src.nents, areq_ctx->assoc.nents,
areq_ctx->assoclen, req->cryptlen);
- dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_BIDIRECTIONAL);
+ dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents,
+ DMA_BIDIRECTIONAL);
if (req->src != req->dst) {
dev_dbg(dev, "Unmapping dst sgl: req->dst=%pK\n",
sg_virt(req->dst));
- dma_unmap_sg(dev, req->dst, sg_nents(req->dst),
+ dma_unmap_sg(dev, req->dst, areq_ctx->dst.mapped_nents,
DMA_BIDIRECTIONAL);
}
if (drvdata->coherent &&
@@ -614,18 +567,6 @@ static int cc_aead_chain_iv(struct cc_drvdata *drvdata,
dev_dbg(dev, "Mapped iv %u B at va=%pK to dma=%pad\n",
hw_iv_size, req->iv, &areq_ctx->gen_ctx.iv_dma_addr);
- // TODO: what about CTR?? ask Ron
- if (do_chain && areq_ctx->plaintext_authenticate_only) {
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- unsigned int iv_size_to_authenc = crypto_aead_ivsize(tfm);
- unsigned int iv_ofs = GCM_BLOCK_RFC4_IV_OFFSET;
- /* Chain to given list */
- cc_add_buffer_entry(dev, sg_data,
- (areq_ctx->gen_ctx.iv_dma_addr + iv_ofs),
- iv_size_to_authenc, is_last,
- &areq_ctx->assoc.mlli_nents);
- areq_ctx->assoc_buff_type = CC_DMA_BUF_MLLI;
- }
chain_iv_exit:
return rc;
@@ -639,13 +580,8 @@ static int cc_aead_chain_assoc(struct cc_drvdata *drvdata,
struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
int rc = 0;
int mapped_nents = 0;
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- unsigned int size_of_assoc = areq_ctx->assoclen;
struct device *dev = drvdata_to_dev(drvdata);
- if (areq_ctx->is_gcm4543)
- size_of_assoc += crypto_aead_ivsize(tfm);
-
if (!sg_data) {
rc = -EINVAL;
goto chain_assoc_exit;
@@ -661,7 +597,7 @@ static int cc_aead_chain_assoc(struct cc_drvdata *drvdata,
goto chain_assoc_exit;
}
- mapped_nents = sg_nents_for_len(req->src, size_of_assoc);
+ mapped_nents = sg_nents_for_len(req->src, areq_ctx->assoclen);
if (mapped_nents < 0)
return mapped_nents;
@@ -854,16 +790,11 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata,
u32 src_mapped_nents = 0, dst_mapped_nents = 0;
u32 offset = 0;
/* non-inplace mode */
- unsigned int size_for_map = areq_ctx->assoclen + req->cryptlen;
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ unsigned int size_for_map = req->assoclen + req->cryptlen;
u32 sg_index = 0;
- bool is_gcm4543 = areq_ctx->is_gcm4543;
- u32 size_to_skip = areq_ctx->assoclen;
+ u32 size_to_skip = req->assoclen;
struct scatterlist *sgl;
- if (is_gcm4543)
- size_to_skip += crypto_aead_ivsize(tfm);
-
offset = size_to_skip;
if (!sg_data)
@@ -872,16 +803,13 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata,
areq_ctx->src_sgl = req->src;
areq_ctx->dst_sgl = req->dst;
- if (is_gcm4543)
- size_for_map += crypto_aead_ivsize(tfm);
-
size_for_map += (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) ?
authsize : 0;
src_mapped_nents = cc_get_sgl_nents(dev, req->src, size_for_map,
&src_last_bytes);
sg_index = areq_ctx->src_sgl->length;
//check where the data starts
- while (sg_index <= size_to_skip) {
+ while (src_mapped_nents && (sg_index <= size_to_skip)) {
src_mapped_nents--;
offset -= areq_ctx->src_sgl->length;
sgl = sg_next(areq_ctx->src_sgl);
@@ -901,14 +829,15 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata,
areq_ctx->src_offset = offset;
if (req->src != req->dst) {
- size_for_map = areq_ctx->assoclen + req->cryptlen;
- size_for_map += (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) ?
- authsize : 0;
- if (is_gcm4543)
- size_for_map += crypto_aead_ivsize(tfm);
+ size_for_map = req->assoclen + req->cryptlen;
+
+ if (direct == DRV_CRYPTO_DIRECTION_ENCRYPT)
+ size_for_map += authsize;
+ else
+ size_for_map -= authsize;
rc = cc_map_sg(dev, req->dst, size_for_map, DMA_BIDIRECTIONAL,
- &areq_ctx->dst.nents,
+ &areq_ctx->dst.mapped_nents,
LLI_MAX_NUM_OF_DATA_ENTRIES, &dst_last_bytes,
&dst_mapped_nents);
if (rc)
@@ -921,7 +850,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata,
offset = size_to_skip;
//check where the data starts
- while (sg_index <= size_to_skip) {
+ while (dst_mapped_nents && sg_index <= size_to_skip) {
dst_mapped_nents--;
offset -= areq_ctx->dst_sgl->length;
sgl = sg_next(areq_ctx->dst_sgl);
@@ -1012,14 +941,11 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req)
struct device *dev = drvdata_to_dev(drvdata);
struct buffer_array sg_data;
unsigned int authsize = areq_ctx->req_authsize;
- struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle;
int rc = 0;
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- bool is_gcm4543 = areq_ctx->is_gcm4543;
dma_addr_t dma_addr;
u32 mapped_nents = 0;
u32 dummy = 0; /*used for the assoc data fragments */
- u32 size_to_map = 0;
+ u32 size_to_map;
gfp_t flags = cc_gfp_flags(&req->base);
mlli_params->curr_pool = NULL;
@@ -1116,14 +1042,15 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req)
areq_ctx->gcm_iv_inc2_dma_addr = dma_addr;
}
- size_to_map = req->cryptlen + areq_ctx->assoclen;
- if (areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_ENCRYPT)
+ size_to_map = req->cryptlen + req->assoclen;
+ /* If we do in-place encryption, we also need the auth tag */
+ if ((areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_ENCRYPT) &&
+ (req->src == req->dst)) {
size_to_map += authsize;
+ }
- if (is_gcm4543)
- size_to_map += crypto_aead_ivsize(tfm);
rc = cc_map_sg(dev, req->src, size_to_map, DMA_BIDIRECTIONAL,
- &areq_ctx->src.nents,
+ &areq_ctx->src.mapped_nents,
(LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES +
LLI_MAX_NUM_OF_DATA_ENTRIES),
&dummy, &mapped_nents);
@@ -1183,7 +1110,7 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req)
*/
if (areq_ctx->assoc_buff_type == CC_DMA_BUF_MLLI ||
areq_ctx->data_buff_type == CC_DMA_BUF_MLLI) {
- mlli_params->curr_pool = buff_mgr->mlli_buffs_pool;
+ mlli_params->curr_pool = drvdata->mlli_buffs_pool;
rc = cc_generate_mlli(dev, &sg_data, mlli_params, flags);
if (rc)
goto aead_map_failure;
@@ -1211,7 +1138,6 @@ int cc_map_hash_request_final(struct cc_drvdata *drvdata, void *ctx,
u32 *curr_buff_cnt = cc_hash_buf_cnt(areq_ctx);
struct mlli_params *mlli_params = &areq_ctx->mlli_params;
struct buffer_array sg_data;
- struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle;
int rc = 0;
u32 dummy = 0;
u32 mapped_nents = 0;
@@ -1229,7 +1155,6 @@ int cc_map_hash_request_final(struct cc_drvdata *drvdata, void *ctx,
return 0;
}
- /*TODO: copy data in case that buffer is enough for operation */
/* map the previous buffer */
if (*curr_buff_cnt) {
rc = cc_set_hash_buf(dev, areq_ctx, curr_buff, *curr_buff_cnt,
@@ -1258,7 +1183,7 @@ int cc_map_hash_request_final(struct cc_drvdata *drvdata, void *ctx,
/*build mlli */
if (areq_ctx->data_dma_buf_type == CC_DMA_BUF_MLLI) {
- mlli_params->curr_pool = buff_mgr->mlli_buffs_pool;
+ mlli_params->curr_pool = drvdata->mlli_buffs_pool;
/* add the src data to the sg_data */
cc_add_sg_entry(dev, &sg_data, areq_ctx->in_nents, src, nbytes,
0, true, &areq_ctx->mlli_nents);
@@ -1296,7 +1221,6 @@ int cc_map_hash_request_update(struct cc_drvdata *drvdata, void *ctx,
unsigned int update_data_len;
u32 total_in_len = nbytes + *curr_buff_cnt;
struct buffer_array sg_data;
- struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle;
unsigned int swap_index = 0;
int rc = 0;
u32 dummy = 0;
@@ -1371,7 +1295,7 @@ int cc_map_hash_request_update(struct cc_drvdata *drvdata, void *ctx,
}
if (areq_ctx->data_dma_buf_type == CC_DMA_BUF_MLLI) {
- mlli_params->curr_pool = buff_mgr->mlli_buffs_pool;
+ mlli_params->curr_pool = drvdata->mlli_buffs_pool;
/* add the src data to the sg_data */
cc_add_sg_entry(dev, &sg_data, areq_ctx->in_nents, src,
(update_data_len - *curr_buff_cnt), 0, true,
@@ -1438,39 +1362,22 @@ void cc_unmap_hash_request(struct device *dev, void *ctx,
int cc_buffer_mgr_init(struct cc_drvdata *drvdata)
{
- struct buff_mgr_handle *buff_mgr_handle;
struct device *dev = drvdata_to_dev(drvdata);
- buff_mgr_handle = kmalloc(sizeof(*buff_mgr_handle), GFP_KERNEL);
- if (!buff_mgr_handle)
- return -ENOMEM;
-
- drvdata->buff_mgr_handle = buff_mgr_handle;
-
- buff_mgr_handle->mlli_buffs_pool =
+ drvdata->mlli_buffs_pool =
dma_pool_create("dx_single_mlli_tables", dev,
MAX_NUM_OF_TOTAL_MLLI_ENTRIES *
LLI_ENTRY_BYTE_SIZE,
MLLI_TABLE_MIN_ALIGNMENT, 0);
- if (!buff_mgr_handle->mlli_buffs_pool)
- goto error;
+ if (!drvdata->mlli_buffs_pool)
+ return -ENOMEM;
return 0;
-
-error:
- cc_buffer_mgr_fini(drvdata);
- return -ENOMEM;
}
int cc_buffer_mgr_fini(struct cc_drvdata *drvdata)
{
- struct buff_mgr_handle *buff_mgr_handle = drvdata->buff_mgr_handle;
-
- if (buff_mgr_handle) {
- dma_pool_destroy(buff_mgr_handle->mlli_buffs_pool);
- kfree(drvdata->buff_mgr_handle);
- drvdata->buff_mgr_handle = NULL;
- }
+ dma_pool_destroy(drvdata->mlli_buffs_pool);
return 0;
}
diff --git a/drivers/crypto/ccree/cc_buffer_mgr.h b/drivers/crypto/ccree/cc_buffer_mgr.h
index af434872c6ff..653441b6542e 100644
--- a/drivers/crypto/ccree/cc_buffer_mgr.h
+++ b/drivers/crypto/ccree/cc_buffer_mgr.h
@@ -24,14 +24,15 @@ enum cc_sg_cpy_direct {
};
struct cc_mlli {
- cc_sram_addr_t sram_addr;
+ u32 sram_addr;
+ unsigned int mapped_nents;
unsigned int nents; //sg nents
unsigned int mlli_nents; //mlli nents might be different than the above
};
struct mlli_params {
struct dma_pool *curr_pool;
- u8 *mlli_virt_addr;
+ void *mlli_virt_addr;
dma_addr_t mlli_dma_addr;
u32 mlli_len;
};
diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c
index 7d6252d892d7..a84335328f37 100644
--- a/drivers/crypto/ccree/cc_cipher.c
+++ b/drivers/crypto/ccree/cc_cipher.c
@@ -20,10 +20,6 @@
#define template_skcipher template_u.skcipher
-struct cc_cipher_handle {
- struct list_head alg_list;
-};
-
struct cc_user_key_info {
u8 *key;
dma_addr_t key_dma_addr;
@@ -184,7 +180,7 @@ static int cc_cipher_init(struct crypto_tfm *tfm)
ctx_p->user.key);
/* Map key buffer */
- ctx_p->user.key_dma_addr = dma_map_single(dev, (void *)ctx_p->user.key,
+ ctx_p->user.key_dma_addr = dma_map_single(dev, ctx_p->user.key,
max_key_buf_size,
DMA_TO_DEVICE);
if (dma_mapping_error(dev, ctx_p->user.key_dma_addr)) {
@@ -284,7 +280,7 @@ static int cc_cipher_sethkey(struct crypto_skcipher *sktfm, const u8 *key,
dev_dbg(dev, "Setting HW key in context @%p for %s. keylen=%u\n",
ctx_p, crypto_tfm_alg_name(tfm), keylen);
- dump_byte_array("key", (u8 *)key, keylen);
+ dump_byte_array("key", key, keylen);
/* STAT_PHASE_0: Init and sanity checks */
@@ -387,7 +383,7 @@ static int cc_cipher_setkey(struct crypto_skcipher *sktfm, const u8 *key,
dev_dbg(dev, "Setting key in context @%p for %s. keylen=%u\n",
ctx_p, crypto_tfm_alg_name(tfm), keylen);
- dump_byte_array("key", (u8 *)key, keylen);
+ dump_byte_array("key", key, keylen);
/* STAT_PHASE_0: Init and sanity checks */
@@ -533,14 +529,6 @@ static void cc_setup_state_desc(struct crypto_tfm *tfm,
int flow_mode = ctx_p->flow_mode;
int direction = req_ctx->gen_ctx.op_type;
dma_addr_t iv_dma_addr = req_ctx->gen_ctx.iv_dma_addr;
- unsigned int du_size = nbytes;
-
- struct cc_crypto_alg *cc_alg =
- container_of(tfm->__crt_alg, struct cc_crypto_alg,
- skcipher_alg.base);
-
- if (cc_alg->data_unit)
- du_size = cc_alg->data_unit;
switch (cipher_mode) {
case DRV_CIPHER_ECB:
@@ -753,7 +741,7 @@ static void cc_setup_mlli_desc(struct crypto_tfm *tfm,
dev_dbg(dev, " bypass params addr %pad length 0x%X addr 0x%08X\n",
&req_ctx->mlli_params.mlli_dma_addr,
req_ctx->mlli_params.mlli_len,
- (unsigned int)ctx_p->drvdata->mlli_sram_addr);
+ ctx_p->drvdata->mlli_sram_addr);
hw_desc_init(&desc[*seq_size]);
set_din_type(&desc[*seq_size], DMA_DLLI,
req_ctx->mlli_params.mlli_dma_addr,
@@ -801,16 +789,16 @@ static void cc_setup_flow_desc(struct crypto_tfm *tfm,
req_ctx->in_mlli_nents, NS_BIT);
if (req_ctx->out_nents == 0) {
dev_dbg(dev, " din/dout params addr 0x%08X addr 0x%08X\n",
- (unsigned int)ctx_p->drvdata->mlli_sram_addr,
- (unsigned int)ctx_p->drvdata->mlli_sram_addr);
+ ctx_p->drvdata->mlli_sram_addr,
+ ctx_p->drvdata->mlli_sram_addr);
set_dout_mlli(&desc[*seq_size],
ctx_p->drvdata->mlli_sram_addr,
req_ctx->in_mlli_nents, NS_BIT,
(!last_desc ? 0 : 1));
} else {
dev_dbg(dev, " din/dout params addr 0x%08X addr 0x%08X\n",
- (unsigned int)ctx_p->drvdata->mlli_sram_addr,
- (unsigned int)ctx_p->drvdata->mlli_sram_addr +
+ ctx_p->drvdata->mlli_sram_addr,
+ ctx_p->drvdata->mlli_sram_addr +
(u32)LLI_ENTRY_BYTE_SIZE * req_ctx->in_nents);
set_dout_mlli(&desc[*seq_size],
(ctx_p->drvdata->mlli_sram_addr +
@@ -871,7 +859,6 @@ static int cc_cipher_process(struct skcipher_request *req,
/* STAT_PHASE_0: Init and sanity checks */
- /* TODO: check data length according to mode */
if (validate_data_size(ctx_p, nbytes)) {
dev_dbg(dev, "Unsupported data size %d.\n", nbytes);
rc = -EINVAL;
@@ -893,8 +880,8 @@ static int cc_cipher_process(struct skcipher_request *req,
}
/* Setup request structure */
- cc_req.user_cb = (void *)cc_cipher_complete;
- cc_req.user_arg = (void *)req;
+ cc_req.user_cb = cc_cipher_complete;
+ cc_req.user_arg = req;
/* Setup CPP operation details */
if (ctx_p->key_type == CC_POLICY_PROTECTED_KEY) {
@@ -1228,6 +1215,10 @@ static const struct cc_alg_template skcipher_algs[] = {
.sec_func = true,
},
{
+ /* See https://www.mail-archive.com/linux-crypto@vger.kernel.org/msg40576.html
+ * for the reason why this differs from the generic
+ * implementation.
+ */
.name = "xts(aes)",
.driver_name = "xts-aes-ccree",
.blocksize = 1,
@@ -1423,7 +1414,7 @@ static const struct cc_alg_template skcipher_algs[] = {
{
.name = "ofb(aes)",
.driver_name = "ofb-aes-ccree",
- .blocksize = AES_BLOCK_SIZE,
+ .blocksize = 1,
.template_skcipher = {
.setkey = cc_cipher_setkey,
.encrypt = cc_cipher_encrypt,
@@ -1576,7 +1567,7 @@ static const struct cc_alg_template skcipher_algs[] = {
{
.name = "ctr(sm4)",
.driver_name = "ctr-sm4-ccree",
- .blocksize = SM4_BLOCK_SIZE,
+ .blocksize = 1,
.template_skcipher = {
.setkey = cc_cipher_setkey,
.encrypt = cc_cipher_encrypt,
@@ -1634,7 +1625,7 @@ static struct cc_crypto_alg *cc_create_alg(const struct cc_alg_template *tmpl,
struct cc_crypto_alg *t_alg;
struct skcipher_alg *alg;
- t_alg = kzalloc(sizeof(*t_alg), GFP_KERNEL);
+ t_alg = devm_kzalloc(dev, sizeof(*t_alg), GFP_KERNEL);
if (!t_alg)
return ERR_PTR(-ENOMEM);
@@ -1665,36 +1656,23 @@ static struct cc_crypto_alg *cc_create_alg(const struct cc_alg_template *tmpl,
int cc_cipher_free(struct cc_drvdata *drvdata)
{
struct cc_crypto_alg *t_alg, *n;
- struct cc_cipher_handle *cipher_handle = drvdata->cipher_handle;
-
- if (cipher_handle) {
- /* Remove registered algs */
- list_for_each_entry_safe(t_alg, n, &cipher_handle->alg_list,
- entry) {
- crypto_unregister_skcipher(&t_alg->skcipher_alg);
- list_del(&t_alg->entry);
- kfree(t_alg);
- }
- kfree(cipher_handle);
- drvdata->cipher_handle = NULL;
+
+ /* Remove registered algs */
+ list_for_each_entry_safe(t_alg, n, &drvdata->alg_list, entry) {
+ crypto_unregister_skcipher(&t_alg->skcipher_alg);
+ list_del(&t_alg->entry);
}
return 0;
}
int cc_cipher_alloc(struct cc_drvdata *drvdata)
{
- struct cc_cipher_handle *cipher_handle;
struct cc_crypto_alg *t_alg;
struct device *dev = drvdata_to_dev(drvdata);
int rc = -ENOMEM;
int alg;
- cipher_handle = kmalloc(sizeof(*cipher_handle), GFP_KERNEL);
- if (!cipher_handle)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&cipher_handle->alg_list);
- drvdata->cipher_handle = cipher_handle;
+ INIT_LIST_HEAD(&drvdata->alg_list);
/* Linux crypto */
dev_dbg(dev, "Number of algorithms = %zu\n",
@@ -1723,14 +1701,12 @@ int cc_cipher_alloc(struct cc_drvdata *drvdata)
if (rc) {
dev_err(dev, "%s alg registration failed\n",
t_alg->skcipher_alg.base.cra_driver_name);
- kfree(t_alg);
goto fail0;
- } else {
- list_add_tail(&t_alg->entry,
- &cipher_handle->alg_list);
- dev_dbg(dev, "Registered %s\n",
- t_alg->skcipher_alg.base.cra_driver_name);
}
+
+ list_add_tail(&t_alg->entry, &drvdata->alg_list);
+ dev_dbg(dev, "Registered %s\n",
+ t_alg->skcipher_alg.base.cra_driver_name);
}
return 0;
diff --git a/drivers/crypto/ccree/cc_debugfs.c b/drivers/crypto/ccree/cc_debugfs.c
index 566999738698..c454afce7781 100644
--- a/drivers/crypto/ccree/cc_debugfs.c
+++ b/drivers/crypto/ccree/cc_debugfs.c
@@ -8,10 +8,6 @@
#include "cc_crypto_ctx.h"
#include "cc_debugfs.h"
-struct cc_debugfs_ctx {
- struct dentry *dir;
-};
-
#define CC_DEBUG_REG(_X) { \
.name = __stringify(_X),\
.offset = CC_REG(_X) \
@@ -67,13 +63,8 @@ void __exit cc_debugfs_global_fini(void)
int cc_debugfs_init(struct cc_drvdata *drvdata)
{
struct device *dev = drvdata_to_dev(drvdata);
- struct cc_debugfs_ctx *ctx;
struct debugfs_regset32 *regset, *verset;
- ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
-
regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL);
if (!regset)
return -ENOMEM;
@@ -81,16 +72,18 @@ int cc_debugfs_init(struct cc_drvdata *drvdata)
regset->regs = debug_regs;
regset->nregs = ARRAY_SIZE(debug_regs);
regset->base = drvdata->cc_base;
+ regset->dev = dev;
- ctx->dir = debugfs_create_dir(drvdata->plat_dev->name, cc_debugfs_dir);
+ drvdata->dir = debugfs_create_dir(drvdata->plat_dev->name,
+ cc_debugfs_dir);
- debugfs_create_regset32("regs", 0400, ctx->dir, regset);
- debugfs_create_bool("coherent", 0400, ctx->dir, &drvdata->coherent);
+ debugfs_create_regset32("regs", 0400, drvdata->dir, regset);
+ debugfs_create_bool("coherent", 0400, drvdata->dir, &drvdata->coherent);
verset = devm_kzalloc(dev, sizeof(*verset), GFP_KERNEL);
/* Failing here is not important enough to fail the module load */
if (!verset)
- goto out;
+ return 0;
if (drvdata->hw_rev <= CC_HW_REV_712) {
ver_sig_regs[0].offset = drvdata->sig_offset;
@@ -102,17 +95,13 @@ int cc_debugfs_init(struct cc_drvdata *drvdata)
verset->nregs = ARRAY_SIZE(pid_cid_regs);
}
verset->base = drvdata->cc_base;
+ verset->dev = dev;
- debugfs_create_regset32("version", 0400, ctx->dir, verset);
-
-out:
- drvdata->debugfs = ctx;
+ debugfs_create_regset32("version", 0400, drvdata->dir, verset);
return 0;
}
void cc_debugfs_fini(struct cc_drvdata *drvdata)
{
- struct cc_debugfs_ctx *ctx = (struct cc_debugfs_ctx *)drvdata->debugfs;
-
- debugfs_remove_recursive(ctx->dir);
+ debugfs_remove_recursive(drvdata->dir);
}
diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c
index 532bc95a8373..2d50991b9a17 100644
--- a/drivers/crypto/ccree/cc_driver.c
+++ b/drivers/crypto/ccree/cc_driver.c
@@ -14,6 +14,8 @@
#include <linux/of.h>
#include <linux/clk.h>
#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/pm_runtime.h>
#include "cc_driver.h"
#include "cc_request_mgr.h"
@@ -134,7 +136,7 @@ static irqreturn_t cc_isr(int irq, void *dev_id)
/* STAT_OP_TYPE_GENERIC STAT_PHASE_0: Interrupt */
/* if driver suspended return, probably shared interrupt */
- if (cc_pm_is_dev_suspended(dev))
+ if (pm_runtime_suspended(dev))
return IRQ_NONE;
/* read the interrupt status */
@@ -269,7 +271,6 @@ static int init_cc_resources(struct platform_device *plat_dev)
u32 val, hw_rev_pidr, sig_cidr;
u64 dma_mask;
const struct cc_hw_data *hw_rev;
- const struct of_device_id *dev_id;
struct clk *clk;
int irq;
int rc = 0;
@@ -278,11 +279,7 @@ static int init_cc_resources(struct platform_device *plat_dev)
if (!new_drvdata)
return -ENOMEM;
- dev_id = of_match_node(arm_ccree_dev_of_match, np);
- if (!dev_id)
- return -ENODEV;
-
- hw_rev = (struct cc_hw_data *)dev_id->data;
+ hw_rev = of_device_get_match_data(dev);
new_drvdata->hw_rev_name = hw_rev->name;
new_drvdata->hw_rev = hw_rev->rev;
new_drvdata->std_bodies = hw_rev->std_bodies;
@@ -302,22 +299,12 @@ static int init_cc_resources(struct platform_device *plat_dev)
platform_set_drvdata(plat_dev, new_drvdata);
new_drvdata->plat_dev = plat_dev;
- clk = devm_clk_get(dev, NULL);
- if (IS_ERR(clk))
- switch (PTR_ERR(clk)) {
- /* Clock is optional so this might be fine */
- case -ENOENT:
- break;
-
- /* Clock not available, let's try again soon */
- case -EPROBE_DEFER:
- return -EPROBE_DEFER;
-
- default:
- dev_err(dev, "Error getting clock: %ld\n",
- PTR_ERR(clk));
- return PTR_ERR(clk);
- }
+ clk = devm_clk_get_optional(dev, NULL);
+ if (IS_ERR(clk)) {
+ if (PTR_ERR(clk) != -EPROBE_DEFER)
+ dev_err(dev, "Error getting clock: %pe\n", clk);
+ return PTR_ERR(clk);
+ }
new_drvdata->clk = clk;
new_drvdata->coherent = of_dma_is_coherent(np);
@@ -344,13 +331,13 @@ static int init_cc_resources(struct platform_device *plat_dev)
init_completion(&new_drvdata->hw_queue_avail);
- if (!plat_dev->dev.dma_mask)
- plat_dev->dev.dma_mask = &plat_dev->dev.coherent_dma_mask;
+ if (!dev->dma_mask)
+ dev->dma_mask = &dev->coherent_dma_mask;
dma_mask = DMA_BIT_MASK(DMA_BIT_MASK_LEN);
while (dma_mask > 0x7fffffffUL) {
- if (dma_supported(&plat_dev->dev, dma_mask)) {
- rc = dma_set_coherent_mask(&plat_dev->dev, dma_mask);
+ if (dma_supported(dev, dma_mask)) {
+ rc = dma_set_coherent_mask(dev, dma_mask);
if (!rc)
break;
}
@@ -362,7 +349,7 @@ static int init_cc_resources(struct platform_device *plat_dev)
return rc;
}
- rc = cc_clk_on(new_drvdata);
+ rc = clk_prepare_enable(new_drvdata->clk);
if (rc) {
dev_err(dev, "Failed to enable clock");
return rc;
@@ -370,7 +357,17 @@ static int init_cc_resources(struct platform_device *plat_dev)
new_drvdata->sec_disabled = cc_sec_disable;
- /* wait for Crytpcell reset completion */
+ pm_runtime_set_autosuspend_delay(dev, CC_SUSPEND_TIMEOUT);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_set_active(dev);
+ pm_runtime_enable(dev);
+ rc = pm_runtime_get_sync(dev);
+ if (rc < 0) {
+ dev_err(dev, "pm_runtime_get_sync() failed: %d\n", rc);
+ goto post_pm_err;
+ }
+
+ /* Wait for Cryptocell reset completion */
if (!cc_wait_for_reset_completion(new_drvdata)) {
dev_err(dev, "Cryptocell reset not completed");
}
@@ -382,7 +379,7 @@ static int init_cc_resources(struct platform_device *plat_dev)
dev_err(dev, "Invalid CC signature: SIGNATURE=0x%08X != expected=0x%08X\n",
val, hw_rev->sig);
rc = -EINVAL;
- goto post_clk_err;
+ goto post_pm_err;
}
sig_cidr = val;
hw_rev_pidr = cc_ioread(new_drvdata, new_drvdata->ver_offset);
@@ -393,7 +390,7 @@ static int init_cc_resources(struct platform_device *plat_dev)
dev_err(dev, "Invalid CC PIDR: PIDR0124=0x%08X != expected=0x%08X\n",
val, hw_rev->pidr_0124);
rc = -EINVAL;
- goto post_clk_err;
+ goto post_pm_err;
}
hw_rev_pidr = val;
@@ -402,7 +399,7 @@ static int init_cc_resources(struct platform_device *plat_dev)
dev_err(dev, "Invalid CC CIDR: CIDR0123=0x%08X != expected=0x%08X\n",
val, hw_rev->cidr_0123);
rc = -EINVAL;
- goto post_clk_err;
+ goto post_pm_err;
}
sig_cidr = val;
@@ -421,7 +418,7 @@ static int init_cc_resources(struct platform_device *plat_dev)
default:
dev_err(dev, "Unsupported engines configuration.\n");
rc = -EINVAL;
- goto post_clk_err;
+ goto post_pm_err;
}
/* Check security disable state */
@@ -447,14 +444,14 @@ static int init_cc_resources(struct platform_device *plat_dev)
new_drvdata);
if (rc) {
dev_err(dev, "Could not register to interrupt %d\n", irq);
- goto post_clk_err;
+ goto post_pm_err;
}
dev_dbg(dev, "Registered to IRQ: %d\n", irq);
rc = init_cc_regs(new_drvdata, true);
if (rc) {
dev_err(dev, "init_cc_regs failed\n");
- goto post_clk_err;
+ goto post_pm_err;
}
rc = cc_debugfs_init(new_drvdata);
@@ -477,15 +474,14 @@ static int init_cc_resources(struct platform_device *plat_dev)
new_drvdata->mlli_sram_addr =
cc_sram_alloc(new_drvdata, MAX_MLLI_BUFF_SIZE);
if (new_drvdata->mlli_sram_addr == NULL_SRAM_ADDR) {
- dev_err(dev, "Failed to alloc MLLI Sram buffer\n");
rc = -ENOMEM;
- goto post_sram_mgr_err;
+ goto post_fips_init_err;
}
rc = cc_req_mgr_init(new_drvdata);
if (rc) {
dev_err(dev, "cc_req_mgr_init failed\n");
- goto post_sram_mgr_err;
+ goto post_fips_init_err;
}
rc = cc_buffer_mgr_init(new_drvdata);
@@ -494,12 +490,6 @@ static int init_cc_resources(struct platform_device *plat_dev)
goto post_req_mgr_err;
}
- rc = cc_pm_init(new_drvdata);
- if (rc) {
- dev_err(dev, "cc_pm_init failed\n");
- goto post_buf_mgr_err;
- }
-
/* Allocate crypto algs */
rc = cc_cipher_alloc(new_drvdata);
if (rc) {
@@ -520,15 +510,13 @@ static int init_cc_resources(struct platform_device *plat_dev)
goto post_hash_err;
}
- /* All set, we can allow autosuspend */
- cc_pm_go(new_drvdata);
-
/* If we got here and FIPS mode is enabled
* it means all FIPS test passed, so let TEE
* know we're good.
*/
cc_set_ree_fips_status(new_drvdata, true);
+ pm_runtime_put(dev);
return 0;
post_hash_err:
@@ -539,16 +527,17 @@ post_buf_mgr_err:
cc_buffer_mgr_fini(new_drvdata);
post_req_mgr_err:
cc_req_mgr_fini(new_drvdata);
-post_sram_mgr_err:
- cc_sram_mgr_fini(new_drvdata);
post_fips_init_err:
cc_fips_fini(new_drvdata);
post_debugfs_err:
cc_debugfs_fini(new_drvdata);
post_regs_err:
fini_cc_regs(new_drvdata);
-post_clk_err:
- cc_clk_off(new_drvdata);
+post_pm_err:
+ pm_runtime_put_noidle(dev);
+ pm_runtime_disable(dev);
+ pm_runtime_set_suspended(dev);
+ clk_disable_unprepare(new_drvdata->clk);
return rc;
}
@@ -560,36 +549,22 @@ void fini_cc_regs(struct cc_drvdata *drvdata)
static void cleanup_cc_resources(struct platform_device *plat_dev)
{
+ struct device *dev = &plat_dev->dev;
struct cc_drvdata *drvdata =
(struct cc_drvdata *)platform_get_drvdata(plat_dev);
cc_aead_free(drvdata);
cc_hash_free(drvdata);
cc_cipher_free(drvdata);
- cc_pm_fini(drvdata);
cc_buffer_mgr_fini(drvdata);
cc_req_mgr_fini(drvdata);
- cc_sram_mgr_fini(drvdata);
cc_fips_fini(drvdata);
cc_debugfs_fini(drvdata);
fini_cc_regs(drvdata);
- cc_clk_off(drvdata);
-}
-
-int cc_clk_on(struct cc_drvdata *drvdata)
-{
- struct clk *clk = drvdata->clk;
- int rc;
-
- if (IS_ERR(clk))
- /* Not all devices have a clock associated with CCREE */
- return 0;
-
- rc = clk_prepare_enable(clk);
- if (rc)
- return rc;
-
- return 0;
+ pm_runtime_put_noidle(dev);
+ pm_runtime_disable(dev);
+ pm_runtime_set_suspended(dev);
+ clk_disable_unprepare(drvdata->clk);
}
unsigned int cc_get_default_hash_len(struct cc_drvdata *drvdata)
@@ -600,17 +575,6 @@ unsigned int cc_get_default_hash_len(struct cc_drvdata *drvdata)
return HASH_LEN_SIZE_630;
}
-void cc_clk_off(struct cc_drvdata *drvdata)
-{
- struct clk *clk = drvdata->clk;
-
- if (IS_ERR(clk))
- /* Not all devices have a clock associated with CCREE */
- return;
-
- clk_disable_unprepare(clk);
-}
-
static int ccree_probe(struct platform_device *plat_dev)
{
int rc;
@@ -653,7 +617,6 @@ static struct platform_driver ccree_driver = {
static int __init ccree_init(void)
{
- cc_hash_global_init();
cc_debugfs_global_init();
return platform_driver_register(&ccree_driver);
diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h
index c227718ba992..d938886390d2 100644
--- a/drivers/crypto/ccree/cc_driver.h
+++ b/drivers/crypto/ccree/cc_driver.h
@@ -26,7 +26,6 @@
#include <linux/clk.h>
#include <linux/platform_device.h>
-/* Registers definitions from shared/hw/ree_include */
#include "cc_host_regs.h"
#include "cc_crypto_ctx.h"
#include "cc_hw_queue_defs.h"
@@ -71,9 +70,7 @@ enum cc_std_body {
#define CC_NVM_IS_IDLE_MASK BIT(CC_NVM_IS_IDLE_VALUE_BIT_SHIFT)
-#define AXIM_MON_COMP_VALUE GENMASK(CC_AXIM_MON_COMP_VALUE_BIT_SIZE + \
- CC_AXIM_MON_COMP_VALUE_BIT_SHIFT, \
- CC_AXIM_MON_COMP_VALUE_BIT_SHIFT)
+#define AXIM_MON_COMP_VALUE CC_GENMASK(CC_AXIM_MON_COMP_VALUE)
#define CC_CPP_AES_ABORT_MASK ( \
BIT(CC_HOST_IMR_REE_OP_ABORTED_AES_0_MASK_BIT_SHIFT) | \
@@ -139,15 +136,15 @@ struct cc_drvdata {
int irq;
struct completion hw_queue_avail; /* wait for HW queue availability */
struct platform_device *plat_dev;
- cc_sram_addr_t mlli_sram_addr;
- void *buff_mgr_handle;
- void *cipher_handle;
+ u32 mlli_sram_addr;
+ struct dma_pool *mlli_buffs_pool;
+ struct list_head alg_list;
void *hash_handle;
void *aead_handle;
void *request_mgr_handle;
void *fips_handle;
- void *sram_mgr_handle;
- void *debugfs;
+ u32 sram_free_offset; /* offset to non-allocated area in SRAM */
+ struct dentry *dir; /* for debugfs */
struct clk *clk;
bool coherent;
char *hw_rev_name;
@@ -158,7 +155,6 @@ struct cc_drvdata {
int std_bodies;
bool sec_disabled;
u32 comp_mask;
- bool pm_on;
};
struct cc_crypto_alg {
@@ -212,8 +208,6 @@ static inline void dump_byte_array(const char *name, const u8 *the_array,
bool cc_wait_for_reset_completion(struct cc_drvdata *drvdata);
int init_cc_regs(struct cc_drvdata *drvdata, bool is_probe);
void fini_cc_regs(struct cc_drvdata *drvdata);
-int cc_clk_on(struct cc_drvdata *drvdata);
-void cc_clk_off(struct cc_drvdata *drvdata);
unsigned int cc_get_default_hash_len(struct cc_drvdata *drvdata);
static inline void cc_iowrite(struct cc_drvdata *drvdata, u32 reg, u32 val)
diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c
index 912e5ce5079d..d5310783af15 100644
--- a/drivers/crypto/ccree/cc_hash.c
+++ b/drivers/crypto/ccree/cc_hash.c
@@ -20,8 +20,8 @@
#define CC_SM3_HASH_LEN_SIZE 8
struct cc_hash_handle {
- cc_sram_addr_t digest_len_sram_addr; /* const value in SRAM*/
- cc_sram_addr_t larval_digest_sram_addr; /* const value in SRAM */
+ u32 digest_len_sram_addr; /* const value in SRAM*/
+ u32 larval_digest_sram_addr; /* const value in SRAM */
struct list_head hash_list;
};
@@ -39,12 +39,19 @@ static const u32 cc_sha256_init[] = {
SHA256_H3, SHA256_H2, SHA256_H1, SHA256_H0 };
static const u32 cc_digest_len_sha512_init[] = {
0x00000080, 0x00000000, 0x00000000, 0x00000000 };
-static u64 cc_sha384_init[] = {
- SHA384_H7, SHA384_H6, SHA384_H5, SHA384_H4,
- SHA384_H3, SHA384_H2, SHA384_H1, SHA384_H0 };
-static u64 cc_sha512_init[] = {
- SHA512_H7, SHA512_H6, SHA512_H5, SHA512_H4,
- SHA512_H3, SHA512_H2, SHA512_H1, SHA512_H0 };
+
+/*
+ * Due to the way the HW works, every double word in the SHA384 and SHA512
+ * larval hashes must be stored in hi/lo order
+ */
+#define hilo(x) upper_32_bits(x), lower_32_bits(x)
+static const u32 cc_sha384_init[] = {
+ hilo(SHA384_H7), hilo(SHA384_H6), hilo(SHA384_H5), hilo(SHA384_H4),
+ hilo(SHA384_H3), hilo(SHA384_H2), hilo(SHA384_H1), hilo(SHA384_H0) };
+static const u32 cc_sha512_init[] = {
+ hilo(SHA512_H7), hilo(SHA512_H6), hilo(SHA512_H5), hilo(SHA512_H4),
+ hilo(SHA512_H3), hilo(SHA512_H2), hilo(SHA512_H1), hilo(SHA512_H0) };
+
static const u32 cc_sm3_init[] = {
SM3_IVH, SM3_IVG, SM3_IVF, SM3_IVE,
SM3_IVD, SM3_IVC, SM3_IVB, SM3_IVA };
@@ -342,7 +349,6 @@ static int cc_fin_result(struct cc_hw_desc *desc, struct ahash_request *req,
/* Get final MAC result */
hw_desc_init(&desc[idx]);
set_hash_cipher_mode(&desc[idx], ctx->hw_mode, ctx->hash_mode);
- /* TODO */
set_dout_dlli(&desc[idx], state->digest_result_dma_addr, digestsize,
NS_BIT, 1);
set_queue_last_ind(ctx->drvdata, &desc[idx]);
@@ -422,8 +428,7 @@ static int cc_hash_digest(struct ahash_request *req)
bool is_hmac = ctx->is_hmac;
struct cc_crypto_req cc_req = {};
struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN];
- cc_sram_addr_t larval_digest_addr =
- cc_larval_digest_addr(ctx->drvdata, ctx->hash_mode);
+ u32 larval_digest_addr;
int idx = 0;
int rc = 0;
gfp_t flags = cc_gfp_flags(&req->base);
@@ -465,6 +470,8 @@ static int cc_hash_digest(struct ahash_request *req)
set_din_type(&desc[idx], DMA_DLLI, state->digest_buff_dma_addr,
ctx->inter_digestsize, NS_BIT);
} else {
+ larval_digest_addr = cc_larval_digest_addr(ctx->drvdata,
+ ctx->hash_mode);
set_din_sram(&desc[idx], larval_digest_addr,
ctx->inter_digestsize);
}
@@ -726,7 +733,7 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key,
int digestsize = 0;
int i, idx = 0, rc = 0;
struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN];
- cc_sram_addr_t larval_addr;
+ u32 larval_addr;
struct device *dev;
ctx = crypto_ahash_ctx(ahash);
@@ -752,7 +759,7 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key,
return -ENOMEM;
ctx->key_params.key_dma_addr =
- dma_map_single(dev, (void *)ctx->key_params.key, keylen,
+ dma_map_single(dev, ctx->key_params.key, keylen,
DMA_TO_DEVICE);
if (dma_mapping_error(dev, ctx->key_params.key_dma_addr)) {
dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n",
@@ -1067,8 +1074,8 @@ static int cc_alloc_ctx(struct cc_hash_ctx *ctx)
ctx->key_params.keylen = 0;
ctx->digest_buff_dma_addr =
- dma_map_single(dev, (void *)ctx->digest_buff,
- sizeof(ctx->digest_buff), DMA_BIDIRECTIONAL);
+ dma_map_single(dev, ctx->digest_buff, sizeof(ctx->digest_buff),
+ DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, ctx->digest_buff_dma_addr)) {
dev_err(dev, "Mapping digest len %zu B at va=%pK for DMA failed\n",
sizeof(ctx->digest_buff), ctx->digest_buff);
@@ -1079,7 +1086,7 @@ static int cc_alloc_ctx(struct cc_hash_ctx *ctx)
&ctx->digest_buff_dma_addr);
ctx->opad_tmp_keys_dma_addr =
- dma_map_single(dev, (void *)ctx->opad_tmp_keys_buff,
+ dma_map_single(dev, ctx->opad_tmp_keys_buff,
sizeof(ctx->opad_tmp_keys_buff),
DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, ctx->opad_tmp_keys_dma_addr)) {
@@ -1196,8 +1203,8 @@ static int cc_mac_update(struct ahash_request *req)
idx++;
/* Setup request structure */
- cc_req.user_cb = (void *)cc_update_complete;
- cc_req.user_arg = (void *)req;
+ cc_req.user_cb = cc_update_complete;
+ cc_req.user_arg = req;
rc = cc_send_request(ctx->drvdata, &cc_req, desc, idx, &req->base);
if (rc != -EINPROGRESS && rc != -EBUSY) {
@@ -1254,8 +1261,8 @@ static int cc_mac_final(struct ahash_request *req)
}
/* Setup request structure */
- cc_req.user_cb = (void *)cc_hash_complete;
- cc_req.user_arg = (void *)req;
+ cc_req.user_cb = cc_hash_complete;
+ cc_req.user_arg = req;
if (state->xcbc_count && rem_cnt == 0) {
/* Load key for ECB decryption */
@@ -1311,7 +1318,6 @@ static int cc_mac_final(struct ahash_request *req)
/* Get final MAC result */
hw_desc_init(&desc[idx]);
- /* TODO */
set_dout_dlli(&desc[idx], state->digest_result_dma_addr,
digestsize, NS_BIT, 1);
set_queue_last_ind(ctx->drvdata, &desc[idx]);
@@ -1369,8 +1375,8 @@ static int cc_mac_finup(struct ahash_request *req)
}
/* Setup request structure */
- cc_req.user_cb = (void *)cc_hash_complete;
- cc_req.user_arg = (void *)req;
+ cc_req.user_cb = cc_hash_complete;
+ cc_req.user_arg = req;
if (ctx->hw_mode == DRV_CIPHER_XCBC_MAC) {
key_len = CC_AES_128_BIT_KEY_SIZE;
@@ -1393,7 +1399,6 @@ static int cc_mac_finup(struct ahash_request *req)
/* Get final MAC result */
hw_desc_init(&desc[idx]);
- /* TODO */
set_dout_dlli(&desc[idx], state->digest_result_dma_addr,
digestsize, NS_BIT, 1);
set_queue_last_ind(ctx->drvdata, &desc[idx]);
@@ -1448,8 +1453,8 @@ static int cc_mac_digest(struct ahash_request *req)
}
/* Setup request structure */
- cc_req.user_cb = (void *)cc_digest_complete;
- cc_req.user_arg = (void *)req;
+ cc_req.user_cb = cc_digest_complete;
+ cc_req.user_arg = req;
if (ctx->hw_mode == DRV_CIPHER_XCBC_MAC) {
key_len = CC_AES_128_BIT_KEY_SIZE;
@@ -1820,7 +1825,7 @@ static struct cc_hash_alg *cc_alloc_hash_alg(struct cc_hash_template *template,
struct crypto_alg *alg;
struct ahash_alg *halg;
- t_crypto_alg = kzalloc(sizeof(*t_crypto_alg), GFP_KERNEL);
+ t_crypto_alg = devm_kzalloc(dev, sizeof(*t_crypto_alg), GFP_KERNEL);
if (!t_crypto_alg)
return ERR_PTR(-ENOMEM);
@@ -1857,104 +1862,85 @@ static struct cc_hash_alg *cc_alloc_hash_alg(struct cc_hash_template *template,
return t_crypto_alg;
}
+static int cc_init_copy_sram(struct cc_drvdata *drvdata, const u32 *data,
+ unsigned int size, u32 *sram_buff_ofs)
+{
+ struct cc_hw_desc larval_seq[CC_DIGEST_SIZE_MAX / sizeof(u32)];
+ unsigned int larval_seq_len = 0;
+ int rc;
+
+ cc_set_sram_desc(data, *sram_buff_ofs, size / sizeof(*data),
+ larval_seq, &larval_seq_len);
+ rc = send_request_init(drvdata, larval_seq, larval_seq_len);
+ if (rc)
+ return rc;
+
+ *sram_buff_ofs += size;
+ return 0;
+}
+
int cc_init_hash_sram(struct cc_drvdata *drvdata)
{
struct cc_hash_handle *hash_handle = drvdata->hash_handle;
- cc_sram_addr_t sram_buff_ofs = hash_handle->digest_len_sram_addr;
- unsigned int larval_seq_len = 0;
- struct cc_hw_desc larval_seq[CC_DIGEST_SIZE_MAX / sizeof(u32)];
+ u32 sram_buff_ofs = hash_handle->digest_len_sram_addr;
bool large_sha_supported = (drvdata->hw_rev >= CC_HW_REV_712);
bool sm3_supported = (drvdata->hw_rev >= CC_HW_REV_713);
int rc = 0;
/* Copy-to-sram digest-len */
- cc_set_sram_desc(cc_digest_len_init, sram_buff_ofs,
- ARRAY_SIZE(cc_digest_len_init), larval_seq,
- &larval_seq_len);
- rc = send_request_init(drvdata, larval_seq, larval_seq_len);
+ rc = cc_init_copy_sram(drvdata, cc_digest_len_init,
+ sizeof(cc_digest_len_init), &sram_buff_ofs);
if (rc)
goto init_digest_const_err;
- sram_buff_ofs += sizeof(cc_digest_len_init);
- larval_seq_len = 0;
-
if (large_sha_supported) {
/* Copy-to-sram digest-len for sha384/512 */
- cc_set_sram_desc(cc_digest_len_sha512_init, sram_buff_ofs,
- ARRAY_SIZE(cc_digest_len_sha512_init),
- larval_seq, &larval_seq_len);
- rc = send_request_init(drvdata, larval_seq, larval_seq_len);
+ rc = cc_init_copy_sram(drvdata, cc_digest_len_sha512_init,
+ sizeof(cc_digest_len_sha512_init),
+ &sram_buff_ofs);
if (rc)
goto init_digest_const_err;
-
- sram_buff_ofs += sizeof(cc_digest_len_sha512_init);
- larval_seq_len = 0;
}
/* The initial digests offset */
hash_handle->larval_digest_sram_addr = sram_buff_ofs;
/* Copy-to-sram initial SHA* digests */
- cc_set_sram_desc(cc_md5_init, sram_buff_ofs, ARRAY_SIZE(cc_md5_init),
- larval_seq, &larval_seq_len);
- rc = send_request_init(drvdata, larval_seq, larval_seq_len);
+ rc = cc_init_copy_sram(drvdata, cc_md5_init, sizeof(cc_md5_init),
+ &sram_buff_ofs);
if (rc)
goto init_digest_const_err;
- sram_buff_ofs += sizeof(cc_md5_init);
- larval_seq_len = 0;
- cc_set_sram_desc(cc_sha1_init, sram_buff_ofs,
- ARRAY_SIZE(cc_sha1_init), larval_seq,
- &larval_seq_len);
- rc = send_request_init(drvdata, larval_seq, larval_seq_len);
+ rc = cc_init_copy_sram(drvdata, cc_sha1_init, sizeof(cc_sha1_init),
+ &sram_buff_ofs);
if (rc)
goto init_digest_const_err;
- sram_buff_ofs += sizeof(cc_sha1_init);
- larval_seq_len = 0;
- cc_set_sram_desc(cc_sha224_init, sram_buff_ofs,
- ARRAY_SIZE(cc_sha224_init), larval_seq,
- &larval_seq_len);
- rc = send_request_init(drvdata, larval_seq, larval_seq_len);
+ rc = cc_init_copy_sram(drvdata, cc_sha224_init, sizeof(cc_sha224_init),
+ &sram_buff_ofs);
if (rc)
goto init_digest_const_err;
- sram_buff_ofs += sizeof(cc_sha224_init);
- larval_seq_len = 0;
- cc_set_sram_desc(cc_sha256_init, sram_buff_ofs,
- ARRAY_SIZE(cc_sha256_init), larval_seq,
- &larval_seq_len);
- rc = send_request_init(drvdata, larval_seq, larval_seq_len);
+ rc = cc_init_copy_sram(drvdata, cc_sha256_init, sizeof(cc_sha256_init),
+ &sram_buff_ofs);
if (rc)
goto init_digest_const_err;
- sram_buff_ofs += sizeof(cc_sha256_init);
- larval_seq_len = 0;
if (sm3_supported) {
- cc_set_sram_desc(cc_sm3_init, sram_buff_ofs,
- ARRAY_SIZE(cc_sm3_init), larval_seq,
- &larval_seq_len);
- rc = send_request_init(drvdata, larval_seq, larval_seq_len);
+ rc = cc_init_copy_sram(drvdata, cc_sm3_init,
+ sizeof(cc_sm3_init), &sram_buff_ofs);
if (rc)
goto init_digest_const_err;
- sram_buff_ofs += sizeof(cc_sm3_init);
- larval_seq_len = 0;
}
if (large_sha_supported) {
- cc_set_sram_desc((u32 *)cc_sha384_init, sram_buff_ofs,
- (ARRAY_SIZE(cc_sha384_init) * 2), larval_seq,
- &larval_seq_len);
- rc = send_request_init(drvdata, larval_seq, larval_seq_len);
+ rc = cc_init_copy_sram(drvdata, cc_sha384_init,
+ sizeof(cc_sha384_init), &sram_buff_ofs);
if (rc)
goto init_digest_const_err;
- sram_buff_ofs += sizeof(cc_sha384_init);
- larval_seq_len = 0;
- cc_set_sram_desc((u32 *)cc_sha512_init, sram_buff_ofs,
- (ARRAY_SIZE(cc_sha512_init) * 2), larval_seq,
- &larval_seq_len);
- rc = send_request_init(drvdata, larval_seq, larval_seq_len);
+ rc = cc_init_copy_sram(drvdata, cc_sha512_init,
+ sizeof(cc_sha512_init), &sram_buff_ofs);
if (rc)
goto init_digest_const_err;
}
@@ -1963,38 +1949,16 @@ init_digest_const_err:
return rc;
}
-static void __init cc_swap_dwords(u32 *buf, unsigned long size)
-{
- int i;
- u32 tmp;
-
- for (i = 0; i < size; i += 2) {
- tmp = buf[i];
- buf[i] = buf[i + 1];
- buf[i + 1] = tmp;
- }
-}
-
-/*
- * Due to the way the HW works we need to swap every
- * double word in the SHA384 and SHA512 larval hashes
- */
-void __init cc_hash_global_init(void)
-{
- cc_swap_dwords((u32 *)&cc_sha384_init, (ARRAY_SIZE(cc_sha384_init) * 2));
- cc_swap_dwords((u32 *)&cc_sha512_init, (ARRAY_SIZE(cc_sha512_init) * 2));
-}
-
int cc_hash_alloc(struct cc_drvdata *drvdata)
{
struct cc_hash_handle *hash_handle;
- cc_sram_addr_t sram_buff;
+ u32 sram_buff;
u32 sram_size_to_alloc;
struct device *dev = drvdata_to_dev(drvdata);
int rc = 0;
int alg;
- hash_handle = kzalloc(sizeof(*hash_handle), GFP_KERNEL);
+ hash_handle = devm_kzalloc(dev, sizeof(*hash_handle), GFP_KERNEL);
if (!hash_handle)
return -ENOMEM;
@@ -2016,7 +1980,6 @@ int cc_hash_alloc(struct cc_drvdata *drvdata)
sram_buff = cc_sram_alloc(drvdata, sram_size_to_alloc);
if (sram_buff == NULL_SRAM_ADDR) {
- dev_err(dev, "SRAM pool exhausted\n");
rc = -ENOMEM;
goto fail;
}
@@ -2056,12 +2019,10 @@ int cc_hash_alloc(struct cc_drvdata *drvdata)
if (rc) {
dev_err(dev, "%s alg registration failed\n",
driver_hash[alg].driver_name);
- kfree(t_alg);
goto fail;
- } else {
- list_add_tail(&t_alg->entry,
- &hash_handle->hash_list);
}
+
+ list_add_tail(&t_alg->entry, &hash_handle->hash_list);
}
if (hw_mode == DRV_CIPHER_XCBC_MAC ||
hw_mode == DRV_CIPHER_CMAC)
@@ -2081,18 +2042,16 @@ int cc_hash_alloc(struct cc_drvdata *drvdata)
if (rc) {
dev_err(dev, "%s alg registration failed\n",
driver_hash[alg].driver_name);
- kfree(t_alg);
goto fail;
- } else {
- list_add_tail(&t_alg->entry, &hash_handle->hash_list);
}
+
+ list_add_tail(&t_alg->entry, &hash_handle->hash_list);
}
return 0;
fail:
- kfree(drvdata->hash_handle);
- drvdata->hash_handle = NULL;
+ cc_hash_free(drvdata);
return rc;
}
@@ -2101,17 +2060,12 @@ int cc_hash_free(struct cc_drvdata *drvdata)
struct cc_hash_alg *t_hash_alg, *hash_n;
struct cc_hash_handle *hash_handle = drvdata->hash_handle;
- if (hash_handle) {
- list_for_each_entry_safe(t_hash_alg, hash_n,
- &hash_handle->hash_list, entry) {
- crypto_unregister_ahash(&t_hash_alg->ahash_alg);
- list_del(&t_hash_alg->entry);
- kfree(t_hash_alg);
- }
-
- kfree(hash_handle);
- drvdata->hash_handle = NULL;
+ list_for_each_entry_safe(t_hash_alg, hash_n, &hash_handle->hash_list,
+ entry) {
+ crypto_unregister_ahash(&t_hash_alg->ahash_alg);
+ list_del(&t_hash_alg->entry);
}
+
return 0;
}
@@ -2272,22 +2226,23 @@ static const void *cc_larval_digest(struct device *dev, u32 mode)
}
}
-/*!
- * Gets the address of the initial digest in SRAM
+/**
+ * cc_larval_digest_addr() - Get the address of the initial digest in SRAM
* according to the given hash mode
*
- * \param drvdata
- * \param mode The Hash mode. Supported modes: MD5/SHA1/SHA224/SHA256
+ * @drvdata: Associated device driver context
+ * @mode: The Hash mode. Supported modes: MD5/SHA1/SHA224/SHA256
*
- * \return u32 The address of the initial digest in SRAM
+ * Return:
+ * The address of the initial digest in SRAM
*/
-cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode)
+u32 cc_larval_digest_addr(void *drvdata, u32 mode)
{
struct cc_drvdata *_drvdata = (struct cc_drvdata *)drvdata;
struct cc_hash_handle *hash_handle = _drvdata->hash_handle;
struct device *dev = drvdata_to_dev(_drvdata);
bool sm3_supported = (_drvdata->hw_rev >= CC_HW_REV_713);
- cc_sram_addr_t addr;
+ u32 addr;
switch (mode) {
case DRV_HASH_NULL:
@@ -2339,12 +2294,11 @@ cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode)
return hash_handle->larval_digest_sram_addr;
}
-cc_sram_addr_t
-cc_digest_len_addr(void *drvdata, u32 mode)
+u32 cc_digest_len_addr(void *drvdata, u32 mode)
{
struct cc_drvdata *_drvdata = (struct cc_drvdata *)drvdata;
struct cc_hash_handle *hash_handle = _drvdata->hash_handle;
- cc_sram_addr_t digest_len_addr = hash_handle->digest_len_sram_addr;
+ u32 digest_len_addr = hash_handle->digest_len_sram_addr;
switch (mode) {
case DRV_HASH_SHA1:
diff --git a/drivers/crypto/ccree/cc_hash.h b/drivers/crypto/ccree/cc_hash.h
index 0d6dc61484d7..3d0f2179e07e 100644
--- a/drivers/crypto/ccree/cc_hash.h
+++ b/drivers/crypto/ccree/cc_hash.h
@@ -80,30 +80,27 @@ int cc_hash_alloc(struct cc_drvdata *drvdata);
int cc_init_hash_sram(struct cc_drvdata *drvdata);
int cc_hash_free(struct cc_drvdata *drvdata);
-/*!
- * Gets the initial digest length
+/**
+ * cc_digest_len_addr() - Gets the initial digest length
*
- * \param drvdata
- * \param mode The Hash mode. Supported modes:
- * MD5/SHA1/SHA224/SHA256/SHA384/SHA512
+ * @drvdata: Associated device driver context
+ * @mode: The Hash mode. Supported modes: MD5/SHA1/SHA224/SHA256/SHA384/SHA512
*
- * \return u32 returns the address of the initial digest length in SRAM
+ * Return:
+ * Returns the address of the initial digest length in SRAM
*/
-cc_sram_addr_t
-cc_digest_len_addr(void *drvdata, u32 mode);
+u32 cc_digest_len_addr(void *drvdata, u32 mode);
-/*!
- * Gets the address of the initial digest in SRAM
+/**
+ * cc_larval_digest_addr() - Gets the address of the initial digest in SRAM
* according to the given hash mode
*
- * \param drvdata
- * \param mode The Hash mode. Supported modes:
- * MD5/SHA1/SHA224/SHA256/SHA384/SHA512
+ * @drvdata: Associated device driver context
+ * @mode: The Hash mode. Supported modes: MD5/SHA1/SHA224/SHA256/SHA384/SHA512
*
- * \return u32 The address of the initial digest in SRAM
+ * Return:
+ * The address of the initial digest in SRAM
*/
-cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode);
-
-void cc_hash_global_init(void);
+u32 cc_larval_digest_addr(void *drvdata, u32 mode);
#endif /*__CC_HASH_H__*/
diff --git a/drivers/crypto/ccree/cc_hw_queue_defs.h b/drivers/crypto/ccree/cc_hw_queue_defs.h
index 9f4db9956e91..15df58c66911 100644
--- a/drivers/crypto/ccree/cc_hw_queue_defs.h
+++ b/drivers/crypto/ccree/cc_hw_queue_defs.h
@@ -17,46 +17,43 @@
/* Define max. available slots in HW queue */
#define HW_QUEUE_SLOTS_MAX 15
-#define CC_REG_LOW(word, name) \
- (CC_DSCRPTR_QUEUE_WORD ## word ## _ ## name ## _BIT_SHIFT)
-
-#define CC_REG_HIGH(word, name) \
- (CC_REG_LOW(word, name) + \
- CC_DSCRPTR_QUEUE_WORD ## word ## _ ## name ## _BIT_SIZE - 1)
-
-#define CC_GENMASK(word, name) \
- GENMASK(CC_REG_HIGH(word, name), CC_REG_LOW(word, name))
-
-#define WORD0_VALUE CC_GENMASK(0, VALUE)
-#define WORD0_CPP_CIPHER_MODE CC_GENMASK(0, CPP_CIPHER_MODE)
-#define WORD1_DIN_CONST_VALUE CC_GENMASK(1, DIN_CONST_VALUE)
-#define WORD1_DIN_DMA_MODE CC_GENMASK(1, DIN_DMA_MODE)
-#define WORD1_DIN_SIZE CC_GENMASK(1, DIN_SIZE)
-#define WORD1_NOT_LAST CC_GENMASK(1, NOT_LAST)
-#define WORD1_NS_BIT CC_GENMASK(1, NS_BIT)
-#define WORD1_LOCK_QUEUE CC_GENMASK(1, LOCK_QUEUE)
-#define WORD2_VALUE CC_GENMASK(2, VALUE)
-#define WORD3_DOUT_DMA_MODE CC_GENMASK(3, DOUT_DMA_MODE)
-#define WORD3_DOUT_LAST_IND CC_GENMASK(3, DOUT_LAST_IND)
-#define WORD3_DOUT_SIZE CC_GENMASK(3, DOUT_SIZE)
-#define WORD3_HASH_XOR_BIT CC_GENMASK(3, HASH_XOR_BIT)
-#define WORD3_NS_BIT CC_GENMASK(3, NS_BIT)
-#define WORD3_QUEUE_LAST_IND CC_GENMASK(3, QUEUE_LAST_IND)
-#define WORD4_ACK_NEEDED CC_GENMASK(4, ACK_NEEDED)
-#define WORD4_AES_SEL_N_HASH CC_GENMASK(4, AES_SEL_N_HASH)
-#define WORD4_AES_XOR_CRYPTO_KEY CC_GENMASK(4, AES_XOR_CRYPTO_KEY)
-#define WORD4_BYTES_SWAP CC_GENMASK(4, BYTES_SWAP)
-#define WORD4_CIPHER_CONF0 CC_GENMASK(4, CIPHER_CONF0)
-#define WORD4_CIPHER_CONF1 CC_GENMASK(4, CIPHER_CONF1)
-#define WORD4_CIPHER_CONF2 CC_GENMASK(4, CIPHER_CONF2)
-#define WORD4_CIPHER_DO CC_GENMASK(4, CIPHER_DO)
-#define WORD4_CIPHER_MODE CC_GENMASK(4, CIPHER_MODE)
-#define WORD4_CMAC_SIZE0 CC_GENMASK(4, CMAC_SIZE0)
-#define WORD4_DATA_FLOW_MODE CC_GENMASK(4, DATA_FLOW_MODE)
-#define WORD4_KEY_SIZE CC_GENMASK(4, KEY_SIZE)
-#define WORD4_SETUP_OPERATION CC_GENMASK(4, SETUP_OPERATION)
-#define WORD5_DIN_ADDR_HIGH CC_GENMASK(5, DIN_ADDR_HIGH)
-#define WORD5_DOUT_ADDR_HIGH CC_GENMASK(5, DOUT_ADDR_HIGH)
+#define CC_REG_LOW(name) (name ## _BIT_SHIFT)
+#define CC_REG_HIGH(name) (CC_REG_LOW(name) + name ## _BIT_SIZE - 1)
+#define CC_GENMASK(name) GENMASK(CC_REG_HIGH(name), CC_REG_LOW(name))
+
+#define CC_HWQ_GENMASK(word, field) \
+ CC_GENMASK(CC_DSCRPTR_QUEUE_WORD ## word ## _ ## field)
+
+#define WORD0_VALUE CC_HWQ_GENMASK(0, VALUE)
+#define WORD0_CPP_CIPHER_MODE CC_HWQ_GENMASK(0, CPP_CIPHER_MODE)
+#define WORD1_DIN_CONST_VALUE CC_HWQ_GENMASK(1, DIN_CONST_VALUE)
+#define WORD1_DIN_DMA_MODE CC_HWQ_GENMASK(1, DIN_DMA_MODE)
+#define WORD1_DIN_SIZE CC_HWQ_GENMASK(1, DIN_SIZE)
+#define WORD1_NOT_LAST CC_HWQ_GENMASK(1, NOT_LAST)
+#define WORD1_NS_BIT CC_HWQ_GENMASK(1, NS_BIT)
+#define WORD1_LOCK_QUEUE CC_HWQ_GENMASK(1, LOCK_QUEUE)
+#define WORD2_VALUE CC_HWQ_GENMASK(2, VALUE)
+#define WORD3_DOUT_DMA_MODE CC_HWQ_GENMASK(3, DOUT_DMA_MODE)
+#define WORD3_DOUT_LAST_IND CC_HWQ_GENMASK(3, DOUT_LAST_IND)
+#define WORD3_DOUT_SIZE CC_HWQ_GENMASK(3, DOUT_SIZE)
+#define WORD3_HASH_XOR_BIT CC_HWQ_GENMASK(3, HASH_XOR_BIT)
+#define WORD3_NS_BIT CC_HWQ_GENMASK(3, NS_BIT)
+#define WORD3_QUEUE_LAST_IND CC_HWQ_GENMASK(3, QUEUE_LAST_IND)
+#define WORD4_ACK_NEEDED CC_HWQ_GENMASK(4, ACK_NEEDED)
+#define WORD4_AES_SEL_N_HASH CC_HWQ_GENMASK(4, AES_SEL_N_HASH)
+#define WORD4_AES_XOR_CRYPTO_KEY CC_HWQ_GENMASK(4, AES_XOR_CRYPTO_KEY)
+#define WORD4_BYTES_SWAP CC_HWQ_GENMASK(4, BYTES_SWAP)
+#define WORD4_CIPHER_CONF0 CC_HWQ_GENMASK(4, CIPHER_CONF0)
+#define WORD4_CIPHER_CONF1 CC_HWQ_GENMASK(4, CIPHER_CONF1)
+#define WORD4_CIPHER_CONF2 CC_HWQ_GENMASK(4, CIPHER_CONF2)
+#define WORD4_CIPHER_DO CC_HWQ_GENMASK(4, CIPHER_DO)
+#define WORD4_CIPHER_MODE CC_HWQ_GENMASK(4, CIPHER_MODE)
+#define WORD4_CMAC_SIZE0 CC_HWQ_GENMASK(4, CMAC_SIZE0)
+#define WORD4_DATA_FLOW_MODE CC_HWQ_GENMASK(4, DATA_FLOW_MODE)
+#define WORD4_KEY_SIZE CC_HWQ_GENMASK(4, KEY_SIZE)
+#define WORD4_SETUP_OPERATION CC_HWQ_GENMASK(4, SETUP_OPERATION)
+#define WORD5_DIN_ADDR_HIGH CC_HWQ_GENMASK(5, DIN_ADDR_HIGH)
+#define WORD5_DOUT_ADDR_HIGH CC_HWQ_GENMASK(5, DOUT_ADDR_HIGH)
/******************************************************************************
* TYPE DEFINITIONS
@@ -207,31 +204,32 @@ enum cc_hash_cipher_pad {
/* Descriptor packing macros */
/*****************************/
-/*
- * Init a HW descriptor struct
- * @pdesc: pointer HW descriptor struct
+/**
+ * hw_desc_init() - Init a HW descriptor struct
+ * @pdesc: pointer to HW descriptor struct
*/
static inline void hw_desc_init(struct cc_hw_desc *pdesc)
{
memset(pdesc, 0, sizeof(struct cc_hw_desc));
}
-/*
- * Indicates the end of current HW descriptors flow and release the HW engines.
+/**
+ * set_queue_last_ind_bit() - Indicate the end of current HW descriptors flow
+ * and release the HW engines.
*
- * @pdesc: pointer HW descriptor struct
+ * @pdesc: Pointer to HW descriptor struct
*/
static inline void set_queue_last_ind_bit(struct cc_hw_desc *pdesc)
{
pdesc->word[3] |= FIELD_PREP(WORD3_QUEUE_LAST_IND, 1);
}
-/*
- * Set the DIN field of a HW descriptors
+/**
+ * set_din_type() - Set the DIN field of a HW descriptor
*
- * @pdesc: pointer HW descriptor struct
- * @dma_mode: dmaMode The DMA mode: NO_DMA, SRAM, DLLI, MLLI, CONSTANT
- * @addr: dinAdr DIN address
+ * @pdesc: Pointer to HW descriptor struct
+ * @dma_mode: The DMA mode: NO_DMA, SRAM, DLLI, MLLI, CONSTANT
+ * @addr: DIN address
* @size: Data size in bytes
* @axi_sec: AXI secure bit
*/
@@ -239,20 +237,20 @@ static inline void set_din_type(struct cc_hw_desc *pdesc,
enum cc_dma_mode dma_mode, dma_addr_t addr,
u32 size, enum cc_axi_sec axi_sec)
{
- pdesc->word[0] = (u32)addr;
+ pdesc->word[0] = lower_32_bits(addr);
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
- pdesc->word[5] |= FIELD_PREP(WORD5_DIN_ADDR_HIGH, ((u16)(addr >> 32)));
+ pdesc->word[5] |= FIELD_PREP(WORD5_DIN_ADDR_HIGH, upper_32_bits(addr));
#endif
pdesc->word[1] |= FIELD_PREP(WORD1_DIN_DMA_MODE, dma_mode) |
FIELD_PREP(WORD1_DIN_SIZE, size) |
FIELD_PREP(WORD1_NS_BIT, axi_sec);
}
-/*
- * Set the DIN field of a HW descriptors to NO DMA mode.
+/**
+ * set_din_no_dma() - Set the DIN field of a HW descriptor to NO DMA mode.
* Used for NOP descriptor, register patches and other special modes.
*
- * @pdesc: pointer HW descriptor struct
+ * @pdesc: Pointer to HW descriptor struct
* @addr: DIN address
* @size: Data size in bytes
*/
@@ -262,14 +260,11 @@ static inline void set_din_no_dma(struct cc_hw_desc *pdesc, u32 addr, u32 size)
pdesc->word[1] |= FIELD_PREP(WORD1_DIN_SIZE, size);
}
-/*
- * Setup the special CPP descriptor
+/**
+ * set_cpp_crypto_key() - Setup the special CPP descriptor
*
- * @pdesc: pointer HW descriptor struct
- * @alg: cipher used (AES / SM4)
- * @mode: mode used (CTR or CBC)
- * @slot: slot number
- * @ksize: key size
+ * @pdesc: Pointer to HW descriptor struct
+ * @slot: Slot number
*/
static inline void set_cpp_crypto_key(struct cc_hw_desc *pdesc, u8 slot)
{
@@ -281,27 +276,26 @@ static inline void set_cpp_crypto_key(struct cc_hw_desc *pdesc, u8 slot)
pdesc->word[4] |= FIELD_PREP(WORD4_SETUP_OPERATION, slot);
}
-/*
- * Set the DIN field of a HW descriptors to SRAM mode.
+/**
+ * set_din_sram() - Set the DIN field of a HW descriptor to SRAM mode.
* Note: No need to check SRAM alignment since host requests do not use SRAM and
- * adaptor will enforce alignment check.
+ * the adaptor will enforce alignment checks.
*
- * @pdesc: pointer HW descriptor struct
+ * @pdesc: Pointer to HW descriptor struct
* @addr: DIN address
- * @size Data size in bytes
+ * @size: Data size in bytes
*/
-static inline void set_din_sram(struct cc_hw_desc *pdesc, dma_addr_t addr,
- u32 size)
+static inline void set_din_sram(struct cc_hw_desc *pdesc, u32 addr, u32 size)
{
- pdesc->word[0] = (u32)addr;
+ pdesc->word[0] = addr;
pdesc->word[1] |= FIELD_PREP(WORD1_DIN_SIZE, size) |
FIELD_PREP(WORD1_DIN_DMA_MODE, DMA_SRAM);
}
-/*
- * Set the DIN field of a HW descriptors to CONST mode
+/**
+ * set_din_const() - Set the DIN field of a HW descriptor to CONST mode
*
- * @pdesc: pointer HW descriptor struct
+ * @pdesc: Pointer to HW descriptor struct
* @val: DIN const value
* @size: Data size in bytes
*/
@@ -313,20 +307,20 @@ static inline void set_din_const(struct cc_hw_desc *pdesc, u32 val, u32 size)
FIELD_PREP(WORD1_DIN_SIZE, size);
}
-/*
- * Set the DIN not last input data indicator
+/**
+ * set_din_not_last_indication() - Set the DIN not last input data indicator
*
- * @pdesc: pointer HW descriptor struct
+ * @pdesc: Pointer to HW descriptor struct
*/
static inline void set_din_not_last_indication(struct cc_hw_desc *pdesc)
{
pdesc->word[1] |= FIELD_PREP(WORD1_NOT_LAST, 1);
}
-/*
- * Set the DOUT field of a HW descriptors
+/**
+ * set_dout_type() - Set the DOUT field of a HW descriptor
*
- * @pdesc: pointer HW descriptor struct
+ * @pdesc: Pointer to HW descriptor struct
* @dma_mode: The DMA mode: NO_DMA, SRAM, DLLI, MLLI, CONSTANT
* @addr: DOUT address
* @size: Data size in bytes
@@ -336,24 +330,24 @@ static inline void set_dout_type(struct cc_hw_desc *pdesc,
enum cc_dma_mode dma_mode, dma_addr_t addr,
u32 size, enum cc_axi_sec axi_sec)
{
- pdesc->word[2] = (u32)addr;
+ pdesc->word[2] = lower_32_bits(addr);
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
- pdesc->word[5] |= FIELD_PREP(WORD5_DOUT_ADDR_HIGH, ((u16)(addr >> 32)));
+ pdesc->word[5] |= FIELD_PREP(WORD5_DOUT_ADDR_HIGH, upper_32_bits(addr));
#endif
pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_DMA_MODE, dma_mode) |
FIELD_PREP(WORD3_DOUT_SIZE, size) |
FIELD_PREP(WORD3_NS_BIT, axi_sec);
}
-/*
- * Set the DOUT field of a HW descriptors to DLLI type
+/**
+ * set_dout_dlli() - Set the DOUT field of a HW descriptor to DLLI type
* The LAST INDICATION is provided by the user
*
- * @pdesc pointer HW descriptor struct
+ * @pdesc: Pointer to HW descriptor struct
* @addr: DOUT address
* @size: Data size in bytes
- * @last_ind: The last indication bit
* @axi_sec: AXI secure bit
+ * @last_ind: The last indication bit
*/
static inline void set_dout_dlli(struct cc_hw_desc *pdesc, dma_addr_t addr,
u32 size, enum cc_axi_sec axi_sec,
@@ -363,29 +357,28 @@ static inline void set_dout_dlli(struct cc_hw_desc *pdesc, dma_addr_t addr,
pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_LAST_IND, last_ind);
}
-/*
- * Set the DOUT field of a HW descriptors to DLLI type
+/**
+ * set_dout_mlli() - Set the DOUT field of a HW descriptor to MLLI type
* The LAST INDICATION is provided by the user
*
- * @pdesc: pointer HW descriptor struct
+ * @pdesc: Pointer to HW descriptor struct
* @addr: DOUT address
* @size: Data size in bytes
- * @last_ind: The last indication bit
* @axi_sec: AXI secure bit
+ * @last_ind: The last indication bit
*/
-static inline void set_dout_mlli(struct cc_hw_desc *pdesc, dma_addr_t addr,
- u32 size, enum cc_axi_sec axi_sec,
- bool last_ind)
+static inline void set_dout_mlli(struct cc_hw_desc *pdesc, u32 addr, u32 size,
+ enum cc_axi_sec axi_sec, bool last_ind)
{
set_dout_type(pdesc, DMA_MLLI, addr, size, axi_sec);
pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_LAST_IND, last_ind);
}
-/*
- * Set the DOUT field of a HW descriptors to NO DMA mode.
+/**
+ * set_dout_no_dma() - Set the DOUT field of a HW descriptor to NO DMA mode.
* Used for NOP descriptor, register patches and other special modes.
*
- * @pdesc: pointer HW descriptor struct
+ * @pdesc: pointer to HW descriptor struct
* @addr: DOUT address
* @size: Data size in bytes
* @write_enable: Enables a write operation to a register
@@ -398,54 +391,55 @@ static inline void set_dout_no_dma(struct cc_hw_desc *pdesc, u32 addr,
FIELD_PREP(WORD3_DOUT_LAST_IND, write_enable);
}
-/*
- * Set the word for the XOR operation.
+/**
+ * set_xor_val() - Set the word for the XOR operation.
*
- * @pdesc: pointer HW descriptor struct
- * @val: xor data value
+ * @pdesc: Pointer to HW descriptor struct
+ * @val: XOR data value
*/
static inline void set_xor_val(struct cc_hw_desc *pdesc, u32 val)
{
pdesc->word[2] = val;
}
-/*
- * Sets the XOR indicator bit in the descriptor
+/**
+ * set_xor_active() - Set the XOR indicator bit in the descriptor
*
- * @pdesc: pointer HW descriptor struct
+ * @pdesc: Pointer to HW descriptor struct
*/
static inline void set_xor_active(struct cc_hw_desc *pdesc)
{
pdesc->word[3] |= FIELD_PREP(WORD3_HASH_XOR_BIT, 1);
}
-/*
- * Select the AES engine instead of HASH engine when setting up combined mode
- * with AES XCBC MAC
+/**
+ * set_aes_not_hash_mode() - Select the AES engine instead of HASH engine when
+ * setting up combined mode with AES XCBC MAC
*
- * @pdesc: pointer HW descriptor struct
+ * @pdesc: Pointer to HW descriptor struct
*/
static inline void set_aes_not_hash_mode(struct cc_hw_desc *pdesc)
{
pdesc->word[4] |= FIELD_PREP(WORD4_AES_SEL_N_HASH, 1);
}
-/*
- * Set aes xor crypto key, this in some secenrios select SM3 engine
+/**
+ * set_aes_xor_crypto_key() - Set aes xor crypto key, which in some scenarios
+ * selects the SM3 engine
*
- * @pdesc: pointer HW descriptor struct
+ * @pdesc: Pointer to HW descriptor struct
*/
static inline void set_aes_xor_crypto_key(struct cc_hw_desc *pdesc)
{
pdesc->word[4] |= FIELD_PREP(WORD4_AES_XOR_CRYPTO_KEY, 1);
}
-/*
- * Set the DOUT field of a HW descriptors to SRAM mode
+/**
+ * set_dout_sram() - Set the DOUT field of a HW descriptor to SRAM mode
* Note: No need to check SRAM alignment since host requests do not use SRAM and
- * adaptor will enforce alignment check.
+ * the adaptor will enforce alignment checks.
*
- * @pdesc: pointer HW descriptor struct
+ * @pdesc: Pointer to HW descriptor struct
* @addr: DOUT address
* @size: Data size in bytes
*/
@@ -456,32 +450,34 @@ static inline void set_dout_sram(struct cc_hw_desc *pdesc, u32 addr, u32 size)
FIELD_PREP(WORD3_DOUT_SIZE, size);
}
-/*
- * Sets the data unit size for XEX mode in data_out_addr[15:0]
+/**
+ * set_xex_data_unit_size() - Set the data unit size for XEX mode in
+ * data_out_addr[15:0]
*
- * @pdesc: pDesc pointer HW descriptor struct
- * @size: data unit size for XEX mode
+ * @pdesc: Pointer to HW descriptor struct
+ * @size: Data unit size for XEX mode
*/
static inline void set_xex_data_unit_size(struct cc_hw_desc *pdesc, u32 size)
{
pdesc->word[2] = size;
}
-/*
- * Set the number of rounds for Multi2 in data_out_addr[15:0]
+/**
+ * set_multi2_num_rounds() - Set the number of rounds for Multi2 in
+ * data_out_addr[15:0]
*
- * @pdesc: pointer HW descriptor struct
- * @num: number of rounds for Multi2
+ * @pdesc: Pointer to HW descriptor struct
+ * @num: Number of rounds for Multi2
*/
static inline void set_multi2_num_rounds(struct cc_hw_desc *pdesc, u32 num)
{
pdesc->word[2] = num;
}
-/*
- * Set the flow mode.
+/**
+ * set_flow_mode() - Set the flow mode.
*
- * @pdesc: pointer HW descriptor struct
+ * @pdesc: Pointer to HW descriptor struct
* @mode: Any one of the modes defined in [CC7x-DESC]
*/
static inline void set_flow_mode(struct cc_hw_desc *pdesc,
@@ -490,22 +486,22 @@ static inline void set_flow_mode(struct cc_hw_desc *pdesc,
pdesc->word[4] |= FIELD_PREP(WORD4_DATA_FLOW_MODE, mode);
}
-/*
- * Set the cipher mode.
+/**
+ * set_cipher_mode() - Set the cipher mode.
*
- * @pdesc: pointer HW descriptor struct
- * @mode: Any one of the modes defined in [CC7x-DESC]
+ * @pdesc: Pointer to HW descriptor struct
+ * @mode: Any one of the modes defined in [CC7x-DESC]
*/
static inline void set_cipher_mode(struct cc_hw_desc *pdesc, int mode)
{
pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_MODE, mode);
}
-/*
- * Set the cipher mode for hash algorithms.
+/**
+ * set_hash_cipher_mode() - Set the cipher mode for hash algorithms.
*
- * @pdesc: pointer HW descriptor struct
- * @cipher_mode: Any one of the modes defined in [CC7x-DESC]
+ * @pdesc: Pointer to HW descriptor struct
+ * @cipher_mode: Any one of the modes defined in [CC7x-DESC]
* @hash_mode: specifies which hash is being handled
*/
static inline void set_hash_cipher_mode(struct cc_hw_desc *pdesc,
@@ -517,10 +513,10 @@ static inline void set_hash_cipher_mode(struct cc_hw_desc *pdesc,
set_aes_xor_crypto_key(pdesc);
}
-/*
- * Set the cipher configuration fields.
+/**
+ * set_cipher_config0() - Set the cipher configuration fields.
*
- * @pdesc: pointer HW descriptor struct
+ * @pdesc: Pointer to HW descriptor struct
* @mode: Any one of the modes defined in [CC7x-DESC]
*/
static inline void set_cipher_config0(struct cc_hw_desc *pdesc, int mode)
@@ -528,11 +524,11 @@ static inline void set_cipher_config0(struct cc_hw_desc *pdesc, int mode)
pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_CONF0, mode);
}
-/*
- * Set the cipher configuration fields.
+/**
+ * set_cipher_config1() - Set the cipher configuration fields.
*
- * @pdesc: pointer HW descriptor struct
- * @config: Any one of the modes defined in [CC7x-DESC]
+ * @pdesc: Pointer to HW descriptor struct
+ * @config: Padding mode
*/
static inline void set_cipher_config1(struct cc_hw_desc *pdesc,
enum cc_hash_conf_pad config)
@@ -540,10 +536,10 @@ static inline void set_cipher_config1(struct cc_hw_desc *pdesc,
pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_CONF1, config);
}
-/*
- * Set HW key configuration fields.
+/**
+ * set_hw_crypto_key() - Set HW key configuration fields.
*
- * @pdesc: pointer HW descriptor struct
+ * @pdesc: Pointer to HW descriptor struct
* @hw_key: The HW key slot asdefined in enum cc_hw_crypto_key
*/
static inline void set_hw_crypto_key(struct cc_hw_desc *pdesc,
@@ -555,64 +551,64 @@ static inline void set_hw_crypto_key(struct cc_hw_desc *pdesc,
(hw_key >> HW_KEY_SHIFT_CIPHER_CFG2));
}
-/*
- * Set byte order of all setup-finalize descriptors.
+/**
+ * set_bytes_swap() - Set byte order of all setup-finalize descriptors.
*
- * @pdesc: pointer HW descriptor struct
- * @config: Any one of the modes defined in [CC7x-DESC]
+ * @pdesc: Pointer to HW descriptor struct
+ * @config: True to enable byte swapping
*/
static inline void set_bytes_swap(struct cc_hw_desc *pdesc, bool config)
{
pdesc->word[4] |= FIELD_PREP(WORD4_BYTES_SWAP, config);
}
-/*
- * Set CMAC_SIZE0 mode.
+/**
+ * set_cmac_size0_mode() - Set CMAC_SIZE0 mode.
*
- * @pdesc: pointer HW descriptor struct
+ * @pdesc: Pointer to HW descriptor struct
*/
static inline void set_cmac_size0_mode(struct cc_hw_desc *pdesc)
{
pdesc->word[4] |= FIELD_PREP(WORD4_CMAC_SIZE0, 1);
}
-/*
- * Set key size descriptor field.
+/**
+ * set_key_size() - Set key size descriptor field.
*
- * @pdesc: pointer HW descriptor struct
- * @size: key size in bytes (NOT size code)
+ * @pdesc: Pointer to HW descriptor struct
+ * @size: Key size in bytes (NOT size code)
*/
static inline void set_key_size(struct cc_hw_desc *pdesc, u32 size)
{
pdesc->word[4] |= FIELD_PREP(WORD4_KEY_SIZE, size);
}
-/*
- * Set AES key size.
+/**
+ * set_key_size_aes() - Set AES key size.
*
- * @pdesc: pointer HW descriptor struct
- * @size: key size in bytes (NOT size code)
+ * @pdesc: Pointer to HW descriptor struct
+ * @size: Key size in bytes (NOT size code)
*/
static inline void set_key_size_aes(struct cc_hw_desc *pdesc, u32 size)
{
set_key_size(pdesc, ((size >> 3) - 2));
}
-/*
- * Set DES key size.
+/**
+ * set_key_size_des() - Set DES key size.
*
- * @pdesc: pointer HW descriptor struct
- * @size: key size in bytes (NOT size code)
+ * @pdesc: Pointer to HW descriptor struct
+ * @size: Key size in bytes (NOT size code)
*/
static inline void set_key_size_des(struct cc_hw_desc *pdesc, u32 size)
{
set_key_size(pdesc, ((size >> 3) - 1));
}
-/*
- * Set the descriptor setup mode
+/**
+ * set_setup_mode() - Set the descriptor setup mode
*
- * @pdesc: pointer HW descriptor struct
+ * @pdesc: Pointer to HW descriptor struct
* @mode: Any one of the setup modes defined in [CC7x-DESC]
*/
static inline void set_setup_mode(struct cc_hw_desc *pdesc,
@@ -621,10 +617,10 @@ static inline void set_setup_mode(struct cc_hw_desc *pdesc,
pdesc->word[4] |= FIELD_PREP(WORD4_SETUP_OPERATION, mode);
}
-/*
- * Set the descriptor cipher DO
+/**
+ * set_cipher_do() - Set the descriptor cipher DO
*
- * @pdesc: pointer HW descriptor struct
+ * @pdesc: Pointer to HW descriptor struct
* @config: Any one of the cipher do defined in [CC7x-DESC]
*/
static inline void set_cipher_do(struct cc_hw_desc *pdesc,
diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c
index 24c368b866f6..d39e1664fc7e 100644
--- a/drivers/crypto/ccree/cc_pm.c
+++ b/drivers/crypto/ccree/cc_pm.c
@@ -15,29 +15,25 @@
#define POWER_DOWN_ENABLE 0x01
#define POWER_DOWN_DISABLE 0x00
-const struct dev_pm_ops ccree_pm = {
- SET_RUNTIME_PM_OPS(cc_pm_suspend, cc_pm_resume, NULL)
-};
-
-int cc_pm_suspend(struct device *dev)
+static int cc_pm_suspend(struct device *dev)
{
struct cc_drvdata *drvdata = dev_get_drvdata(dev);
dev_dbg(dev, "set HOST_POWER_DOWN_EN\n");
fini_cc_regs(drvdata);
cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_ENABLE);
- cc_clk_off(drvdata);
+ clk_disable_unprepare(drvdata->clk);
return 0;
}
-int cc_pm_resume(struct device *dev)
+static int cc_pm_resume(struct device *dev)
{
int rc;
struct cc_drvdata *drvdata = dev_get_drvdata(dev);
dev_dbg(dev, "unset HOST_POWER_DOWN_EN\n");
/* Enables the device source clk */
- rc = cc_clk_on(drvdata);
+ rc = clk_prepare_enable(drvdata->clk);
if (rc) {
dev_err(dev, "failed getting clock back on. We're toast.\n");
return rc;
@@ -62,53 +58,19 @@ int cc_pm_resume(struct device *dev)
return 0;
}
+const struct dev_pm_ops ccree_pm = {
+ SET_RUNTIME_PM_OPS(cc_pm_suspend, cc_pm_resume, NULL)
+};
+
int cc_pm_get(struct device *dev)
{
- int rc = 0;
- struct cc_drvdata *drvdata = dev_get_drvdata(dev);
-
- if (drvdata->pm_on)
- rc = pm_runtime_get_sync(dev);
+ int rc = pm_runtime_get_sync(dev);
return (rc == 1 ? 0 : rc);
}
void cc_pm_put_suspend(struct device *dev)
{
- struct cc_drvdata *drvdata = dev_get_drvdata(dev);
-
- if (drvdata->pm_on) {
- pm_runtime_mark_last_busy(dev);
- pm_runtime_put_autosuspend(dev);
- }
-}
-
-bool cc_pm_is_dev_suspended(struct device *dev)
-{
- /* check device state using runtime api */
- return pm_runtime_suspended(dev);
-}
-
-int cc_pm_init(struct cc_drvdata *drvdata)
-{
- struct device *dev = drvdata_to_dev(drvdata);
-
- /* must be before the enabling to avoid redundant suspending */
- pm_runtime_set_autosuspend_delay(dev, CC_SUSPEND_TIMEOUT);
- pm_runtime_use_autosuspend(dev);
- /* set us as active - note we won't do PM ops until cc_pm_go()! */
- return pm_runtime_set_active(dev);
-}
-
-/* enable the PM module*/
-void cc_pm_go(struct cc_drvdata *drvdata)
-{
- pm_runtime_enable(drvdata_to_dev(drvdata));
- drvdata->pm_on = true;
-}
-
-void cc_pm_fini(struct cc_drvdata *drvdata)
-{
- pm_runtime_disable(drvdata_to_dev(drvdata));
- drvdata->pm_on = false;
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_put_autosuspend(dev);
}
diff --git a/drivers/crypto/ccree/cc_pm.h b/drivers/crypto/ccree/cc_pm.h
index 80a18e11cae4..50cac33de118 100644
--- a/drivers/crypto/ccree/cc_pm.h
+++ b/drivers/crypto/ccree/cc_pm.h
@@ -15,26 +15,11 @@
extern const struct dev_pm_ops ccree_pm;
-int cc_pm_init(struct cc_drvdata *drvdata);
-void cc_pm_go(struct cc_drvdata *drvdata);
-void cc_pm_fini(struct cc_drvdata *drvdata);
-int cc_pm_suspend(struct device *dev);
-int cc_pm_resume(struct device *dev);
int cc_pm_get(struct device *dev);
void cc_pm_put_suspend(struct device *dev);
-bool cc_pm_is_dev_suspended(struct device *dev);
#else
-static inline int cc_pm_init(struct cc_drvdata *drvdata)
-{
- return 0;
-}
-
-static inline void cc_pm_go(struct cc_drvdata *drvdata) {}
-
-static inline void cc_pm_fini(struct cc_drvdata *drvdata) {}
-
static inline int cc_pm_get(struct device *dev)
{
return 0;
@@ -42,12 +27,6 @@ static inline int cc_pm_get(struct device *dev)
static inline void cc_pm_put_suspend(struct device *dev) {}
-static inline bool cc_pm_is_dev_suspended(struct device *dev)
-{
- /* if PM not supported device is never suspend */
- return false;
-}
-
#endif
#endif /*__POWER_MGR_H__*/
diff --git a/drivers/crypto/ccree/cc_request_mgr.c b/drivers/crypto/ccree/cc_request_mgr.c
index 9d61e6f12478..1d7649ecf44e 100644
--- a/drivers/crypto/ccree/cc_request_mgr.c
+++ b/drivers/crypto/ccree/cc_request_mgr.c
@@ -206,12 +206,13 @@ static void enqueue_seq(struct cc_drvdata *drvdata, struct cc_hw_desc seq[],
}
}
-/*!
- * Completion will take place if and only if user requested completion
- * by cc_send_sync_request().
+/**
+ * request_mgr_complete() - Completion will take place if and only if user
+ * requested completion by cc_send_sync_request().
*
- * \param dev
- * \param dx_compl_h The completion event to signal
+ * @dev: Device pointer
+ * @dx_compl_h: The completion event to signal
+ * @dummy: unused error code
*/
static void request_mgr_complete(struct device *dev, void *dx_compl_h,
int dummy)
@@ -264,15 +265,15 @@ static int cc_queues_status(struct cc_drvdata *drvdata,
return -ENOSPC;
}
-/*!
- * Enqueue caller request to crypto hardware.
+/**
+ * cc_do_send_request() - Enqueue caller request to crypto hardware.
* Need to be called with HW lock held and PM running
*
- * \param drvdata
- * \param cc_req The request to enqueue
- * \param desc The crypto sequence
- * \param len The crypto sequence length
- * \param add_comp If "true": add an artificial dout DMA to mark completion
+ * @drvdata: Associated device driver context
+ * @cc_req: The request to enqueue
+ * @desc: The crypto sequence
+ * @len: The crypto sequence length
+ * @add_comp: If "true": add an artificial dout DMA to mark completion
*
*/
static void cc_do_send_request(struct cc_drvdata *drvdata,
@@ -295,7 +296,6 @@ static void cc_do_send_request(struct cc_drvdata *drvdata,
req_mgr_h->req_queue[req_mgr_h->req_queue_head] = *cc_req;
req_mgr_h->req_queue_head = (req_mgr_h->req_queue_head + 1) &
(MAX_REQUEST_QUEUE_SIZE - 1);
- /* TODO: Use circ_buf.h ? */
dev_dbg(dev, "Enqueue request head=%u\n", req_mgr_h->req_queue_head);
@@ -377,7 +377,7 @@ static void cc_proc_backlog(struct cc_drvdata *drvdata)
rc = cc_queues_status(drvdata, mgr, bli->len);
if (rc) {
/*
- * There is still not room in the FIFO for
+ * There is still no room in the FIFO for
* this request. Bail out. We'll return here
* on the next completion irq.
*/
@@ -476,10 +476,6 @@ int cc_send_sync_request(struct cc_drvdata *drvdata,
break;
spin_unlock_bh(&mgr->hw_lock);
- if (rc != -EAGAIN) {
- cc_pm_put_suspend(dev);
- return rc;
- }
wait_for_completion_interruptible(&drvdata->hw_queue_avail);
reinit_completion(&drvdata->hw_queue_avail);
}
@@ -490,16 +486,18 @@ int cc_send_sync_request(struct cc_drvdata *drvdata,
return 0;
}
-/*!
- * Enqueue caller request to crypto hardware during init process.
- * assume this function is not called in middle of a flow,
+/**
+ * send_request_init() - Enqueue caller request to crypto hardware during init
+ * process.
+ * Assume this function is not called in the middle of a flow,
* since we set QUEUE_LAST_IND flag in the last descriptor.
*
- * \param drvdata
- * \param desc The crypto sequence
- * \param len The crypto sequence length
+ * @drvdata: Associated device driver context
+ * @desc: The crypto sequence
+ * @len: The crypto sequence length
*
- * \return int Returns "0" upon success
+ * Return:
+ * Returns "0" upon success
*/
int send_request_init(struct cc_drvdata *drvdata, struct cc_hw_desc *desc,
unsigned int len)
diff --git a/drivers/crypto/ccree/cc_request_mgr.h b/drivers/crypto/ccree/cc_request_mgr.h
index ff7746aaaf35..ae25ca843dce 100644
--- a/drivers/crypto/ccree/cc_request_mgr.h
+++ b/drivers/crypto/ccree/cc_request_mgr.h
@@ -12,18 +12,17 @@
int cc_req_mgr_init(struct cc_drvdata *drvdata);
-/*!
- * Enqueue caller request to crypto hardware.
+/**
+ * cc_send_request() - Enqueue caller request to crypto hardware.
*
- * \param drvdata
- * \param cc_req The request to enqueue
- * \param desc The crypto sequence
- * \param len The crypto sequence length
- * \param is_dout If "true": completion is handled by the caller
- * If "false": this function adds a dummy descriptor completion
- * and waits upon completion signal.
+ * @drvdata: Associated device driver context
+ * @cc_req: The request to enqueue
+ * @desc: The crypto sequence
+ * @len: The crypto sequence length
+ * @req: Asynchronous crypto request
*
- * \return int Returns -EINPROGRESS or error
+ * Return:
+ * Returns -EINPROGRESS or error
*/
int cc_send_request(struct cc_drvdata *drvdata, struct cc_crypto_req *cc_req,
struct cc_hw_desc *desc, unsigned int len,
diff --git a/drivers/crypto/ccree/cc_sram_mgr.c b/drivers/crypto/ccree/cc_sram_mgr.c
index 62c885e6e791..37a95856361f 100644
--- a/drivers/crypto/ccree/cc_sram_mgr.c
+++ b/drivers/crypto/ccree/cc_sram_mgr.c
@@ -5,88 +5,61 @@
#include "cc_sram_mgr.h"
/**
- * struct cc_sram_ctx -Internal RAM context manager
- * @sram_free_offset: the offset to the non-allocated area
- */
-struct cc_sram_ctx {
- cc_sram_addr_t sram_free_offset;
-};
-
-/**
- * cc_sram_mgr_fini() - Cleanup SRAM pool.
- *
- * @drvdata: Associated device driver context
- */
-void cc_sram_mgr_fini(struct cc_drvdata *drvdata)
-{
- /* Nothing needed */
-}
-
-/**
* cc_sram_mgr_init() - Initializes SRAM pool.
* The pool starts right at the beginning of SRAM.
* Returns zero for success, negative value otherwise.
*
* @drvdata: Associated device driver context
+ *
+ * Return:
+ * 0 for success, negative error code for failure.
*/
int cc_sram_mgr_init(struct cc_drvdata *drvdata)
{
- struct cc_sram_ctx *ctx;
- dma_addr_t start = 0;
+ u32 start = 0;
struct device *dev = drvdata_to_dev(drvdata);
if (drvdata->hw_rev < CC_HW_REV_712) {
/* Pool starts after ROM bytes */
- start = (dma_addr_t)cc_ioread(drvdata,
- CC_REG(HOST_SEP_SRAM_THRESHOLD));
-
+ start = cc_ioread(drvdata, CC_REG(HOST_SEP_SRAM_THRESHOLD));
if ((start & 0x3) != 0) {
- dev_err(dev, "Invalid SRAM offset %pad\n", &start);
+ dev_err(dev, "Invalid SRAM offset 0x%x\n", start);
return -EINVAL;
}
}
- /* Allocate "this" context */
- ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
-
- if (!ctx)
- return -ENOMEM;
-
- ctx->sram_free_offset = start;
- drvdata->sram_mgr_handle = ctx;
-
+ drvdata->sram_free_offset = start;
return 0;
}
-/*!
- * Allocated buffer from SRAM pool.
- * Note: Caller is responsible to free the LAST allocated buffer.
- * This function does not taking care of any fragmentation may occur
- * by the order of calls to alloc/free.
+/**
+ * cc_sram_alloc() - Allocate buffer from SRAM pool.
+ *
+ * @drvdata: Associated device driver context
+ * @size: The requested numer of bytes to allocate
*
- * \param drvdata
- * \param size The requested bytes to allocate
+ * Return:
+ * Address offset in SRAM or NULL_SRAM_ADDR for failure.
*/
-cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size)
+u32 cc_sram_alloc(struct cc_drvdata *drvdata, u32 size)
{
- struct cc_sram_ctx *smgr_ctx = drvdata->sram_mgr_handle;
struct device *dev = drvdata_to_dev(drvdata);
- cc_sram_addr_t p;
+ u32 p;
if ((size & 0x3)) {
dev_err(dev, "Requested buffer size (%u) is not multiple of 4",
size);
return NULL_SRAM_ADDR;
}
- if (size > (CC_CC_SRAM_SIZE - smgr_ctx->sram_free_offset)) {
- dev_err(dev, "Not enough space to allocate %u B (at offset %llu)\n",
- size, smgr_ctx->sram_free_offset);
+ if (size > (CC_CC_SRAM_SIZE - drvdata->sram_free_offset)) {
+ dev_err(dev, "Not enough space to allocate %u B (at offset %u)\n",
+ size, drvdata->sram_free_offset);
return NULL_SRAM_ADDR;
}
- p = smgr_ctx->sram_free_offset;
- smgr_ctx->sram_free_offset += size;
- dev_dbg(dev, "Allocated %u B @ %u\n", size, (unsigned int)p);
+ p = drvdata->sram_free_offset;
+ drvdata->sram_free_offset += size;
+ dev_dbg(dev, "Allocated %u B @ %u\n", size, p);
return p;
}
@@ -97,13 +70,12 @@ cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size)
*
* @src: A pointer to array of words to set as consts.
* @dst: The target SRAM buffer to set into
- * @nelements: The number of words in "src" array
+ * @nelement: The number of words in "src" array
* @seq: A pointer to the given IN/OUT descriptor sequence
* @seq_len: A pointer to the given IN/OUT sequence length
*/
-void cc_set_sram_desc(const u32 *src, cc_sram_addr_t dst,
- unsigned int nelement, struct cc_hw_desc *seq,
- unsigned int *seq_len)
+void cc_set_sram_desc(const u32 *src, u32 dst, unsigned int nelement,
+ struct cc_hw_desc *seq, unsigned int *seq_len)
{
u32 i;
unsigned int idx = *seq_len;
diff --git a/drivers/crypto/ccree/cc_sram_mgr.h b/drivers/crypto/ccree/cc_sram_mgr.h
index 1d14de9ee8c3..1c965ef83002 100644
--- a/drivers/crypto/ccree/cc_sram_mgr.h
+++ b/drivers/crypto/ccree/cc_sram_mgr.h
@@ -10,42 +10,30 @@
struct cc_drvdata;
-/**
- * Address (offset) within CC internal SRAM
- */
-
-typedef u64 cc_sram_addr_t;
-
-#define NULL_SRAM_ADDR ((cc_sram_addr_t)-1)
+#define NULL_SRAM_ADDR ((u32)-1)
-/*!
- * Initializes SRAM pool.
+/**
+ * cc_sram_mgr_init() - Initializes SRAM pool.
* The first X bytes of SRAM are reserved for ROM usage, hence, pool
* starts right after X bytes.
*
- * \param drvdata
+ * @drvdata: Associated device driver context
*
- * \return int Zero for success, negative value otherwise.
+ * Return:
+ * Zero for success, negative value otherwise.
*/
int cc_sram_mgr_init(struct cc_drvdata *drvdata);
-/*!
- * Uninits SRAM pool.
+/**
+ * cc_sram_alloc() - Allocate buffer from SRAM pool.
*
- * \param drvdata
- */
-void cc_sram_mgr_fini(struct cc_drvdata *drvdata);
-
-/*!
- * Allocated buffer from SRAM pool.
- * Note: Caller is responsible to free the LAST allocated buffer.
- * This function does not taking care of any fragmentation may occur
- * by the order of calls to alloc/free.
+ * @drvdata: Associated device driver context
+ * @size: The requested bytes to allocate
*
- * \param drvdata
- * \param size The requested bytes to allocate
+ * Return:
+ * Address offset in SRAM or NULL_SRAM_ADDR for failure.
*/
-cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size);
+u32 cc_sram_alloc(struct cc_drvdata *drvdata, u32 size);
/**
* cc_set_sram_desc() - Create const descriptors sequence to
@@ -54,12 +42,11 @@ cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size);
*
* @src: A pointer to array of words to set as consts.
* @dst: The target SRAM buffer to set into
- * @nelements: The number of words in "src" array
+ * @nelement: The number of words in "src" array
* @seq: A pointer to the given IN/OUT descriptor sequence
* @seq_len: A pointer to the given IN/OUT sequence length
*/
-void cc_set_sram_desc(const u32 *src, cc_sram_addr_t dst,
- unsigned int nelement, struct cc_hw_desc *seq,
- unsigned int *seq_len);
+void cc_set_sram_desc(const u32 *src, u32 dst, unsigned int nelement,
+ struct cc_hw_desc *seq, unsigned int *seq_len);
#endif /*__CC_SRAM_MGR_H__*/
diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index b4b9b22125d1..c29b80dd30d8 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -715,6 +715,52 @@ static int chcr_cipher_fallback(struct crypto_sync_skcipher *cipher,
return err;
}
+
+static inline int get_qidxs(struct crypto_async_request *req,
+ unsigned int *txqidx, unsigned int *rxqidx)
+{
+ struct crypto_tfm *tfm = req->tfm;
+ int ret = 0;
+
+ switch (tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK) {
+ case CRYPTO_ALG_TYPE_AEAD:
+ {
+ struct aead_request *aead_req =
+ container_of(req, struct aead_request, base);
+ struct chcr_aead_reqctx *reqctx = aead_request_ctx(aead_req);
+ *txqidx = reqctx->txqidx;
+ *rxqidx = reqctx->rxqidx;
+ break;
+ }
+ case CRYPTO_ALG_TYPE_SKCIPHER:
+ {
+ struct skcipher_request *sk_req =
+ container_of(req, struct skcipher_request, base);
+ struct chcr_skcipher_req_ctx *reqctx =
+ skcipher_request_ctx(sk_req);
+ *txqidx = reqctx->txqidx;
+ *rxqidx = reqctx->rxqidx;
+ break;
+ }
+ case CRYPTO_ALG_TYPE_AHASH:
+ {
+ struct ahash_request *ahash_req =
+ container_of(req, struct ahash_request, base);
+ struct chcr_ahash_req_ctx *reqctx =
+ ahash_request_ctx(ahash_req);
+ *txqidx = reqctx->txqidx;
+ *rxqidx = reqctx->rxqidx;
+ break;
+ }
+ default:
+ ret = -EINVAL;
+ /* should never get here */
+ BUG();
+ break;
+ }
+ return ret;
+}
+
static inline void create_wreq(struct chcr_context *ctx,
struct chcr_wr *chcr_req,
struct crypto_async_request *req,
@@ -725,7 +771,15 @@ static inline void create_wreq(struct chcr_context *ctx,
unsigned int lcb)
{
struct uld_ctx *u_ctx = ULD_CTX(ctx);
- int qid = u_ctx->lldi.rxq_ids[ctx->rx_qidx];
+ unsigned int tx_channel_id, rx_channel_id;
+ unsigned int txqidx = 0, rxqidx = 0;
+ unsigned int qid, fid;
+
+ get_qidxs(req, &txqidx, &rxqidx);
+ qid = u_ctx->lldi.rxq_ids[rxqidx];
+ fid = u_ctx->lldi.rxq_ids[0];
+ tx_channel_id = txqidx / ctx->txq_perchan;
+ rx_channel_id = rxqidx / ctx->rxq_perchan;
chcr_req->wreq.op_to_cctx_size = FILL_WR_OP_CCTX_SIZE;
@@ -734,15 +788,12 @@ static inline void create_wreq(struct chcr_context *ctx,
chcr_req->wreq.len16_pkd =
htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP(len16, 16)));
chcr_req->wreq.cookie = cpu_to_be64((uintptr_t)req);
- chcr_req->wreq.rx_chid_to_rx_q_id =
- FILL_WR_RX_Q_ID(ctx->tx_chan_id, qid,
- !!lcb, ctx->tx_qidx);
+ chcr_req->wreq.rx_chid_to_rx_q_id = FILL_WR_RX_Q_ID(rx_channel_id, qid,
+ !!lcb, txqidx);
- chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->tx_chan_id,
- qid);
+ chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(tx_channel_id, fid);
chcr_req->ulptx.len = htonl((DIV_ROUND_UP(len16, 16) -
- ((sizeof(chcr_req->wreq)) >> 4)));
-
+ ((sizeof(chcr_req->wreq)) >> 4)));
chcr_req->sc_imm.cmd_more = FILL_CMD_MORE(!imm);
chcr_req->sc_imm.len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) +
sizeof(chcr_req->key_ctx) + sc_len);
@@ -758,7 +809,8 @@ static inline void create_wreq(struct chcr_context *ctx,
static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(wrparam->req);
- struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(tfm));
+ struct chcr_context *ctx = c_ctx(tfm);
+ struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
struct sk_buff *skb = NULL;
struct chcr_wr *chcr_req;
struct cpl_rx_phys_dsgl *phys_cpl;
@@ -771,7 +823,8 @@ static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam)
unsigned int kctx_len;
gfp_t flags = wrparam->req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
GFP_KERNEL : GFP_ATOMIC;
- struct adapter *adap = padap(c_ctx(tfm)->dev);
+ struct adapter *adap = padap(ctx->dev);
+ unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan;
nents = sg_nents_xlen(reqctx->dstsg, wrparam->bytes, CHCR_DST_SG_SIZE,
reqctx->dst_ofst);
@@ -791,7 +844,7 @@ static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam)
}
chcr_req = __skb_put_zero(skb, transhdr_len);
chcr_req->sec_cpl.op_ivinsrtofst =
- FILL_SEC_CPL_OP_IVINSR(c_ctx(tfm)->tx_chan_id, 2, 1);
+ FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 1);
chcr_req->sec_cpl.pldlen = htonl(IV + wrparam->bytes);
chcr_req->sec_cpl.aadstart_cipherstop_hi =
@@ -1086,8 +1139,12 @@ static int chcr_final_cipher_iv(struct skcipher_request *req,
if (subtype == CRYPTO_ALG_SUB_TYPE_CTR)
ctr_add_iv(iv, req->iv, DIV_ROUND_UP(reqctx->processed,
AES_BLOCK_SIZE));
- else if (subtype == CRYPTO_ALG_SUB_TYPE_XTS)
- ret = chcr_update_tweak(req, iv, 1);
+ else if (subtype == CRYPTO_ALG_SUB_TYPE_XTS) {
+ if (!reqctx->partial_req)
+ memcpy(iv, reqctx->iv, AES_BLOCK_SIZE);
+ else
+ ret = chcr_update_tweak(req, iv, 1);
+ }
else if (subtype == CRYPTO_ALG_SUB_TYPE_CBC) {
/*Already updated for Decrypt*/
if (!reqctx->op)
@@ -1102,12 +1159,13 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req,
unsigned char *input, int err)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chcr_context *ctx = c_ctx(tfm);
struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm));
struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(tfm));
struct sk_buff *skb;
struct cpl_fw6_pld *fw6_pld = (struct cpl_fw6_pld *)input;
struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req);
- struct cipher_wr_param wrparam;
+ struct cipher_wr_param wrparam;
struct chcr_dev *dev = c_ctx(tfm)->dev;
int bytes;
@@ -1152,7 +1210,7 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req,
if (get_cryptoalg_subtype(tfm) ==
CRYPTO_ALG_SUB_TYPE_CTR)
bytes = adjust_ctr_overflow(reqctx->iv, bytes);
- wrparam.qid = u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx];
+ wrparam.qid = u_ctx->lldi.rxq_ids[reqctx->rxqidx];
wrparam.req = req;
wrparam.bytes = bytes;
skb = create_cipher_wr(&wrparam);
@@ -1162,14 +1220,24 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req,
goto unmap;
}
skb->dev = u_ctx->lldi.ports[0];
- set_wr_txq(skb, CPL_PRIORITY_DATA, c_ctx(tfm)->tx_qidx);
+ set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx);
chcr_send_wr(skb);
reqctx->last_req_len = bytes;
reqctx->processed += bytes;
+ if (get_cryptoalg_subtype(tfm) ==
+ CRYPTO_ALG_SUB_TYPE_CBC && req->base.flags ==
+ CRYPTO_TFM_REQ_MAY_SLEEP ) {
+ complete(&ctx->cbc_aes_aio_done);
+ }
return 0;
unmap:
chcr_cipher_dma_unmap(&ULD_CTX(c_ctx(tfm))->lldi.pdev->dev, req);
complete:
+ if (get_cryptoalg_subtype(tfm) ==
+ CRYPTO_ALG_SUB_TYPE_CBC && req->base.flags ==
+ CRYPTO_TFM_REQ_MAY_SLEEP ) {
+ complete(&ctx->cbc_aes_aio_done);
+ }
chcr_dec_wrcount(dev);
req->base.complete(&req->base, err);
return err;
@@ -1188,6 +1256,7 @@ static int process_cipher(struct skcipher_request *req,
int bytes, err = -EINVAL;
reqctx->processed = 0;
+ reqctx->partial_req = 0;
if (!req->iv)
goto error;
if ((ablkctx->enckey_len == 0) || (ivsize > AES_BLOCK_SIZE) ||
@@ -1278,6 +1347,7 @@ static int process_cipher(struct skcipher_request *req,
}
reqctx->processed = bytes;
reqctx->last_req_len = bytes;
+ reqctx->partial_req = !!(req->cryptlen - reqctx->processed);
return 0;
unmap:
@@ -1289,31 +1359,43 @@ error:
static int chcr_aes_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req);
struct chcr_dev *dev = c_ctx(tfm)->dev;
struct sk_buff *skb = NULL;
- int err, isfull = 0;
+ int err;
struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm));
+ struct chcr_context *ctx = c_ctx(tfm);
+ unsigned int cpu;
+
+ cpu = get_cpu();
+ reqctx->txqidx = cpu % ctx->ntxq;
+ reqctx->rxqidx = cpu % ctx->nrxq;
+ put_cpu();
err = chcr_inc_wrcount(dev);
if (err)
return -ENXIO;
if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
- c_ctx(tfm)->tx_qidx))) {
- isfull = 1;
- if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
+ reqctx->txqidx) &&
+ (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) {
err = -ENOSPC;
goto error;
- }
}
- err = process_cipher(req, u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx],
+ err = process_cipher(req, u_ctx->lldi.rxq_ids[reqctx->rxqidx],
&skb, CHCR_ENCRYPT_OP);
if (err || !skb)
return err;
skb->dev = u_ctx->lldi.ports[0];
- set_wr_txq(skb, CPL_PRIORITY_DATA, c_ctx(tfm)->tx_qidx);
+ set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx);
chcr_send_wr(skb);
- return isfull ? -EBUSY : -EINPROGRESS;
+ if (get_cryptoalg_subtype(tfm) ==
+ CRYPTO_ALG_SUB_TYPE_CBC && req->base.flags ==
+ CRYPTO_TFM_REQ_MAY_SLEEP ) {
+ reqctx->partial_req = 1;
+ wait_for_completion(&ctx->cbc_aes_aio_done);
+ }
+ return -EINPROGRESS;
error:
chcr_dec_wrcount(dev);
return err;
@@ -1322,44 +1404,45 @@ error:
static int chcr_aes_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req);
struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm));
struct chcr_dev *dev = c_ctx(tfm)->dev;
struct sk_buff *skb = NULL;
- int err, isfull = 0;
+ int err;
+ struct chcr_context *ctx = c_ctx(tfm);
+ unsigned int cpu;
+
+ cpu = get_cpu();
+ reqctx->txqidx = cpu % ctx->ntxq;
+ reqctx->rxqidx = cpu % ctx->nrxq;
+ put_cpu();
err = chcr_inc_wrcount(dev);
if (err)
return -ENXIO;
if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
- c_ctx(tfm)->tx_qidx))) {
- isfull = 1;
- if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+ reqctx->txqidx) &&
+ (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))))
return -ENOSPC;
- }
-
- err = process_cipher(req, u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx],
+ err = process_cipher(req, u_ctx->lldi.rxq_ids[reqctx->rxqidx],
&skb, CHCR_DECRYPT_OP);
if (err || !skb)
return err;
skb->dev = u_ctx->lldi.ports[0];
- set_wr_txq(skb, CPL_PRIORITY_DATA, c_ctx(tfm)->tx_qidx);
+ set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx);
chcr_send_wr(skb);
- return isfull ? -EBUSY : -EINPROGRESS;
+ return -EINPROGRESS;
}
-
static int chcr_device_init(struct chcr_context *ctx)
{
struct uld_ctx *u_ctx = NULL;
- unsigned int id;
- int txq_perchan, txq_idx, ntxq;
- int err = 0, rxq_perchan, rxq_idx;
+ int txq_perchan, ntxq;
+ int err = 0, rxq_perchan;
- id = smp_processor_id();
if (!ctx->dev) {
u_ctx = assign_chcr_device();
if (!u_ctx) {
- err = -ENXIO;
pr_err("chcr device assignment fails\n");
goto out;
}
@@ -1367,23 +1450,10 @@ static int chcr_device_init(struct chcr_context *ctx)
ntxq = u_ctx->lldi.ntxq;
rxq_perchan = u_ctx->lldi.nrxq / u_ctx->lldi.nchan;
txq_perchan = ntxq / u_ctx->lldi.nchan;
- spin_lock(&ctx->dev->lock_chcr_dev);
- ctx->tx_chan_id = ctx->dev->tx_channel_id;
- ctx->dev->tx_channel_id =
- (ctx->dev->tx_channel_id + 1) % u_ctx->lldi.nchan;
- spin_unlock(&ctx->dev->lock_chcr_dev);
- rxq_idx = ctx->tx_chan_id * rxq_perchan;
- rxq_idx += id % rxq_perchan;
- txq_idx = ctx->tx_chan_id * txq_perchan;
- txq_idx += id % txq_perchan;
- ctx->rx_qidx = rxq_idx;
- ctx->tx_qidx = txq_idx;
- /* Channel Id used by SGE to forward packet to Host.
- * Same value should be used in cpl_fw6_pld RSS_CH field
- * by FW. Driver programs PCI channel ID to be used in fw
- * at the time of queue allocation with value "pi->tx_chan"
- */
- ctx->pci_chan_id = txq_idx / txq_perchan;
+ ctx->ntxq = ntxq;
+ ctx->nrxq = u_ctx->lldi.nrxq;
+ ctx->rxq_perchan = rxq_perchan;
+ ctx->txq_perchan = txq_perchan;
}
out:
return err;
@@ -1401,7 +1471,7 @@ static int chcr_init_tfm(struct crypto_skcipher *tfm)
pr_err("failed to allocate fallback for %s\n", alg->base.cra_name);
return PTR_ERR(ablkctx->sw_cipher);
}
-
+ init_completion(&ctx->cbc_aes_aio_done);
crypto_skcipher_set_reqsize(tfm, sizeof(struct chcr_skcipher_req_ctx));
return chcr_device_init(ctx);
@@ -1485,9 +1555,10 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req,
{
struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req);
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct hmac_ctx *hmacctx = HMAC_CTX(h_ctx(tfm));
+ struct chcr_context *ctx = h_ctx(tfm);
+ struct hmac_ctx *hmacctx = HMAC_CTX(ctx);
struct sk_buff *skb = NULL;
- struct uld_ctx *u_ctx = ULD_CTX(h_ctx(tfm));
+ struct uld_ctx *u_ctx = ULD_CTX(ctx);
struct chcr_wr *chcr_req;
struct ulptx_sgl *ulptx;
unsigned int nents = 0, transhdr_len;
@@ -1496,6 +1567,7 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req,
GFP_ATOMIC;
struct adapter *adap = padap(h_ctx(tfm)->dev);
int error = 0;
+ unsigned int rx_channel_id = req_ctx->rxqidx / ctx->rxq_perchan;
transhdr_len = HASH_TRANSHDR_SIZE(param->kctx_len);
req_ctx->hctx_wr.imm = (transhdr_len + param->bfr_len +
@@ -1513,7 +1585,8 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req,
chcr_req = __skb_put_zero(skb, transhdr_len);
chcr_req->sec_cpl.op_ivinsrtofst =
- FILL_SEC_CPL_OP_IVINSR(h_ctx(tfm)->tx_chan_id, 2, 0);
+ FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 0);
+
chcr_req->sec_cpl.pldlen = htonl(param->bfr_len + param->sg_len);
chcr_req->sec_cpl.aadstart_cipherstop_hi =
@@ -1576,16 +1649,22 @@ static int chcr_ahash_update(struct ahash_request *req)
{
struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req);
struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req);
- struct uld_ctx *u_ctx = NULL;
+ struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm));
+ struct chcr_context *ctx = h_ctx(rtfm);
struct chcr_dev *dev = h_ctx(rtfm)->dev;
struct sk_buff *skb;
u8 remainder = 0, bs;
unsigned int nbytes = req->nbytes;
struct hash_wr_param params;
- int error, isfull = 0;
+ int error;
+ unsigned int cpu;
+
+ cpu = get_cpu();
+ req_ctx->txqidx = cpu % ctx->ntxq;
+ req_ctx->rxqidx = cpu % ctx->nrxq;
+ put_cpu();
bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
- u_ctx = ULD_CTX(h_ctx(rtfm));
if (nbytes + req_ctx->reqlen >= bs) {
remainder = (nbytes + req_ctx->reqlen) % bs;
@@ -1603,12 +1682,10 @@ static int chcr_ahash_update(struct ahash_request *req)
* inflight count for dev guarantees that lldi and padap is valid
*/
if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
- h_ctx(rtfm)->tx_qidx))) {
- isfull = 1;
- if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
+ req_ctx->txqidx) &&
+ (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) {
error = -ENOSPC;
goto err;
- }
}
chcr_init_hctx_per_wr(req_ctx);
@@ -1650,10 +1727,9 @@ static int chcr_ahash_update(struct ahash_request *req)
}
req_ctx->reqlen = remainder;
skb->dev = u_ctx->lldi.ports[0];
- set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx);
+ set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx);
chcr_send_wr(skb);
-
- return isfull ? -EBUSY : -EINPROGRESS;
+ return -EINPROGRESS;
unmap:
chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req);
err:
@@ -1678,16 +1754,22 @@ static int chcr_ahash_final(struct ahash_request *req)
struct chcr_dev *dev = h_ctx(rtfm)->dev;
struct hash_wr_param params;
struct sk_buff *skb;
- struct uld_ctx *u_ctx = NULL;
+ struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm));
+ struct chcr_context *ctx = h_ctx(rtfm);
u8 bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
int error = -EINVAL;
+ unsigned int cpu;
+
+ cpu = get_cpu();
+ req_ctx->txqidx = cpu % ctx->ntxq;
+ req_ctx->rxqidx = cpu % ctx->nrxq;
+ put_cpu();
error = chcr_inc_wrcount(dev);
if (error)
return -ENXIO;
chcr_init_hctx_per_wr(req_ctx);
- u_ctx = ULD_CTX(h_ctx(rtfm));
if (is_hmac(crypto_ahash_tfm(rtfm)))
params.opad_needed = 1;
else
@@ -1727,7 +1809,7 @@ static int chcr_ahash_final(struct ahash_request *req)
}
req_ctx->reqlen = 0;
skb->dev = u_ctx->lldi.ports[0];
- set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx);
+ set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx);
chcr_send_wr(skb);
return -EINPROGRESS;
err:
@@ -1740,25 +1822,29 @@ static int chcr_ahash_finup(struct ahash_request *req)
struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req);
struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req);
struct chcr_dev *dev = h_ctx(rtfm)->dev;
- struct uld_ctx *u_ctx = NULL;
+ struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm));
+ struct chcr_context *ctx = h_ctx(rtfm);
struct sk_buff *skb;
struct hash_wr_param params;
u8 bs;
- int error, isfull = 0;
+ int error;
+ unsigned int cpu;
+
+ cpu = get_cpu();
+ req_ctx->txqidx = cpu % ctx->ntxq;
+ req_ctx->rxqidx = cpu % ctx->nrxq;
+ put_cpu();
bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
- u_ctx = ULD_CTX(h_ctx(rtfm));
error = chcr_inc_wrcount(dev);
if (error)
return -ENXIO;
if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
- h_ctx(rtfm)->tx_qidx))) {
- isfull = 1;
- if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
+ req_ctx->txqidx) &&
+ (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) {
error = -ENOSPC;
goto err;
- }
}
chcr_init_hctx_per_wr(req_ctx);
error = chcr_hash_dma_map(&u_ctx->lldi.pdev->dev, req);
@@ -1816,10 +1902,9 @@ static int chcr_ahash_finup(struct ahash_request *req)
req_ctx->reqlen = 0;
req_ctx->hctx_wr.processed += params.sg_len;
skb->dev = u_ctx->lldi.ports[0];
- set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx);
+ set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx);
chcr_send_wr(skb);
-
- return isfull ? -EBUSY : -EINPROGRESS;
+ return -EINPROGRESS;
unmap:
chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req);
err:
@@ -1832,11 +1917,18 @@ static int chcr_ahash_digest(struct ahash_request *req)
struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req);
struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req);
struct chcr_dev *dev = h_ctx(rtfm)->dev;
- struct uld_ctx *u_ctx = NULL;
+ struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm));
+ struct chcr_context *ctx = h_ctx(rtfm);
struct sk_buff *skb;
struct hash_wr_param params;
u8 bs;
- int error, isfull = 0;
+ int error;
+ unsigned int cpu;
+
+ cpu = get_cpu();
+ req_ctx->txqidx = cpu % ctx->ntxq;
+ req_ctx->rxqidx = cpu % ctx->nrxq;
+ put_cpu();
rtfm->init(req);
bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
@@ -1844,14 +1936,11 @@ static int chcr_ahash_digest(struct ahash_request *req)
if (error)
return -ENXIO;
- u_ctx = ULD_CTX(h_ctx(rtfm));
if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
- h_ctx(rtfm)->tx_qidx))) {
- isfull = 1;
- if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
+ req_ctx->txqidx) &&
+ (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) {
error = -ENOSPC;
goto err;
- }
}
chcr_init_hctx_per_wr(req_ctx);
@@ -1907,9 +1996,9 @@ static int chcr_ahash_digest(struct ahash_request *req)
}
req_ctx->hctx_wr.processed += params.sg_len;
skb->dev = u_ctx->lldi.ports[0];
- set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx);
+ set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx);
chcr_send_wr(skb);
- return isfull ? -EBUSY : -EINPROGRESS;
+ return -EINPROGRESS;
unmap:
chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req);
err:
@@ -1922,14 +2011,20 @@ static int chcr_ahash_continue(struct ahash_request *req)
struct chcr_ahash_req_ctx *reqctx = ahash_request_ctx(req);
struct chcr_hctx_per_wr *hctx_wr = &reqctx->hctx_wr;
struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req);
- struct uld_ctx *u_ctx = NULL;
+ struct chcr_context *ctx = h_ctx(rtfm);
+ struct uld_ctx *u_ctx = ULD_CTX(ctx);
struct sk_buff *skb;
struct hash_wr_param params;
u8 bs;
int error;
+ unsigned int cpu;
+
+ cpu = get_cpu();
+ reqctx->txqidx = cpu % ctx->ntxq;
+ reqctx->rxqidx = cpu % ctx->nrxq;
+ put_cpu();
bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
- u_ctx = ULD_CTX(h_ctx(rtfm));
get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
params.kctx_len = roundup(params.alg_prm.result_size, 16);
if (is_hmac(crypto_ahash_tfm(rtfm))) {
@@ -1969,7 +2064,7 @@ static int chcr_ahash_continue(struct ahash_request *req)
}
hctx_wr->processed += params.sg_len;
skb->dev = u_ctx->lldi.ports[0];
- set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx);
+ set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx);
chcr_send_wr(skb);
return 0;
err:
@@ -2315,7 +2410,8 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
int size)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm));
+ struct chcr_context *ctx = a_ctx(tfm);
+ struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
struct chcr_authenc_ctx *actx = AUTHENC_CTX(aeadctx);
struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
struct sk_buff *skb = NULL;
@@ -2331,7 +2427,8 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
int null = 0;
gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
GFP_ATOMIC;
- struct adapter *adap = padap(a_ctx(tfm)->dev);
+ struct adapter *adap = padap(ctx->dev);
+ unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan;
if (req->cryptlen == 0)
return NULL;
@@ -2351,7 +2448,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
snents = sg_nents_xlen(req->src, req->assoclen + req->cryptlen,
CHCR_SRC_SG_SIZE, 0);
dst_size = get_space_for_phys_dsgl(dnents);
- kctx_len = (ntohl(KEY_CONTEXT_CTX_LEN_V(aeadctx->key_ctx_hdr)) << 4)
+ kctx_len = (KEY_CONTEXT_CTX_LEN_G(ntohl(aeadctx->key_ctx_hdr)) << 4)
- sizeof(chcr_req->key_ctx);
transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size);
reqctx->imm = (transhdr_len + req->assoclen + req->cryptlen) <
@@ -2383,7 +2480,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
* to the hardware spec
*/
chcr_req->sec_cpl.op_ivinsrtofst =
- FILL_SEC_CPL_OP_IVINSR(a_ctx(tfm)->tx_chan_id, 2, 1);
+ FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 1);
chcr_req->sec_cpl.pldlen = htonl(req->assoclen + IV + req->cryptlen);
chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(
null ? 0 : 1 + IV,
@@ -2471,8 +2568,9 @@ int chcr_aead_dma_map(struct device *dev,
else
reqctx->b0_dma = 0;
if (req->src == req->dst) {
- error = dma_map_sg(dev, req->src, sg_nents(req->src),
- DMA_BIDIRECTIONAL);
+ error = dma_map_sg(dev, req->src,
+ sg_nents_for_len(req->src, dst_size),
+ DMA_BIDIRECTIONAL);
if (!error)
goto err;
} else {
@@ -2558,13 +2656,14 @@ void chcr_add_aead_dst_ent(struct aead_request *req,
unsigned int authsize = crypto_aead_authsize(tfm);
struct chcr_context *ctx = a_ctx(tfm);
u32 temp;
+ unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan;
dsgl_walk_init(&dsgl_walk, phys_cpl);
dsgl_walk_add_page(&dsgl_walk, IV + reqctx->b0_len, reqctx->iv_dma);
temp = req->assoclen + req->cryptlen +
(reqctx->op ? -authsize : authsize);
dsgl_walk_add_sg(&dsgl_walk, req->dst, temp, 0);
- dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id);
+ dsgl_walk_end(&dsgl_walk, qid, rx_channel_id);
}
void chcr_add_cipher_src_ent(struct skcipher_request *req,
@@ -2599,14 +2698,14 @@ void chcr_add_cipher_dst_ent(struct skcipher_request *req,
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(wrparam->req);
struct chcr_context *ctx = c_ctx(tfm);
struct dsgl_walk dsgl_walk;
+ unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan;
dsgl_walk_init(&dsgl_walk, phys_cpl);
dsgl_walk_add_sg(&dsgl_walk, reqctx->dstsg, wrparam->bytes,
reqctx->dst_ofst);
reqctx->dstsg = dsgl_walk.last_sg;
reqctx->dst_ofst = dsgl_walk.last_sg_len;
-
- dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id);
+ dsgl_walk_end(&dsgl_walk, qid, rx_channel_id);
}
void chcr_add_hash_src_ent(struct ahash_request *req,
@@ -2804,10 +2903,12 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl,
unsigned short op_type)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm));
+ struct chcr_context *ctx = a_ctx(tfm);
+ struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
+ struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
unsigned int cipher_mode = CHCR_SCMD_CIPHER_MODE_AES_CCM;
unsigned int mac_mode = CHCR_SCMD_AUTH_MODE_CBCMAC;
- unsigned int c_id = a_ctx(tfm)->tx_chan_id;
+ unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan;
unsigned int ccm_xtra;
unsigned char tag_offset = 0, auth_offset = 0;
unsigned int assoclen;
@@ -2828,9 +2929,7 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl,
auth_offset = 0;
}
-
- sec_cpl->op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(c_id,
- 2, 1);
+ sec_cpl->op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 1);
sec_cpl->pldlen =
htonl(req->assoclen + IV + req->cryptlen + ccm_xtra);
/* For CCM there wil be b0 always. So AAD start will be 1 always */
@@ -2973,7 +3072,8 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
int size)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm));
+ struct chcr_context *ctx = a_ctx(tfm);
+ struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
struct sk_buff *skb = NULL;
struct chcr_wr *chcr_req;
@@ -2986,7 +3086,8 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
u8 *ivptr;
gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
GFP_ATOMIC;
- struct adapter *adap = padap(a_ctx(tfm)->dev);
+ struct adapter *adap = padap(ctx->dev);
+ unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan;
if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106)
assoclen = req->assoclen - 8;
@@ -3028,7 +3129,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
//Offset of tag from end
temp = (reqctx->op == CHCR_ENCRYPT_OP) ? 0 : authsize;
chcr_req->sec_cpl.op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(
- a_ctx(tfm)->tx_chan_id, 2, 1);
+ rx_channel_id, 2, 1);
chcr_req->sec_cpl.pldlen =
htonl(req->assoclen + IV + req->cryptlen);
chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(
@@ -3576,9 +3677,9 @@ static int chcr_aead_op(struct aead_request *req,
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
- struct uld_ctx *u_ctx;
+ struct chcr_context *ctx = a_ctx(tfm);
+ struct uld_ctx *u_ctx = ULD_CTX(ctx);
struct sk_buff *skb;
- int isfull = 0;
struct chcr_dev *cdev;
cdev = a_ctx(tfm)->dev;
@@ -3594,18 +3695,15 @@ static int chcr_aead_op(struct aead_request *req,
return chcr_aead_fallback(req, reqctx->op);
}
- u_ctx = ULD_CTX(a_ctx(tfm));
if (cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
- a_ctx(tfm)->tx_qidx)) {
- isfull = 1;
- if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
+ reqctx->txqidx) &&
+ (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))) {
chcr_dec_wrcount(cdev);
return -ENOSPC;
- }
}
/* Form a WR from req */
- skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[a_ctx(tfm)->rx_qidx], size);
+ skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[reqctx->rxqidx], size);
if (IS_ERR_OR_NULL(skb)) {
chcr_dec_wrcount(cdev);
@@ -3613,15 +3711,22 @@ static int chcr_aead_op(struct aead_request *req,
}
skb->dev = u_ctx->lldi.ports[0];
- set_wr_txq(skb, CPL_PRIORITY_DATA, a_ctx(tfm)->tx_qidx);
+ set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx);
chcr_send_wr(skb);
- return isfull ? -EBUSY : -EINPROGRESS;
+ return -EINPROGRESS;
}
static int chcr_aead_encrypt(struct aead_request *req)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
+ struct chcr_context *ctx = a_ctx(tfm);
+ unsigned int cpu;
+
+ cpu = get_cpu();
+ reqctx->txqidx = cpu % ctx->ntxq;
+ reqctx->rxqidx = cpu % ctx->nrxq;
+ put_cpu();
reqctx->verify = VERIFY_HW;
reqctx->op = CHCR_ENCRYPT_OP;
@@ -3643,9 +3748,16 @@ static int chcr_aead_encrypt(struct aead_request *req)
static int chcr_aead_decrypt(struct aead_request *req)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm));
+ struct chcr_context *ctx = a_ctx(tfm);
+ struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
int size;
+ unsigned int cpu;
+
+ cpu = get_cpu();
+ reqctx->txqidx = cpu % ctx->ntxq;
+ reqctx->rxqidx = cpu % ctx->nrxq;
+ put_cpu();
if (aeadctx->mayverify == VERIFY_SW) {
size = crypto_aead_maxauthsize(tfm);
diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c
index dfb53e746e51..ffd4ec0c7374 100644
--- a/drivers/crypto/chelsio/chcr_core.c
+++ b/drivers/crypto/chelsio/chcr_core.c
@@ -195,6 +195,7 @@ static void *chcr_uld_add(const struct cxgb4_lld_info *lld)
struct uld_ctx *u_ctx;
/* Create the device and add it in the device list */
+ pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION);
if (!(lld->ulp_crypto & ULP_CRYPTO_LOOKASIDE))
return ERR_PTR(-EOPNOTSUPP);
@@ -287,6 +288,8 @@ static int chcr_uld_state_change(void *handle, enum cxgb4_state state)
case CXGB4_STATE_DETACH:
chcr_detach_device(u_ctx);
+ if (!atomic_read(&drv_data.dev_count))
+ stop_crypto();
break;
case CXGB4_STATE_START_RECOVERY:
diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h
index b5b371b8d343..2c09672e00a4 100644
--- a/drivers/crypto/chelsio/chcr_core.h
+++ b/drivers/crypto/chelsio/chcr_core.h
@@ -43,7 +43,8 @@
#include "cxgb4_uld.h"
#define DRV_MODULE_NAME "chcr"
-#define DRV_VERSION "1.0.0.0"
+#define DRV_VERSION "1.0.0.0-ko"
+#define DRV_DESC "Chelsio T6 Crypto Co-processor Driver"
#define MAX_PENDING_REQ_TO_HW 20
#define CHCR_TEST_RESPONSE_TIMEOUT 1000
@@ -67,7 +68,7 @@ struct _key_ctx {
__be32 ctx_hdr;
u8 salt[MAX_SALT];
__be64 iv_to_auth;
- unsigned char key[0];
+ unsigned char key[];
};
#define KEYCTX_TX_WR_IV_S 55
@@ -147,7 +148,6 @@ struct chcr_dev {
int wqretry;
struct delayed_work detach_work;
struct completion detach_comp;
- unsigned char tx_channel_id;
};
struct uld_ctx {
diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h
index 6db2df8c8a05..542bebae001f 100644
--- a/drivers/crypto/chelsio/chcr_crypto.h
+++ b/drivers/crypto/chelsio/chcr_crypto.h
@@ -187,6 +187,8 @@ struct chcr_aead_reqctx {
unsigned int op;
u16 imm;
u16 verify;
+ u16 txqidx;
+ u16 rxqidx;
u8 iv[CHCR_MAX_CRYPTO_IV_LEN + MAX_SCRATCH_PAD_SIZE];
u8 *scratch_pad;
};
@@ -250,10 +252,11 @@ struct __crypto_ctx {
struct chcr_context {
struct chcr_dev *dev;
- unsigned char tx_qidx;
- unsigned char rx_qidx;
- unsigned char tx_chan_id;
- unsigned char pci_chan_id;
+ unsigned char rxq_perchan;
+ unsigned char txq_perchan;
+ unsigned int ntxq;
+ unsigned int nrxq;
+ struct completion cbc_aes_aio_done;
struct __crypto_ctx crypto_ctx[0];
};
@@ -279,6 +282,8 @@ struct chcr_ahash_req_ctx {
u8 *skbfr;
/* SKB which is being sent to the hardware for processing */
u64 data_len; /* Data len till time */
+ u16 txqidx;
+ u16 rxqidx;
u8 reqlen;
u8 partial_hash[CHCR_HASH_MAX_DIGEST_SIZE];
u8 bfr1[CHCR_HASH_MAX_BLOCK_SIZE_128];
@@ -290,12 +295,15 @@ struct chcr_skcipher_req_ctx {
struct scatterlist *dstsg;
unsigned int processed;
unsigned int last_req_len;
+ unsigned int partial_req;
struct scatterlist *srcsg;
unsigned int src_ofst;
unsigned int dst_ofst;
unsigned int op;
u16 imm;
u8 iv[CHCR_MAX_CRYPTO_IV_LEN];
+ u16 txqidx;
+ u16 rxqidx;
};
struct chcr_alg_template {
diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c
index e1651adb9d06..dccef3a2908b 100644
--- a/drivers/crypto/chelsio/chtls/chtls_io.c
+++ b/drivers/crypto/chelsio/chtls/chtls_io.c
@@ -1110,10 +1110,10 @@ new_buf:
pg_size = page_size(page);
if (off < pg_size &&
skb_can_coalesce(skb, i, page, off)) {
- merge = 1;
+ merge = true;
goto copy;
}
- merge = 0;
+ merge = false;
if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) :
MAX_SKB_FRAGS))
goto new_buf;
@@ -1428,6 +1428,8 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
{
struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
struct chtls_hws *hws = &csk->tlshws;
+ struct net_device *dev = csk->egress_dev;
+ struct adapter *adap = netdev2adap(dev);
struct tcp_sock *tp = tcp_sk(sk);
unsigned long avail;
int buffers_freed;
@@ -1585,6 +1587,7 @@ skip_copy:
tp->copied_seq += skb->len;
hws->rcvpld = skb->hdr_len;
} else {
+ atomic_inc(&adap->chcr_stats.tls_pdu_rx);
tp->copied_seq += hws->rcvpld;
}
chtls_free_skb(sk, skb);
diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c
index a038de90b2ea..2110d0893bc7 100644
--- a/drivers/crypto/chelsio/chtls/chtls_main.c
+++ b/drivers/crypto/chelsio/chtls/chtls_main.c
@@ -174,9 +174,16 @@ static inline void chtls_dev_release(struct kref *kref)
{
struct tls_toe_device *dev;
struct chtls_dev *cdev;
+ struct adapter *adap;
dev = container_of(kref, struct tls_toe_device, kref);
cdev = to_chtls_dev(dev);
+
+ /* Reset tls rx/tx stats */
+ adap = pci_get_drvdata(cdev->pdev);
+ atomic_set(&adap->chcr_stats.tls_pdu_tx, 0);
+ atomic_set(&adap->chcr_stats.tls_pdu_rx, 0);
+
chtls_free_uld(cdev);
}
@@ -229,8 +236,7 @@ static void *chtls_uld_add(const struct cxgb4_lld_info *info)
struct chtls_dev *cdev;
int i, j;
- cdev = kzalloc(sizeof(*cdev) + info->nports *
- (sizeof(struct net_device *)), GFP_KERNEL);
+ cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
if (!cdev)
goto out;
diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig
index 8851161f722f..095850d01dcc 100644
--- a/drivers/crypto/hisilicon/Kconfig
+++ b/drivers/crypto/hisilicon/Kconfig
@@ -40,6 +40,7 @@ config CRYPTO_DEV_HISI_QM
tristate
depends on ARM64 || COMPILE_TEST
depends on PCI && PCI_MSI
+ depends on UACCE || UACCE=n
help
HiSilicon accelerator engines use a common queue management
interface. Specific engine driver may use this module.
@@ -49,6 +50,7 @@ config CRYPTO_DEV_HISI_ZIP
depends on PCI && PCI_MSI
depends on ARM64 || (COMPILE_TEST && 64BIT)
depends on !CPU_BIG_ENDIAN || COMPILE_TEST
+ depends on UACCE || UACCE=n
select CRYPTO_DEV_HISI_QM
help
Support for HiSilicon ZIP Driver
diff --git a/drivers/crypto/hisilicon/hpre/hpre.h b/drivers/crypto/hisilicon/hpre/hpre.h
index ddf13ea9862a..03d512ec6336 100644
--- a/drivers/crypto/hisilicon/hpre/hpre.h
+++ b/drivers/crypto/hisilicon/hpre/hpre.h
@@ -46,7 +46,6 @@ struct hpre_debug {
struct hpre {
struct hisi_qm qm;
- struct list_head list;
struct hpre_debug debug;
u32 num_vfs;
unsigned long status;
@@ -76,7 +75,7 @@ struct hpre_sqe {
__le32 rsvd1[_HPRE_SQE_ALIGN_EXT];
};
-struct hpre *hpre_find_device(int node);
+struct hisi_qp *hpre_create_qp(void);
int hpre_algs_register(void);
void hpre_algs_unregister(void);
diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
index 5d400d69e8e4..65425250b2e9 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
@@ -147,26 +147,18 @@ static void hpre_rm_req_from_ctx(struct hpre_asym_request *hpre_req)
static struct hisi_qp *hpre_get_qp_and_start(void)
{
struct hisi_qp *qp;
- struct hpre *hpre;
int ret;
- /* find the proper hpre device, which is near the current CPU core */
- hpre = hpre_find_device(cpu_to_node(smp_processor_id()));
- if (!hpre) {
- pr_err("Can not find proper hpre device!\n");
- return ERR_PTR(-ENODEV);
- }
-
- qp = hisi_qm_create_qp(&hpre->qm, 0);
- if (IS_ERR(qp)) {
- pci_err(hpre->qm.pdev, "Can not create qp!\n");
+ qp = hpre_create_qp();
+ if (!qp) {
+ pr_err("Can not create hpre qp!\n");
return ERR_PTR(-ENODEV);
}
ret = hisi_qm_start_qp(qp, 0);
if (ret < 0) {
- hisi_qm_release_qp(qp);
- pci_err(hpre->qm.pdev, "Can not start qp!\n");
+ hisi_qm_free_qps(&qp, 1);
+ pci_err(qp->qm->pdev, "Can not start qp!\n");
return ERR_PTR(-EINVAL);
}
@@ -338,7 +330,7 @@ static void hpre_ctx_clear(struct hpre_ctx *ctx, bool is_clear_all)
if (is_clear_all) {
idr_destroy(&ctx->req_idr);
kfree(ctx->req_list);
- hisi_qm_release_qp(ctx->qp);
+ hisi_qm_free_qps(&ctx->qp, 1);
}
ctx->crt_g2_mode = false;
diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 401747de67a8..88be53bf4a38 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -82,8 +82,7 @@
#define HPRE_VIA_MSI_DSM 1
-static LIST_HEAD(hpre_list);
-static DEFINE_MUTEX(hpre_list_lock);
+static struct hisi_qm_list hpre_devices;
static const char hpre_name[] = "hisi_hpre";
static struct dentry *hpre_debugfs_root;
static const struct pci_device_id hpre_dev_ids[] = {
@@ -196,43 +195,17 @@ static u32 hpre_pf_q_num = HPRE_PF_DEF_Q_NUM;
module_param_cb(hpre_pf_q_num, &hpre_pf_q_num_ops, &hpre_pf_q_num, 0444);
MODULE_PARM_DESC(hpre_pf_q_num, "Number of queues in PF of CS(1-1024)");
-static inline void hpre_add_to_list(struct hpre *hpre)
+struct hisi_qp *hpre_create_qp(void)
{
- mutex_lock(&hpre_list_lock);
- list_add_tail(&hpre->list, &hpre_list);
- mutex_unlock(&hpre_list_lock);
-}
-
-static inline void hpre_remove_from_list(struct hpre *hpre)
-{
- mutex_lock(&hpre_list_lock);
- list_del(&hpre->list);
- mutex_unlock(&hpre_list_lock);
-}
+ int node = cpu_to_node(smp_processor_id());
+ struct hisi_qp *qp = NULL;
+ int ret;
-struct hpre *hpre_find_device(int node)
-{
- struct hpre *hpre, *ret = NULL;
- int min_distance = INT_MAX;
- struct device *dev;
- int dev_node = 0;
-
- mutex_lock(&hpre_list_lock);
- list_for_each_entry(hpre, &hpre_list, list) {
- dev = &hpre->qm.pdev->dev;
-#ifdef CONFIG_NUMA
- dev_node = dev->numa_node;
- if (dev_node < 0)
- dev_node = 0;
-#endif
- if (node_distance(dev_node, node) < min_distance) {
- ret = hpre;
- min_distance = node_distance(dev_node, node);
- }
- }
- mutex_unlock(&hpre_list_lock);
+ ret = hisi_qm_alloc_qps_node(&hpre_devices, 1, 0, node, &qp);
+ if (!ret)
+ return qp;
- return ret;
+ return NULL;
}
static int hpre_cfg_by_dsm(struct hisi_qm *qm)
@@ -349,18 +322,14 @@ static void hpre_cnt_regs_clear(struct hisi_qm *qm)
hisi_qm_debug_regs_clear(qm);
}
-static void hpre_hw_error_disable(struct hpre *hpre)
+static void hpre_hw_error_disable(struct hisi_qm *qm)
{
- struct hisi_qm *qm = &hpre->qm;
-
/* disable hpre hw error interrupts */
writel(HPRE_CORE_INT_DISABLE, qm->io_base + HPRE_INT_MASK);
}
-static void hpre_hw_error_enable(struct hpre *hpre)
+static void hpre_hw_error_enable(struct hisi_qm *qm)
{
- struct hisi_qm *qm = &hpre->qm;
-
/* enable hpre hw error interrupts */
writel(HPRE_CORE_INT_ENABLE, qm->io_base + HPRE_INT_MASK);
writel(HPRE_HAC_RAS_CE_ENABLE, qm->io_base + HPRE_RAS_CE_ENB);
@@ -713,13 +682,39 @@ static int hpre_qm_pre_init(struct hisi_qm *qm, struct pci_dev *pdev)
return 0;
}
-static void hpre_hw_err_init(struct hpre *hpre)
+static void hpre_log_hw_error(struct hisi_qm *qm, u32 err_sts)
{
- hisi_qm_hw_error_init(&hpre->qm, QM_BASE_CE, QM_BASE_NFE,
- 0, QM_DB_RANDOM_INVALID);
- hpre_hw_error_enable(hpre);
+ const struct hpre_hw_error *err = hpre_hw_errors;
+ struct device *dev = &qm->pdev->dev;
+
+ while (err->msg) {
+ if (err->int_msk & err_sts)
+ dev_warn(dev, "%s [error status=0x%x] found\n",
+ err->msg, err->int_msk);
+ err++;
+ }
+
+ writel(err_sts, qm->io_base + HPRE_HAC_SOURCE_INT);
+}
+
+static u32 hpre_get_hw_err_status(struct hisi_qm *qm)
+{
+ return readl(qm->io_base + HPRE_HAC_INT_STATUS);
}
+static const struct hisi_qm_err_ini hpre_err_ini = {
+ .hw_err_enable = hpre_hw_error_enable,
+ .hw_err_disable = hpre_hw_error_disable,
+ .get_dev_hw_err_status = hpre_get_hw_err_status,
+ .log_dev_hw_err = hpre_log_hw_error,
+ .err_info = {
+ .ce = QM_BASE_CE,
+ .nfe = QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT,
+ .fe = 0,
+ .msi = QM_DB_RANDOM_INVALID,
+ }
+};
+
static int hpre_pf_probe_init(struct hpre *hpre)
{
struct hisi_qm *qm = &hpre->qm;
@@ -731,7 +726,8 @@ static int hpre_pf_probe_init(struct hpre *hpre)
if (ret)
return ret;
- hpre_hw_err_init(hpre);
+ qm->err_ini = &hpre_err_ini;
+ hisi_qm_dev_err_init(qm);
return 0;
}
@@ -776,22 +772,21 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (ret)
dev_warn(&pdev->dev, "init debugfs fail!\n");
- hpre_add_to_list(hpre);
+ hisi_qm_add_to_list(qm, &hpre_devices);
ret = hpre_algs_register();
if (ret < 0) {
- hpre_remove_from_list(hpre);
pci_err(pdev, "fail to register algs to crypto!\n");
goto err_with_qm_start;
}
return 0;
err_with_qm_start:
+ hisi_qm_del_from_list(qm, &hpre_devices);
hisi_qm_stop(qm);
err_with_err_init:
- if (pdev->is_physfn)
- hpre_hw_error_disable(hpre);
+ hisi_qm_dev_err_uninit(qm);
err_with_qm_init:
hisi_qm_uninit(qm);
@@ -907,7 +902,7 @@ static void hpre_remove(struct pci_dev *pdev)
int ret;
hpre_algs_unregister();
- hpre_remove_from_list(hpre);
+ hisi_qm_del_from_list(qm, &hpre_devices);
if (qm->fun_type == QM_HW_PF && hpre->num_vfs != 0) {
ret = hpre_sriov_disable(pdev);
if (ret) {
@@ -922,69 +917,13 @@ static void hpre_remove(struct pci_dev *pdev)
hpre_debugfs_exit(hpre);
hisi_qm_stop(qm);
- if (qm->fun_type == QM_HW_PF)
- hpre_hw_error_disable(hpre);
+ hisi_qm_dev_err_uninit(qm);
hisi_qm_uninit(qm);
}
-static void hpre_log_hw_error(struct hpre *hpre, u32 err_sts)
-{
- const struct hpre_hw_error *err = hpre_hw_errors;
- struct device *dev = &hpre->qm.pdev->dev;
-
- while (err->msg) {
- if (err->int_msk & err_sts)
- dev_warn(dev, "%s [error status=0x%x] found\n",
- err->msg, err->int_msk);
- err++;
- }
-}
-
-static pci_ers_result_t hpre_hw_error_handle(struct hpre *hpre)
-{
- u32 err_sts;
-
- /* read err sts */
- err_sts = readl(hpre->qm.io_base + HPRE_HAC_INT_STATUS);
- if (err_sts) {
- hpre_log_hw_error(hpre, err_sts);
-
- /* clear error interrupts */
- writel(err_sts, hpre->qm.io_base + HPRE_HAC_SOURCE_INT);
- return PCI_ERS_RESULT_NEED_RESET;
- }
-
- return PCI_ERS_RESULT_RECOVERED;
-}
-
-static pci_ers_result_t hpre_process_hw_error(struct pci_dev *pdev)
-{
- struct hpre *hpre = pci_get_drvdata(pdev);
- pci_ers_result_t qm_ret, hpre_ret;
-
- /* log qm error */
- qm_ret = hisi_qm_hw_error_handle(&hpre->qm);
-
- /* log hpre error */
- hpre_ret = hpre_hw_error_handle(hpre);
-
- return (qm_ret == PCI_ERS_RESULT_NEED_RESET ||
- hpre_ret == PCI_ERS_RESULT_NEED_RESET) ?
- PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED;
-}
-
-static pci_ers_result_t hpre_error_detected(struct pci_dev *pdev,
- pci_channel_state_t state)
-{
- pci_info(pdev, "PCI error detected, state(=%d)!!\n", state);
- if (state == pci_channel_io_perm_failure)
- return PCI_ERS_RESULT_DISCONNECT;
-
- return hpre_process_hw_error(pdev);
-}
static const struct pci_error_handlers hpre_err_handler = {
- .error_detected = hpre_error_detected,
+ .error_detected = hisi_qm_dev_err_detected,
};
static struct pci_driver hpre_pci_driver = {
@@ -1013,6 +952,7 @@ static int __init hpre_init(void)
{
int ret;
+ hisi_qm_init_list(&hpre_devices);
hpre_register_debugfs();
ret = pci_register_driver(&hpre_pci_driver);
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index b57da5ef8b5b..f795fb557630 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -9,6 +9,9 @@
#include <linux/log2.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
+#include <linux/uacce.h>
+#include <linux/uaccess.h>
+#include <uapi/misc/uacce/hisi_qm.h>
#include "qm.h"
/* eq/aeq irq enable */
@@ -269,6 +272,12 @@ struct qm_doorbell {
__le16 priority;
};
+struct hisi_qm_resource {
+ struct hisi_qm *qm;
+ int distance;
+ struct list_head list;
+};
+
struct hisi_qm_hw_ops {
int (*get_vft)(struct hisi_qm *qm, u32 *base, u32 *number);
void (*qm_db)(struct hisi_qm *qm, u16 qn,
@@ -277,6 +286,7 @@ struct hisi_qm_hw_ops {
int (*debug_init)(struct hisi_qm *qm);
void (*hw_error_init)(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe,
u32 msi);
+ void (*hw_error_uninit)(struct hisi_qm *qm);
pci_ers_result_t (*hw_error_handle)(struct hisi_qm *qm);
};
@@ -465,9 +475,14 @@ static void qm_cq_head_update(struct hisi_qp *qp)
static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm)
{
- struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
+ if (qp->event_cb) {
+ qp->event_cb(qp);
+ return;
+ }
if (qp->req_cb) {
+ struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
+
while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
dma_rmb();
qp->req_cb(qp, qp->sqe + qm->sqe_size *
@@ -485,17 +500,9 @@ static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm)
}
}
-static void qm_qp_work_func(struct work_struct *work)
+static void qm_work_process(struct work_struct *work)
{
- struct hisi_qp *qp;
-
- qp = container_of(work, struct hisi_qp, work);
- qm_poll_qp(qp, qp->qm);
-}
-
-static irqreturn_t qm_irq_handler(int irq, void *data)
-{
- struct hisi_qm *qm = data;
+ struct hisi_qm *qm = container_of(work, struct hisi_qm, work);
struct qm_eqe *eqe = qm->eqe + qm->status.eq_head;
struct hisi_qp *qp;
int eqe_num = 0;
@@ -504,7 +511,7 @@ static irqreturn_t qm_irq_handler(int irq, void *data)
eqe_num++;
qp = qm_to_hisi_qp(qm, eqe);
if (qp)
- queue_work(qp->wq, &qp->work);
+ qm_poll_qp(qp, qm);
if (qm->status.eq_head == QM_Q_DEPTH - 1) {
qm->status.eqc_phase = !qm->status.eqc_phase;
@@ -522,6 +529,17 @@ static irqreturn_t qm_irq_handler(int irq, void *data)
}
qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
+}
+
+static irqreturn_t do_qm_irq(int irq, void *data)
+{
+ struct hisi_qm *qm = (struct hisi_qm *)data;
+
+ /* the workqueue created by device driver of QM */
+ if (qm->wq)
+ queue_work(qm->wq, &qm->work);
+ else
+ schedule_work(&qm->work);
return IRQ_HANDLED;
}
@@ -531,7 +549,7 @@ static irqreturn_t qm_irq(int irq, void *data)
struct hisi_qm *qm = data;
if (readl(qm->io_base + QM_VF_EQ_INT_SOURCE))
- return qm_irq_handler(irq, data);
+ return do_qm_irq(irq, data);
dev_err(&qm->pdev->dev, "invalid int source\n");
qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
@@ -1011,43 +1029,45 @@ static void qm_hw_error_init_v2(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe,
writel(irq_unmask, qm->io_base + QM_ABNORMAL_INT_MASK);
}
+static void qm_hw_error_uninit_v2(struct hisi_qm *qm)
+{
+ writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK);
+}
+
static void qm_log_hw_error(struct hisi_qm *qm, u32 error_status)
{
- const struct hisi_qm_hw_error *err = qm_hw_error;
+ const struct hisi_qm_hw_error *err;
struct device *dev = &qm->pdev->dev;
u32 reg_val, type, vf_num;
+ int i;
- while (err->msg) {
- if (err->int_msk & error_status) {
- dev_err(dev, "%s [error status=0x%x] found\n",
- err->msg, err->int_msk);
-
- if (error_status & QM_DB_TIMEOUT) {
- reg_val = readl(qm->io_base +
- QM_ABNORMAL_INF01);
- type = (reg_val & QM_DB_TIMEOUT_TYPE) >>
- QM_DB_TIMEOUT_TYPE_SHIFT;
- vf_num = reg_val & QM_DB_TIMEOUT_VF;
- dev_err(dev, "qm %s doorbell timeout in function %u\n",
- qm_db_timeout[type], vf_num);
- }
-
- if (error_status & QM_OF_FIFO_OF) {
- reg_val = readl(qm->io_base +
- QM_ABNORMAL_INF00);
- type = (reg_val & QM_FIFO_OVERFLOW_TYPE) >>
- QM_FIFO_OVERFLOW_TYPE_SHIFT;
- vf_num = reg_val & QM_FIFO_OVERFLOW_VF;
-
- if (type < ARRAY_SIZE(qm_fifo_overflow))
- dev_err(dev, "qm %s fifo overflow in function %u\n",
- qm_fifo_overflow[type],
- vf_num);
- else
- dev_err(dev, "unknown error type\n");
- }
+ for (i = 0; i < ARRAY_SIZE(qm_hw_error); i++) {
+ err = &qm_hw_error[i];
+ if (!(err->int_msk & error_status))
+ continue;
+
+ dev_err(dev, "%s [error status=0x%x] found\n",
+ err->msg, err->int_msk);
+
+ if (err->int_msk & QM_DB_TIMEOUT) {
+ reg_val = readl(qm->io_base + QM_ABNORMAL_INF01);
+ type = (reg_val & QM_DB_TIMEOUT_TYPE) >>
+ QM_DB_TIMEOUT_TYPE_SHIFT;
+ vf_num = reg_val & QM_DB_TIMEOUT_VF;
+ dev_err(dev, "qm %s doorbell timeout in function %u\n",
+ qm_db_timeout[type], vf_num);
+ } else if (err->int_msk & QM_OF_FIFO_OF) {
+ reg_val = readl(qm->io_base + QM_ABNORMAL_INF00);
+ type = (reg_val & QM_FIFO_OVERFLOW_TYPE) >>
+ QM_FIFO_OVERFLOW_TYPE_SHIFT;
+ vf_num = reg_val & QM_FIFO_OVERFLOW_VF;
+
+ if (type < ARRAY_SIZE(qm_fifo_overflow))
+ dev_err(dev, "qm %s fifo overflow in function %u\n",
+ qm_fifo_overflow[type], vf_num);
+ else
+ dev_err(dev, "unknown error type\n");
}
- err++;
}
}
@@ -1082,6 +1102,7 @@ static const struct hisi_qm_hw_ops qm_hw_ops_v2 = {
.qm_db = qm_db_v2,
.get_irq_num = qm_get_irq_num_v2,
.hw_error_init = qm_hw_error_init_v2,
+ .hw_error_uninit = qm_hw_error_uninit_v2,
.hw_error_handle = qm_hw_error_handle_v2,
};
@@ -1147,20 +1168,9 @@ struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
qp->qp_id = qp_id;
qp->alg_type = alg_type;
- INIT_WORK(&qp->work, qm_qp_work_func);
- qp->wq = alloc_workqueue("hisi_qm", WQ_UNBOUND | WQ_HIGHPRI |
- WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0);
- if (!qp->wq) {
- ret = -EFAULT;
- goto err_free_qp_mem;
- }
return qp;
-err_free_qp_mem:
- if (qm->use_dma_api)
- dma_free_coherent(dev, qp->qdma.size, qp->qdma.va,
- qp->qdma.dma);
err_clear_bit:
write_lock(&qm->qps_lock);
qm->qp_array[qp_id] = NULL;
@@ -1269,7 +1279,7 @@ static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, int pasid)
* @qp: The qp we want to start to run.
* @arg: Accelerator specific argument.
*
- * After this function, qp can receive request from user. Return qp_id if
+ * After this function, qp can receive request from user. Return 0 if
* successful, Return -EBUSY if failed.
*/
int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg)
@@ -1314,7 +1324,7 @@ int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg)
dev_dbg(dev, "queue %d started\n", qp_id);
- return qp_id;
+ return 0;
}
EXPORT_SYMBOL_GPL(hisi_qm_start_qp);
@@ -1395,6 +1405,214 @@ static void hisi_qm_cache_wb(struct hisi_qm *qm)
}
}
+static void qm_qp_event_notifier(struct hisi_qp *qp)
+{
+ wake_up_interruptible(&qp->uacce_q->wait);
+}
+
+static int hisi_qm_get_available_instances(struct uacce_device *uacce)
+{
+ int i, ret;
+ struct hisi_qm *qm = uacce->priv;
+
+ read_lock(&qm->qps_lock);
+ for (i = 0, ret = 0; i < qm->qp_num; i++)
+ if (!qm->qp_array[i])
+ ret++;
+ read_unlock(&qm->qps_lock);
+
+ return ret;
+}
+
+static int hisi_qm_uacce_get_queue(struct uacce_device *uacce,
+ unsigned long arg,
+ struct uacce_queue *q)
+{
+ struct hisi_qm *qm = uacce->priv;
+ struct hisi_qp *qp;
+ u8 alg_type = 0;
+
+ qp = hisi_qm_create_qp(qm, alg_type);
+ if (IS_ERR(qp))
+ return PTR_ERR(qp);
+
+ q->priv = qp;
+ q->uacce = uacce;
+ qp->uacce_q = q;
+ qp->event_cb = qm_qp_event_notifier;
+ qp->pasid = arg;
+
+ return 0;
+}
+
+static void hisi_qm_uacce_put_queue(struct uacce_queue *q)
+{
+ struct hisi_qp *qp = q->priv;
+
+ hisi_qm_cache_wb(qp->qm);
+ hisi_qm_release_qp(qp);
+}
+
+/* map sq/cq/doorbell to user space */
+static int hisi_qm_uacce_mmap(struct uacce_queue *q,
+ struct vm_area_struct *vma,
+ struct uacce_qfile_region *qfr)
+{
+ struct hisi_qp *qp = q->priv;
+ struct hisi_qm *qm = qp->qm;
+ size_t sz = vma->vm_end - vma->vm_start;
+ struct pci_dev *pdev = qm->pdev;
+ struct device *dev = &pdev->dev;
+ unsigned long vm_pgoff;
+ int ret;
+
+ switch (qfr->type) {
+ case UACCE_QFRT_MMIO:
+ if (qm->ver == QM_HW_V2) {
+ if (sz > PAGE_SIZE * (QM_DOORBELL_PAGE_NR +
+ QM_DOORBELL_SQ_CQ_BASE_V2 / PAGE_SIZE))
+ return -EINVAL;
+ } else {
+ if (sz > PAGE_SIZE * QM_DOORBELL_PAGE_NR)
+ return -EINVAL;
+ }
+
+ vma->vm_flags |= VM_IO;
+
+ return remap_pfn_range(vma, vma->vm_start,
+ qm->phys_base >> PAGE_SHIFT,
+ sz, pgprot_noncached(vma->vm_page_prot));
+ case UACCE_QFRT_DUS:
+ if (sz != qp->qdma.size)
+ return -EINVAL;
+
+ /*
+ * dma_mmap_coherent() requires vm_pgoff as 0
+ * restore vm_pfoff to initial value for mmap()
+ */
+ vm_pgoff = vma->vm_pgoff;
+ vma->vm_pgoff = 0;
+ ret = dma_mmap_coherent(dev, vma, qp->qdma.va,
+ qp->qdma.dma, sz);
+ vma->vm_pgoff = vm_pgoff;
+ return ret;
+
+ default:
+ return -EINVAL;
+ }
+}
+
+static int hisi_qm_uacce_start_queue(struct uacce_queue *q)
+{
+ struct hisi_qp *qp = q->priv;
+
+ return hisi_qm_start_qp(qp, qp->pasid);
+}
+
+static void hisi_qm_uacce_stop_queue(struct uacce_queue *q)
+{
+ hisi_qm_stop_qp(q->priv);
+}
+
+static int qm_set_sqctype(struct uacce_queue *q, u16 type)
+{
+ struct hisi_qm *qm = q->uacce->priv;
+ struct hisi_qp *qp = q->priv;
+
+ write_lock(&qm->qps_lock);
+ qp->alg_type = type;
+ write_unlock(&qm->qps_lock);
+
+ return 0;
+}
+
+static long hisi_qm_uacce_ioctl(struct uacce_queue *q, unsigned int cmd,
+ unsigned long arg)
+{
+ struct hisi_qp *qp = q->priv;
+ struct hisi_qp_ctx qp_ctx;
+
+ if (cmd == UACCE_CMD_QM_SET_QP_CTX) {
+ if (copy_from_user(&qp_ctx, (void __user *)arg,
+ sizeof(struct hisi_qp_ctx)))
+ return -EFAULT;
+
+ if (qp_ctx.qc_type != 0 && qp_ctx.qc_type != 1)
+ return -EINVAL;
+
+ qm_set_sqctype(q, qp_ctx.qc_type);
+ qp_ctx.id = qp->qp_id;
+
+ if (copy_to_user((void __user *)arg, &qp_ctx,
+ sizeof(struct hisi_qp_ctx)))
+ return -EFAULT;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct uacce_ops uacce_qm_ops = {
+ .get_available_instances = hisi_qm_get_available_instances,
+ .get_queue = hisi_qm_uacce_get_queue,
+ .put_queue = hisi_qm_uacce_put_queue,
+ .start_queue = hisi_qm_uacce_start_queue,
+ .stop_queue = hisi_qm_uacce_stop_queue,
+ .mmap = hisi_qm_uacce_mmap,
+ .ioctl = hisi_qm_uacce_ioctl,
+};
+
+static int qm_alloc_uacce(struct hisi_qm *qm)
+{
+ struct pci_dev *pdev = qm->pdev;
+ struct uacce_device *uacce;
+ unsigned long mmio_page_nr;
+ unsigned long dus_page_nr;
+ struct uacce_interface interface = {
+ .flags = UACCE_DEV_SVA,
+ .ops = &uacce_qm_ops,
+ };
+
+ strncpy(interface.name, pdev->driver->name, sizeof(interface.name));
+
+ uacce = uacce_alloc(&pdev->dev, &interface);
+ if (IS_ERR(uacce))
+ return PTR_ERR(uacce);
+
+ if (uacce->flags & UACCE_DEV_SVA) {
+ qm->use_sva = true;
+ } else {
+ /* only consider sva case */
+ uacce_remove(uacce);
+ qm->uacce = NULL;
+ return -EINVAL;
+ }
+
+ uacce->is_vf = pdev->is_virtfn;
+ uacce->priv = qm;
+ uacce->algs = qm->algs;
+
+ if (qm->ver == QM_HW_V1) {
+ mmio_page_nr = QM_DOORBELL_PAGE_NR;
+ uacce->api_ver = HISI_QM_API_VER_BASE;
+ } else {
+ mmio_page_nr = QM_DOORBELL_PAGE_NR +
+ QM_DOORBELL_SQ_CQ_BASE_V2 / PAGE_SIZE;
+ uacce->api_ver = HISI_QM_API_VER2_BASE;
+ }
+
+ dus_page_nr = (PAGE_SIZE - 1 + qm->sqe_size * QM_Q_DEPTH +
+ sizeof(struct qm_cqe) * QM_Q_DEPTH) >> PAGE_SHIFT;
+
+ uacce->qf_pg_num[UACCE_QFRT_MMIO] = mmio_page_nr;
+ uacce->qf_pg_num[UACCE_QFRT_DUS] = dus_page_nr;
+
+ qm->uacce = uacce;
+
+ return 0;
+}
+
/**
* hisi_qm_get_free_qp_num() - Get free number of qp in qm.
* @qm: The qm which want to get free qp.
@@ -1437,10 +1655,14 @@ int hisi_qm_init(struct hisi_qm *qm)
return -EINVAL;
}
+ ret = qm_alloc_uacce(qm);
+ if (ret < 0)
+ dev_warn(&pdev->dev, "fail to alloc uacce (%d)\n", ret);
+
ret = pci_enable_device_mem(pdev);
if (ret < 0) {
dev_err(&pdev->dev, "Failed to enable device mem!\n");
- return ret;
+ goto err_remove_uacce;
}
ret = pci_request_mem_regions(pdev, qm->dev_name);
@@ -1449,8 +1671,9 @@ int hisi_qm_init(struct hisi_qm *qm)
goto err_disable_pcidev;
}
- qm->io_base = ioremap(pci_resource_start(pdev, PCI_BAR_2),
- pci_resource_len(qm->pdev, PCI_BAR_2));
+ qm->phys_base = pci_resource_start(pdev, PCI_BAR_2);
+ qm->phys_size = pci_resource_len(qm->pdev, PCI_BAR_2);
+ qm->io_base = ioremap(qm->phys_base, qm->phys_size);
if (!qm->io_base) {
ret = -EIO;
goto err_release_mem_regions;
@@ -1479,6 +1702,7 @@ int hisi_qm_init(struct hisi_qm *qm)
qm->qp_in_used = 0;
mutex_init(&qm->mailbox_lock);
rwlock_init(&qm->qps_lock);
+ INIT_WORK(&qm->work, qm_work_process);
dev_dbg(dev, "init qm %s with %s\n", pdev->is_physfn ? "pf" : "vf",
qm->use_dma_api ? "dma api" : "iommu api");
@@ -1493,6 +1717,9 @@ err_release_mem_regions:
pci_release_mem_regions(pdev);
err_disable_pcidev:
pci_disable_device(pdev);
+err_remove_uacce:
+ uacce_remove(qm->uacce);
+ qm->uacce = NULL;
return ret;
}
@@ -1509,6 +1736,9 @@ void hisi_qm_uninit(struct hisi_qm *qm)
struct pci_dev *pdev = qm->pdev;
struct device *dev = &pdev->dev;
+ uacce_remove(qm->uacce);
+ qm->uacce = NULL;
+
if (qm->use_dma_api && qm->qdma.va) {
hisi_qm_cache_wb(qm);
dma_free_coherent(dev, qm->qdma.size,
@@ -1856,43 +2086,30 @@ void hisi_qm_debug_regs_clear(struct hisi_qm *qm)
}
EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear);
-/**
- * hisi_qm_hw_error_init() - Configure qm hardware error report method.
- * @qm: The qm which we want to configure.
- * @ce: Bit mask of correctable error configure.
- * @nfe: Bit mask of non-fatal error configure.
- * @fe: Bit mask of fatal error configure.
- * @msi: Bit mask of error reported by message signal interrupt.
- *
- * Hardware errors of qm can be reported either by RAS interrupts which will
- * be handled by UEFI and then PCIe AER or by device MSI. User can configure
- * each error to use either of above two methods. For RAS interrupts, we can
- * configure an error as one of correctable error, non-fatal error or
- * fatal error.
- *
- * Bits indicating errors can be configured to ce, nfe, fe and msi to enable
- * related report methods. Error report will be masked if related error bit
- * does not configure.
- */
-void hisi_qm_hw_error_init(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe,
- u32 msi)
+static void qm_hw_error_init(struct hisi_qm *qm)
{
+ const struct hisi_qm_err_info *err_info = &qm->err_ini->err_info;
+
if (!qm->ops->hw_error_init) {
dev_err(&qm->pdev->dev, "QM doesn't support hw error handling!\n");
return;
}
- qm->ops->hw_error_init(qm, ce, nfe, fe, msi);
+ qm->ops->hw_error_init(qm, err_info->ce, err_info->nfe,
+ err_info->fe, err_info->msi);
}
-EXPORT_SYMBOL_GPL(hisi_qm_hw_error_init);
-/**
- * hisi_qm_hw_error_handle() - Handle qm non-fatal hardware errors.
- * @qm: The qm which has non-fatal hardware errors.
- *
- * Accelerators use this function to handle qm non-fatal hardware errors.
- */
-pci_ers_result_t hisi_qm_hw_error_handle(struct hisi_qm *qm)
+static void qm_hw_error_uninit(struct hisi_qm *qm)
+{
+ if (!qm->ops->hw_error_uninit) {
+ dev_err(&qm->pdev->dev, "Unexpected QM hw error uninit!\n");
+ return;
+ }
+
+ qm->ops->hw_error_uninit(qm);
+}
+
+static pci_ers_result_t qm_hw_error_handle(struct hisi_qm *qm)
{
if (!qm->ops->hw_error_handle) {
dev_err(&qm->pdev->dev, "QM doesn't support hw error report!\n");
@@ -1901,7 +2118,6 @@ pci_ers_result_t hisi_qm_hw_error_handle(struct hisi_qm *qm)
return qm->ops->hw_error_handle(qm);
}
-EXPORT_SYMBOL_GPL(hisi_qm_hw_error_handle);
/**
* hisi_qm_get_hw_version() - Get hardware version of a qm.
@@ -1922,6 +2138,229 @@ enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev)
}
EXPORT_SYMBOL_GPL(hisi_qm_get_hw_version);
+/**
+ * hisi_qm_dev_err_init() - Initialize device error configuration.
+ * @qm: The qm for which we want to do error initialization.
+ *
+ * Initialize QM and device error related configuration.
+ */
+void hisi_qm_dev_err_init(struct hisi_qm *qm)
+{
+ if (qm->fun_type == QM_HW_VF)
+ return;
+
+ qm_hw_error_init(qm);
+
+ if (!qm->err_ini->hw_err_enable) {
+ dev_err(&qm->pdev->dev, "Device doesn't support hw error init!\n");
+ return;
+ }
+ qm->err_ini->hw_err_enable(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_dev_err_init);
+
+/**
+ * hisi_qm_dev_err_uninit() - Uninitialize device error configuration.
+ * @qm: The qm for which we want to do error uninitialization.
+ *
+ * Uninitialize QM and device error related configuration.
+ */
+void hisi_qm_dev_err_uninit(struct hisi_qm *qm)
+{
+ if (qm->fun_type == QM_HW_VF)
+ return;
+
+ qm_hw_error_uninit(qm);
+
+ if (!qm->err_ini->hw_err_disable) {
+ dev_err(&qm->pdev->dev, "Unexpected device hw error uninit!\n");
+ return;
+ }
+ qm->err_ini->hw_err_disable(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_dev_err_uninit);
+
+/**
+ * hisi_qm_free_qps() - free multiple queue pairs.
+ * @qps: The queue pairs need to be freed.
+ * @qp_num: The num of queue pairs.
+ */
+void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num)
+{
+ int i;
+
+ if (!qps || qp_num <= 0)
+ return;
+
+ for (i = qp_num - 1; i >= 0; i--)
+ hisi_qm_release_qp(qps[i]);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_free_qps);
+
+static void free_list(struct list_head *head)
+{
+ struct hisi_qm_resource *res, *tmp;
+
+ list_for_each_entry_safe(res, tmp, head, list) {
+ list_del(&res->list);
+ kfree(res);
+ }
+}
+
+static int hisi_qm_sort_devices(int node, struct list_head *head,
+ struct hisi_qm_list *qm_list)
+{
+ struct hisi_qm_resource *res, *tmp;
+ struct hisi_qm *qm;
+ struct list_head *n;
+ struct device *dev;
+ int dev_node = 0;
+
+ list_for_each_entry(qm, &qm_list->list, list) {
+ dev = &qm->pdev->dev;
+
+ if (IS_ENABLED(CONFIG_NUMA)) {
+ dev_node = dev_to_node(dev);
+ if (dev_node < 0)
+ dev_node = 0;
+ }
+
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (!res)
+ return -ENOMEM;
+
+ res->qm = qm;
+ res->distance = node_distance(dev_node, node);
+ n = head;
+ list_for_each_entry(tmp, head, list) {
+ if (res->distance < tmp->distance) {
+ n = &tmp->list;
+ break;
+ }
+ }
+ list_add_tail(&res->list, n);
+ }
+
+ return 0;
+}
+
+/**
+ * hisi_qm_alloc_qps_node() - Create multiple queue pairs.
+ * @qm_list: The list of all available devices.
+ * @qp_num: The number of queue pairs need created.
+ * @alg_type: The algorithm type.
+ * @node: The numa node.
+ * @qps: The queue pairs need created.
+ *
+ * This function will sort all available device according to numa distance.
+ * Then try to create all queue pairs from one device, if all devices do
+ * not meet the requirements will return error.
+ */
+int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num,
+ u8 alg_type, int node, struct hisi_qp **qps)
+{
+ struct hisi_qm_resource *tmp;
+ int ret = -ENODEV;
+ LIST_HEAD(head);
+ int i;
+
+ if (!qps || !qm_list || qp_num <= 0)
+ return -EINVAL;
+
+ mutex_lock(&qm_list->lock);
+ if (hisi_qm_sort_devices(node, &head, qm_list)) {
+ mutex_unlock(&qm_list->lock);
+ goto err;
+ }
+
+ list_for_each_entry(tmp, &head, list) {
+ for (i = 0; i < qp_num; i++) {
+ qps[i] = hisi_qm_create_qp(tmp->qm, alg_type);
+ if (IS_ERR(qps[i])) {
+ hisi_qm_free_qps(qps, i);
+ break;
+ }
+ }
+
+ if (i == qp_num) {
+ ret = 0;
+ break;
+ }
+ }
+
+ mutex_unlock(&qm_list->lock);
+ if (ret)
+ pr_info("Failed to create qps, node[%d], alg[%d], qp[%d]!\n",
+ node, alg_type, qp_num);
+
+err:
+ free_list(&head);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_alloc_qps_node);
+
+static pci_ers_result_t qm_dev_err_handle(struct hisi_qm *qm)
+{
+ u32 err_sts;
+
+ if (!qm->err_ini->get_dev_hw_err_status) {
+ dev_err(&qm->pdev->dev, "Device doesn't support get hw error status!\n");
+ return PCI_ERS_RESULT_NONE;
+ }
+
+ /* get device hardware error status */
+ err_sts = qm->err_ini->get_dev_hw_err_status(qm);
+ if (err_sts) {
+ if (!qm->err_ini->log_dev_hw_err) {
+ dev_err(&qm->pdev->dev, "Device doesn't support log hw error!\n");
+ return PCI_ERS_RESULT_NEED_RESET;
+ }
+
+ qm->err_ini->log_dev_hw_err(qm, err_sts);
+ return PCI_ERS_RESULT_NEED_RESET;
+ }
+
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+static pci_ers_result_t qm_process_dev_error(struct pci_dev *pdev)
+{
+ struct hisi_qm *qm = pci_get_drvdata(pdev);
+ pci_ers_result_t qm_ret, dev_ret;
+
+ /* log qm error */
+ qm_ret = qm_hw_error_handle(qm);
+
+ /* log device error */
+ dev_ret = qm_dev_err_handle(qm);
+
+ return (qm_ret == PCI_ERS_RESULT_NEED_RESET ||
+ dev_ret == PCI_ERS_RESULT_NEED_RESET) ?
+ PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * hisi_qm_dev_err_detected() - Get device and qm error status then log it.
+ * @pdev: The PCI device which need report error.
+ * @state: The connectivity between CPU and device.
+ *
+ * We register this function into PCIe AER handlers, It will report device or
+ * qm hardware error status when error occur.
+ */
+pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ if (pdev->is_virtfn)
+ return PCI_ERS_RESULT_NONE;
+
+ pci_info(pdev, "PCI error detected, state(=%d)!!\n", state);
+ if (state == pci_channel_io_perm_failure)
+ return PCI_ERS_RESULT_DISCONNECT;
+
+ return qm_process_dev_error(pdev);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_dev_err_detected);
+
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Zhou Wang <wangzhou1@hisilicon.com>");
MODULE_DESCRIPTION("HiSilicon Accelerator queue manager driver");
diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h
index 078b8f1f1b77..ec5b6f48db6c 100644
--- a/drivers/crypto/hisilicon/qm.h
+++ b/drivers/crypto/hisilicon/qm.h
@@ -77,6 +77,9 @@
#define HISI_ACC_SGL_SGE_NR_MAX 255
+/* page number for queue file region */
+#define QM_DOORBELL_PAGE_NR 1
+
enum qp_state {
QP_STOP,
};
@@ -125,6 +128,28 @@ struct hisi_qm_status {
unsigned long flags;
};
+struct hisi_qm;
+
+struct hisi_qm_err_info {
+ u32 ce;
+ u32 nfe;
+ u32 fe;
+ u32 msi;
+};
+
+struct hisi_qm_err_ini {
+ void (*hw_err_enable)(struct hisi_qm *qm);
+ void (*hw_err_disable)(struct hisi_qm *qm);
+ u32 (*get_dev_hw_err_status)(struct hisi_qm *qm);
+ void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts);
+ struct hisi_qm_err_info err_info;
+};
+
+struct hisi_qm_list {
+ struct mutex lock;
+ struct list_head list;
+};
+
struct hisi_qm {
enum qm_hw_ver ver;
enum qm_fun_type fun_type;
@@ -136,6 +161,7 @@ struct hisi_qm {
u32 qp_num;
u32 qp_in_used;
u32 ctrl_qp_num;
+ struct list_head list;
struct qm_dma qdma;
struct qm_sqc *sqc;
@@ -148,6 +174,7 @@ struct hisi_qm {
dma_addr_t aeqe_dma;
struct hisi_qm_status status;
+ const struct hisi_qm_err_ini *err_ini;
rwlock_t qps_lock;
unsigned long *qp_bitmap;
@@ -162,7 +189,15 @@ struct hisi_qm {
u32 error_mask;
u32 msi_mask;
+ struct workqueue_struct *wq;
+ struct work_struct work;
+
+ const char *algs;
bool use_dma_api;
+ bool use_sva;
+ resource_size_t phys_base;
+ resource_size_t phys_size;
+ struct uacce_device *uacce;
};
struct hisi_qp_status {
@@ -192,12 +227,35 @@ struct hisi_qp {
struct hisi_qp_ops *hw_ops;
void *qp_ctx;
void (*req_cb)(struct hisi_qp *qp, void *data);
- struct work_struct work;
- struct workqueue_struct *wq;
+ void (*event_cb)(struct hisi_qp *qp);
struct hisi_qm *qm;
+ u16 pasid;
+ struct uacce_queue *uacce_q;
};
+static inline void hisi_qm_init_list(struct hisi_qm_list *qm_list)
+{
+ INIT_LIST_HEAD(&qm_list->list);
+ mutex_init(&qm_list->lock);
+}
+
+static inline void hisi_qm_add_to_list(struct hisi_qm *qm,
+ struct hisi_qm_list *qm_list)
+{
+ mutex_lock(&qm_list->lock);
+ list_add_tail(&qm->list, &qm_list->list);
+ mutex_unlock(&qm_list->lock);
+}
+
+static inline void hisi_qm_del_from_list(struct hisi_qm *qm,
+ struct hisi_qm_list *qm_list)
+{
+ mutex_lock(&qm_list->lock);
+ list_del(&qm->list);
+ mutex_unlock(&qm_list->lock);
+}
+
int hisi_qm_init(struct hisi_qm *qm);
void hisi_qm_uninit(struct hisi_qm *qm);
int hisi_qm_start(struct hisi_qm *qm);
@@ -211,11 +269,12 @@ int hisi_qm_get_free_qp_num(struct hisi_qm *qm);
int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number);
int hisi_qm_set_vft(struct hisi_qm *qm, u32 fun_num, u32 base, u32 number);
int hisi_qm_debug_init(struct hisi_qm *qm);
-void hisi_qm_hw_error_init(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe,
- u32 msi);
-pci_ers_result_t hisi_qm_hw_error_handle(struct hisi_qm *qm);
enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev);
void hisi_qm_debug_regs_clear(struct hisi_qm *qm);
+void hisi_qm_dev_err_init(struct hisi_qm *qm);
+void hisi_qm_dev_err_uninit(struct hisi_qm *qm);
+pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev,
+ pci_channel_state_t state);
struct hisi_acc_sgl_pool;
struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev,
@@ -227,4 +286,7 @@ struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev,
u32 count, u32 sge_nr);
void hisi_acc_free_sgl_pool(struct device *dev,
struct hisi_acc_sgl_pool *pool);
+int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num,
+ u8 alg_type, int node, struct hisi_qp **qps);
+void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num);
#endif
diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index 13e2d8d7be94..3598fa17beb2 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -11,6 +11,8 @@
/* Algorithm resource per hardware SEC queue */
struct sec_alg_res {
+ u8 *pbuf;
+ dma_addr_t pbuf_dma;
u8 *c_ivin;
dma_addr_t c_ivin_dma;
u8 *out_mac;
@@ -23,6 +25,8 @@ struct sec_cipher_req {
dma_addr_t c_in_dma;
struct hisi_acc_hw_sgl *c_out;
dma_addr_t c_out_dma;
+ u8 *c_ivin;
+ dma_addr_t c_ivin_dma;
struct skcipher_request *sk_req;
u32 c_len;
bool encrypt;
@@ -48,6 +52,7 @@ struct sec_req {
/* Status of the SEC request */
bool fake_busy;
+ bool use_pbuf;
};
/**
@@ -114,6 +119,7 @@ struct sec_ctx {
struct sec_qp_ctx *qp_ctx;
struct sec_dev *sec;
const struct sec_req_op *req_op;
+ struct hisi_qp **qps;
/* Half queues for encipher, and half for decipher */
u32 hlf_q_num;
@@ -128,6 +134,7 @@ struct sec_ctx {
atomic_t dec_qcyclic;
enum sec_alg_type alg_type;
+ bool pbuf_supported;
struct sec_cipher_ctx c_ctx;
struct sec_auth_ctx a_ctx;
};
@@ -162,14 +169,15 @@ struct sec_debug {
struct sec_dev {
struct hisi_qm qm;
- struct list_head list;
struct sec_debug debug;
u32 ctx_q_num;
+ bool iommu_used;
u32 num_vfs;
unsigned long status;
};
-struct sec_dev *sec_find_device(int node);
+void sec_destroy_qps(struct hisi_qp **qps, int qp_num);
+struct hisi_qp **sec_create_qps(void);
int sec_register_to_crypto(void);
void sec_unregister_from_crypto(void);
#endif
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index a2cfcc9ccd94..7f1c6a31b82f 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -46,7 +46,21 @@
#define SEC_CIPHER_AUTH 0xfe
#define SEC_AUTH_CIPHER 0x1
#define SEC_MAX_MAC_LEN 64
+#define SEC_MAX_AAD_LEN 65535
#define SEC_TOTAL_MAC_SZ (SEC_MAX_MAC_LEN * QM_Q_DEPTH)
+
+#define SEC_PBUF_SZ 512
+#define SEC_PBUF_IV_OFFSET SEC_PBUF_SZ
+#define SEC_PBUF_MAC_OFFSET (SEC_PBUF_SZ + SEC_IV_SIZE)
+#define SEC_PBUF_PKG (SEC_PBUF_SZ + SEC_IV_SIZE + \
+ SEC_MAX_MAC_LEN * 2)
+#define SEC_PBUF_NUM (PAGE_SIZE / SEC_PBUF_PKG)
+#define SEC_PBUF_PAGE_NUM (QM_Q_DEPTH / SEC_PBUF_NUM)
+#define SEC_PBUF_LEFT_SZ (SEC_PBUF_PKG * (QM_Q_DEPTH - \
+ SEC_PBUF_PAGE_NUM * SEC_PBUF_NUM))
+#define SEC_TOTAL_PBUF_SZ (PAGE_SIZE * SEC_PBUF_PAGE_NUM + \
+ SEC_PBUF_LEFT_SZ)
+
#define SEC_SQE_LEN_RATE 4
#define SEC_SQE_CFLAG 2
#define SEC_SQE_AEAD_FLAG 3
@@ -110,12 +124,12 @@ static void sec_free_req_id(struct sec_req *req)
mutex_unlock(&qp_ctx->req_lock);
}
-static int sec_aead_verify(struct sec_req *req, struct sec_qp_ctx *qp_ctx)
+static int sec_aead_verify(struct sec_req *req)
{
struct aead_request *aead_req = req->aead_req.aead_req;
struct crypto_aead *tfm = crypto_aead_reqtfm(aead_req);
- u8 *mac_out = qp_ctx->res[req->req_id].out_mac;
size_t authsize = crypto_aead_authsize(tfm);
+ u8 *mac_out = req->aead_req.out_mac;
u8 *mac = mac_out + SEC_MAX_MAC_LEN;
struct scatterlist *sgl = aead_req->src;
size_t sz;
@@ -163,7 +177,7 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp)
}
if (ctx->alg_type == SEC_AEAD && !req->c_req.encrypt)
- err = sec_aead_verify(req, qp_ctx);
+ err = sec_aead_verify(req);
atomic64_inc(&ctx->sec->debug.dfx.recv_cnt);
@@ -245,6 +259,50 @@ static void sec_free_mac_resource(struct device *dev, struct sec_alg_res *res)
res->out_mac, res->out_mac_dma);
}
+static void sec_free_pbuf_resource(struct device *dev, struct sec_alg_res *res)
+{
+ if (res->pbuf)
+ dma_free_coherent(dev, SEC_TOTAL_PBUF_SZ,
+ res->pbuf, res->pbuf_dma);
+}
+
+/*
+ * To improve performance, pbuffer is used for
+ * small packets (< 512Bytes) as IOMMU translation using.
+ */
+static int sec_alloc_pbuf_resource(struct device *dev, struct sec_alg_res *res)
+{
+ int pbuf_page_offset;
+ int i, j, k;
+
+ res->pbuf = dma_alloc_coherent(dev, SEC_TOTAL_PBUF_SZ,
+ &res->pbuf_dma, GFP_KERNEL);
+ if (!res->pbuf)
+ return -ENOMEM;
+
+ /*
+ * SEC_PBUF_PKG contains data pbuf, iv and
+ * out_mac : <SEC_PBUF|SEC_IV|SEC_MAC>
+ * Every PAGE contains six SEC_PBUF_PKG
+ * The sec_qp_ctx contains QM_Q_DEPTH numbers of SEC_PBUF_PKG
+ * So we need SEC_PBUF_PAGE_NUM numbers of PAGE
+ * for the SEC_TOTAL_PBUF_SZ
+ */
+ for (i = 0; i <= SEC_PBUF_PAGE_NUM; i++) {
+ pbuf_page_offset = PAGE_SIZE * i;
+ for (j = 0; j < SEC_PBUF_NUM; j++) {
+ k = i * SEC_PBUF_NUM + j;
+ if (k == QM_Q_DEPTH)
+ break;
+ res[k].pbuf = res->pbuf +
+ j * SEC_PBUF_PKG + pbuf_page_offset;
+ res[k].pbuf_dma = res->pbuf_dma +
+ j * SEC_PBUF_PKG + pbuf_page_offset;
+ }
+ }
+ return 0;
+}
+
static int sec_alg_resource_alloc(struct sec_ctx *ctx,
struct sec_qp_ctx *qp_ctx)
{
@@ -259,11 +317,18 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx,
if (ctx->alg_type == SEC_AEAD) {
ret = sec_alloc_mac_resource(dev, res);
if (ret)
- goto get_fail;
+ goto alloc_fail;
+ }
+ if (ctx->pbuf_supported) {
+ ret = sec_alloc_pbuf_resource(dev, res);
+ if (ret) {
+ dev_err(dev, "fail to alloc pbuf dma resource!\n");
+ goto alloc_fail;
+ }
}
return 0;
-get_fail:
+alloc_fail:
sec_free_civ_resource(dev, res);
return ret;
@@ -276,6 +341,8 @@ static void sec_alg_resource_free(struct sec_ctx *ctx,
sec_free_civ_resource(dev, qp_ctx->res);
+ if (ctx->pbuf_supported)
+ sec_free_pbuf_resource(dev, qp_ctx->res);
if (ctx->alg_type == SEC_AEAD)
sec_free_mac_resource(dev, qp_ctx->res);
}
@@ -288,11 +355,8 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx,
struct hisi_qp *qp;
int ret = -ENOMEM;
- qp = hisi_qm_create_qp(qm, alg_type);
- if (IS_ERR(qp))
- return PTR_ERR(qp);
-
qp_ctx = &ctx->qp_ctx[qp_ctx_id];
+ qp = ctx->qps[qp_ctx_id];
qp->req_type = 0;
qp->qp_ctx = qp_ctx;
qp->req_cb = sec_req_cb;
@@ -335,7 +399,6 @@ err_free_c_in_pool:
hisi_acc_free_sgl_pool(dev, qp_ctx->c_in_pool);
err_destroy_idr:
idr_destroy(&qp_ctx->req_idr);
- hisi_qm_release_qp(qp);
return ret;
}
@@ -352,7 +415,6 @@ static void sec_release_qp_ctx(struct sec_ctx *ctx,
hisi_acc_free_sgl_pool(dev, qp_ctx->c_in_pool);
idr_destroy(&qp_ctx->req_idr);
- hisi_qm_release_qp(qp_ctx->qp);
}
static int sec_ctx_base_init(struct sec_ctx *ctx)
@@ -360,14 +422,18 @@ static int sec_ctx_base_init(struct sec_ctx *ctx)
struct sec_dev *sec;
int i, ret;
- sec = sec_find_device(cpu_to_node(smp_processor_id()));
- if (!sec) {
- pr_err("Can not find proper Hisilicon SEC device!\n");
+ ctx->qps = sec_create_qps();
+ if (!ctx->qps) {
+ pr_err("Can not create sec qps!\n");
return -ENODEV;
}
+
+ sec = container_of(ctx->qps[0]->qm, struct sec_dev, qm);
ctx->sec = sec;
ctx->hlf_q_num = sec->ctx_q_num >> 1;
+ ctx->pbuf_supported = ctx->sec->iommu_used;
+
/* Half of queue depth is taken as fake requests limit in the queue. */
ctx->fake_req_limit = QM_Q_DEPTH >> 1;
ctx->qp_ctx = kcalloc(sec->ctx_q_num, sizeof(struct sec_qp_ctx),
@@ -386,6 +452,7 @@ err_sec_release_qp_ctx:
for (i = i - 1; i >= 0; i--)
sec_release_qp_ctx(ctx, &ctx->qp_ctx[i]);
+ sec_destroy_qps(ctx->qps, sec->ctx_q_num);
kfree(ctx->qp_ctx);
return ret;
}
@@ -397,6 +464,7 @@ static void sec_ctx_base_uninit(struct sec_ctx *ctx)
for (i = 0; i < ctx->sec->ctx_q_num; i++)
sec_release_qp_ctx(ctx, &ctx->qp_ctx[i]);
+ sec_destroy_qps(ctx->qps, ctx->sec->ctx_q_num);
kfree(ctx->qp_ctx);
}
@@ -447,7 +515,6 @@ static int sec_skcipher_init(struct crypto_skcipher *tfm)
struct sec_ctx *ctx = crypto_skcipher_ctx(tfm);
int ret;
- ctx = crypto_skcipher_ctx(tfm);
ctx->alg_type = SEC_SKCIPHER;
crypto_skcipher_set_reqsize(tfm, sizeof(struct sec_req));
ctx->c_ctx.ivsize = crypto_skcipher_ivsize(tfm);
@@ -591,11 +658,94 @@ GEN_SEC_SETKEY_FUNC(3des_cbc, SEC_CALG_3DES, SEC_CMODE_CBC)
GEN_SEC_SETKEY_FUNC(sm4_xts, SEC_CALG_SM4, SEC_CMODE_XTS)
GEN_SEC_SETKEY_FUNC(sm4_cbc, SEC_CALG_SM4, SEC_CMODE_CBC)
-static int sec_cipher_map(struct device *dev, struct sec_req *req,
+static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req,
+ struct scatterlist *src)
+{
+ struct aead_request *aead_req = req->aead_req.aead_req;
+ struct sec_cipher_req *c_req = &req->c_req;
+ struct sec_qp_ctx *qp_ctx = req->qp_ctx;
+ struct device *dev = SEC_CTX_DEV(ctx);
+ int copy_size, pbuf_length;
+ int req_id = req->req_id;
+
+ if (ctx->alg_type == SEC_AEAD)
+ copy_size = aead_req->cryptlen + aead_req->assoclen;
+ else
+ copy_size = c_req->c_len;
+
+ pbuf_length = sg_copy_to_buffer(src, sg_nents(src),
+ qp_ctx->res[req_id].pbuf,
+ copy_size);
+
+ if (unlikely(pbuf_length != copy_size)) {
+ dev_err(dev, "copy src data to pbuf error!\n");
+ return -EINVAL;
+ }
+
+ c_req->c_in_dma = qp_ctx->res[req_id].pbuf_dma;
+
+ if (!c_req->c_in_dma) {
+ dev_err(dev, "fail to set pbuffer address!\n");
+ return -ENOMEM;
+ }
+
+ c_req->c_out_dma = c_req->c_in_dma;
+
+ return 0;
+}
+
+static void sec_cipher_pbuf_unmap(struct sec_ctx *ctx, struct sec_req *req,
+ struct scatterlist *dst)
+{
+ struct aead_request *aead_req = req->aead_req.aead_req;
+ struct sec_cipher_req *c_req = &req->c_req;
+ struct sec_qp_ctx *qp_ctx = req->qp_ctx;
+ struct device *dev = SEC_CTX_DEV(ctx);
+ int copy_size, pbuf_length;
+ int req_id = req->req_id;
+
+ if (ctx->alg_type == SEC_AEAD)
+ copy_size = c_req->c_len + aead_req->assoclen;
+ else
+ copy_size = c_req->c_len;
+
+ pbuf_length = sg_copy_from_buffer(dst, sg_nents(dst),
+ qp_ctx->res[req_id].pbuf,
+ copy_size);
+
+ if (unlikely(pbuf_length != copy_size))
+ dev_err(dev, "copy pbuf data to dst error!\n");
+
+}
+
+static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
struct scatterlist *src, struct scatterlist *dst)
{
struct sec_cipher_req *c_req = &req->c_req;
+ struct sec_aead_req *a_req = &req->aead_req;
struct sec_qp_ctx *qp_ctx = req->qp_ctx;
+ struct sec_alg_res *res = &qp_ctx->res[req->req_id];
+ struct device *dev = SEC_CTX_DEV(ctx);
+ int ret;
+
+ if (req->use_pbuf) {
+ ret = sec_cipher_pbuf_map(ctx, req, src);
+ c_req->c_ivin = res->pbuf + SEC_PBUF_IV_OFFSET;
+ c_req->c_ivin_dma = res->pbuf_dma + SEC_PBUF_IV_OFFSET;
+ if (ctx->alg_type == SEC_AEAD) {
+ a_req->out_mac = res->pbuf + SEC_PBUF_MAC_OFFSET;
+ a_req->out_mac_dma = res->pbuf_dma +
+ SEC_PBUF_MAC_OFFSET;
+ }
+
+ return ret;
+ }
+ c_req->c_ivin = res->c_ivin;
+ c_req->c_ivin_dma = res->c_ivin_dma;
+ if (ctx->alg_type == SEC_AEAD) {
+ a_req->out_mac = res->out_mac;
+ a_req->out_mac_dma = res->out_mac_dma;
+ }
c_req->c_in = hisi_acc_sg_buf_map_to_hw_sgl(dev, src,
qp_ctx->c_in_pool,
@@ -626,29 +776,34 @@ static int sec_cipher_map(struct device *dev, struct sec_req *req,
return 0;
}
-static void sec_cipher_unmap(struct device *dev, struct sec_cipher_req *req,
+static void sec_cipher_unmap(struct sec_ctx *ctx, struct sec_req *req,
struct scatterlist *src, struct scatterlist *dst)
{
- if (dst != src)
- hisi_acc_sg_buf_unmap(dev, src, req->c_in);
+ struct sec_cipher_req *c_req = &req->c_req;
+ struct device *dev = SEC_CTX_DEV(ctx);
+
+ if (req->use_pbuf) {
+ sec_cipher_pbuf_unmap(ctx, req, dst);
+ } else {
+ if (dst != src)
+ hisi_acc_sg_buf_unmap(dev, src, c_req->c_in);
- hisi_acc_sg_buf_unmap(dev, dst, req->c_out);
+ hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out);
+ }
}
static int sec_skcipher_sgl_map(struct sec_ctx *ctx, struct sec_req *req)
{
struct skcipher_request *sq = req->c_req.sk_req;
- return sec_cipher_map(SEC_CTX_DEV(ctx), req, sq->src, sq->dst);
+ return sec_cipher_map(ctx, req, sq->src, sq->dst);
}
static void sec_skcipher_sgl_unmap(struct sec_ctx *ctx, struct sec_req *req)
{
- struct device *dev = SEC_CTX_DEV(ctx);
- struct sec_cipher_req *c_req = &req->c_req;
- struct skcipher_request *sk_req = c_req->sk_req;
+ struct skcipher_request *sq = req->c_req.sk_req;
- sec_cipher_unmap(dev, c_req, sk_req->src, sk_req->dst);
+ sec_cipher_unmap(ctx, req, sq->src, sq->dst);
}
static int sec_aead_aes_set_key(struct sec_cipher_ctx *c_ctx,
@@ -759,16 +914,14 @@ static int sec_aead_sgl_map(struct sec_ctx *ctx, struct sec_req *req)
{
struct aead_request *aq = req->aead_req.aead_req;
- return sec_cipher_map(SEC_CTX_DEV(ctx), req, aq->src, aq->dst);
+ return sec_cipher_map(ctx, req, aq->src, aq->dst);
}
static void sec_aead_sgl_unmap(struct sec_ctx *ctx, struct sec_req *req)
{
- struct device *dev = SEC_CTX_DEV(ctx);
- struct sec_cipher_req *cq = &req->c_req;
struct aead_request *aq = req->aead_req.aead_req;
- sec_cipher_unmap(dev, cq, aq->src, aq->dst);
+ sec_cipher_unmap(ctx, req, aq->src, aq->dst);
}
static int sec_request_transfer(struct sec_ctx *ctx, struct sec_req *req)
@@ -801,9 +954,9 @@ static void sec_request_untransfer(struct sec_ctx *ctx, struct sec_req *req)
static void sec_skcipher_copy_iv(struct sec_ctx *ctx, struct sec_req *req)
{
struct skcipher_request *sk_req = req->c_req.sk_req;
- u8 *c_ivin = req->qp_ctx->res[req->req_id].c_ivin;
+ struct sec_cipher_req *c_req = &req->c_req;
- memcpy(c_ivin, sk_req->iv, ctx->c_ctx.ivsize);
+ memcpy(c_req->c_ivin, sk_req->iv, ctx->c_ctx.ivsize);
}
static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
@@ -818,8 +971,7 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
memset(sec_sqe, 0, sizeof(struct sec_sqe));
sec_sqe->type2.c_key_addr = cpu_to_le64(c_ctx->c_key_dma);
- sec_sqe->type2.c_ivin_addr =
- cpu_to_le64(req->qp_ctx->res[req->req_id].c_ivin_dma);
+ sec_sqe->type2.c_ivin_addr = cpu_to_le64(c_req->c_ivin_dma);
sec_sqe->type2.data_src_addr = cpu_to_le64(c_req->c_in_dma);
sec_sqe->type2.data_dst_addr = cpu_to_le64(c_req->c_out_dma);
@@ -836,7 +988,10 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
cipher = SEC_CIPHER_DEC << SEC_CIPHER_OFFSET;
sec_sqe->type_cipher_auth = bd_type | cipher;
- sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET;
+ if (req->use_pbuf)
+ sa_type = SEC_PBUF << SEC_SRC_SGL_OFFSET;
+ else
+ sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET;
scene = SEC_COMM_SCENE << SEC_SCENE_OFFSET;
if (c_req->c_in_dma != c_req->c_out_dma)
de = 0x1 << SEC_DE_OFFSET;
@@ -844,7 +999,10 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
sec_sqe->sds_sa_type = (de | scene | sa_type);
/* Just set DST address type */
- da_type = SEC_SGL << SEC_DST_SGL_OFFSET;
+ if (req->use_pbuf)
+ da_type = SEC_PBUF << SEC_DST_SGL_OFFSET;
+ else
+ da_type = SEC_SGL << SEC_DST_SGL_OFFSET;
sec_sqe->sdm_addr_type |= da_type;
sec_sqe->type2.clen_ivhlen |= cpu_to_le32(c_req->c_len);
@@ -904,9 +1062,9 @@ static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req,
static void sec_aead_copy_iv(struct sec_ctx *ctx, struct sec_req *req)
{
struct aead_request *aead_req = req->aead_req.aead_req;
- u8 *c_ivin = req->qp_ctx->res[req->req_id].c_ivin;
+ struct sec_cipher_req *c_req = &req->c_req;
- memcpy(c_ivin, aead_req->iv, ctx->c_ctx.ivsize);
+ memcpy(c_req->c_ivin, aead_req->iv, ctx->c_ctx.ivsize);
}
static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir,
@@ -939,8 +1097,7 @@ static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir,
sec_sqe->type2.cipher_src_offset = cpu_to_le16((u16)aq->assoclen);
- sec_sqe->type2.mac_addr =
- cpu_to_le64(req->qp_ctx->res[req->req_id].out_mac_dma);
+ sec_sqe->type2.mac_addr = cpu_to_le64(a_req->out_mac_dma);
}
static int sec_aead_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
@@ -964,6 +1121,7 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err)
{
struct aead_request *a_req = req->aead_req.aead_req;
struct crypto_aead *tfm = crypto_aead_reqtfm(a_req);
+ struct sec_aead_req *aead_req = &req->aead_req;
struct sec_cipher_req *c_req = &req->c_req;
size_t authsize = crypto_aead_authsize(tfm);
struct sec_qp_ctx *qp_ctx = req->qp_ctx;
@@ -979,7 +1137,7 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err)
struct scatterlist *sgl = a_req->dst;
sz = sg_pcopy_from_buffer(sgl, sg_nents(sgl),
- qp_ctx->res[req->req_id].out_mac,
+ aead_req->out_mac,
authsize, a_req->cryptlen +
a_req->assoclen);
@@ -1031,6 +1189,7 @@ static int sec_request_init(struct sec_ctx *ctx, struct sec_req *req)
static int sec_process(struct sec_ctx *ctx, struct sec_req *req)
{
+ struct sec_cipher_req *c_req = &req->c_req;
int ret;
ret = sec_request_init(ctx, req);
@@ -1057,12 +1216,10 @@ err_send_req:
/* As failing, restore the IV from user */
if (ctx->c_ctx.c_mode == SEC_CMODE_CBC && !req->c_req.encrypt) {
if (ctx->alg_type == SEC_SKCIPHER)
- memcpy(req->c_req.sk_req->iv,
- req->qp_ctx->res[req->req_id].c_ivin,
+ memcpy(req->c_req.sk_req->iv, c_req->c_ivin,
ctx->c_ctx.ivsize);
else
- memcpy(req->aead_req.aead_req->iv,
- req->qp_ctx->res[req->req_id].c_ivin,
+ memcpy(req->aead_req.aead_req->iv, c_req->c_ivin,
ctx->c_ctx.ivsize);
}
@@ -1208,6 +1365,12 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
return -EINVAL;
}
sreq->c_req.c_len = sk_req->cryptlen;
+
+ if (ctx->pbuf_supported && sk_req->cryptlen <= SEC_PBUF_SZ)
+ sreq->use_pbuf = true;
+ else
+ sreq->use_pbuf = false;
+
if (c_alg == SEC_CALG_3DES) {
if (unlikely(sk_req->cryptlen & (DES3_EDE_BLOCK_SIZE - 1))) {
dev_err(dev, "skcipher 3des input length error!\n");
@@ -1321,11 +1484,18 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
size_t authsize = crypto_aead_authsize(tfm);
- if (unlikely(!req->src || !req->dst || !req->cryptlen)) {
+ if (unlikely(!req->src || !req->dst || !req->cryptlen ||
+ req->assoclen > SEC_MAX_AAD_LEN)) {
dev_err(SEC_CTX_DEV(ctx), "aead input param error!\n");
return -EINVAL;
}
+ if (ctx->pbuf_supported && (req->cryptlen + req->assoclen) <=
+ SEC_PBUF_SZ)
+ sreq->use_pbuf = true;
+ else
+ sreq->use_pbuf = false;
+
/* Support AES only */
if (unlikely(c_alg != SEC_CALG_AES)) {
dev_err(SEC_CTX_DEV(ctx), "aead crypto alg error!\n");
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 2bbaf1e2dae7..1f54ebe164b6 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -7,6 +7,7 @@
#include <linux/debugfs.h>
#include <linux/init.h>
#include <linux/io.h>
+#include <linux/iommu.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
@@ -89,8 +90,7 @@ struct sec_hw_error {
static const char sec_name[] = "hisi_sec2";
static struct dentry *sec_debugfs_root;
-static LIST_HEAD(sec_list);
-static DEFINE_MUTEX(sec_list_lock);
+static struct hisi_qm_list sec_devices;
static const struct sec_hw_error sec_hw_errors[] = {
{.int_msk = BIT(0), .msg = "sec_axi_rresp_err_rint"},
@@ -105,37 +105,6 @@ static const struct sec_hw_error sec_hw_errors[] = {
{ /* sentinel */ }
};
-struct sec_dev *sec_find_device(int node)
-{
-#define SEC_NUMA_MAX_DISTANCE 100
- int min_distance = SEC_NUMA_MAX_DISTANCE;
- int dev_node = 0, free_qp_num = 0;
- struct sec_dev *sec, *ret = NULL;
- struct hisi_qm *qm;
- struct device *dev;
-
- mutex_lock(&sec_list_lock);
- list_for_each_entry(sec, &sec_list, list) {
- qm = &sec->qm;
- dev = &qm->pdev->dev;
-#ifdef CONFIG_NUMA
- dev_node = dev->numa_node;
- if (dev_node < 0)
- dev_node = 0;
-#endif
- if (node_distance(dev_node, node) < min_distance) {
- free_qp_num = hisi_qm_get_free_qp_num(qm);
- if (free_qp_num >= sec->ctx_q_num) {
- ret = sec;
- min_distance = node_distance(dev_node, node);
- }
- }
- }
- mutex_unlock(&sec_list_lock);
-
- return ret;
-}
-
static const char * const sec_dbg_file_name[] = {
[SEC_CURRENT_QM] = "current_qm",
[SEC_CLEAR_ENABLE] = "clear_enable",
@@ -238,6 +207,32 @@ static u32 ctx_q_num = SEC_CTX_Q_NUM_DEF;
module_param_cb(ctx_q_num, &sec_ctx_q_num_ops, &ctx_q_num, 0444);
MODULE_PARM_DESC(ctx_q_num, "Queue num in ctx (24 default, 2, 4, ..., 32)");
+void sec_destroy_qps(struct hisi_qp **qps, int qp_num)
+{
+ hisi_qm_free_qps(qps, qp_num);
+ kfree(qps);
+}
+
+struct hisi_qp **sec_create_qps(void)
+{
+ int node = cpu_to_node(smp_processor_id());
+ u32 ctx_num = ctx_q_num;
+ struct hisi_qp **qps;
+ int ret;
+
+ qps = kcalloc(ctx_num, sizeof(struct hisi_qp *), GFP_KERNEL);
+ if (!qps)
+ return NULL;
+
+ ret = hisi_qm_alloc_qps_node(&sec_devices, ctx_num, 0, node, qps);
+ if (!ret)
+ return qps;
+
+ kfree(qps);
+ return NULL;
+}
+
+
static const struct pci_device_id sec_dev_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_PF_PCI_DEVICE_ID) },
{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_VF_PCI_DEVICE_ID) },
@@ -245,20 +240,6 @@ static const struct pci_device_id sec_dev_ids[] = {
};
MODULE_DEVICE_TABLE(pci, sec_dev_ids);
-static inline void sec_add_to_list(struct sec_dev *sec)
-{
- mutex_lock(&sec_list_lock);
- list_add_tail(&sec->list, &sec_list);
- mutex_unlock(&sec_list_lock);
-}
-
-static inline void sec_remove_from_list(struct sec_dev *sec)
-{
- mutex_lock(&sec_list_lock);
- list_del(&sec->list);
- mutex_unlock(&sec_list_lock);
-}
-
static u8 sec_get_endian(struct sec_dev *sec)
{
struct hisi_qm *qm = &sec->qm;
@@ -384,9 +365,8 @@ static void sec_debug_regs_clear(struct hisi_qm *qm)
hisi_qm_debug_regs_clear(qm);
}
-static void sec_hw_error_enable(struct sec_dev *sec)
+static void sec_hw_error_enable(struct hisi_qm *qm)
{
- struct hisi_qm *qm = &sec->qm;
u32 val;
if (qm->ver == QM_HW_V1) {
@@ -414,9 +394,8 @@ static void sec_hw_error_enable(struct sec_dev *sec)
writel(val, qm->io_base + SEC_CONTROL_REG);
}
-static void sec_hw_error_disable(struct sec_dev *sec)
+static void sec_hw_error_disable(struct hisi_qm *qm)
{
- struct hisi_qm *qm = &sec->qm;
u32 val;
val = readl(qm->io_base + SEC_CONTROL_REG);
@@ -435,27 +414,6 @@ static void sec_hw_error_disable(struct sec_dev *sec)
writel(val, qm->io_base + SEC_CONTROL_REG);
}
-static void sec_hw_error_init(struct sec_dev *sec)
-{
- if (sec->qm.fun_type == QM_HW_VF)
- return;
-
- hisi_qm_hw_error_init(&sec->qm, QM_BASE_CE,
- QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT
- | QM_ACC_WB_NOT_READY_TIMEOUT, 0,
- QM_DB_RANDOM_INVALID);
- sec_hw_error_enable(sec);
-}
-
-static void sec_hw_error_uninit(struct sec_dev *sec)
-{
- if (sec->qm.fun_type == QM_HW_VF)
- return;
-
- sec_hw_error_disable(sec);
- writel(GENMASK(12, 0), sec->qm.io_base + SEC_QM_ABNORMAL_INT_MASK);
-}
-
static u32 sec_current_qm_read(struct sec_debug_file *file)
{
struct hisi_qm *qm = file->qm;
@@ -695,6 +653,51 @@ static void sec_debugfs_exit(struct sec_dev *sec)
debugfs_remove_recursive(sec->qm.debug.debug_root);
}
+static void sec_log_hw_error(struct hisi_qm *qm, u32 err_sts)
+{
+ const struct sec_hw_error *errs = sec_hw_errors;
+ struct device *dev = &qm->pdev->dev;
+ u32 err_val;
+
+ while (errs->msg) {
+ if (errs->int_msk & err_sts) {
+ dev_err(dev, "%s [error status=0x%x] found\n",
+ errs->msg, errs->int_msk);
+
+ if (SEC_CORE_INT_STATUS_M_ECC & errs->int_msk) {
+ err_val = readl(qm->io_base +
+ SEC_CORE_SRAM_ECC_ERR_INFO);
+ dev_err(dev, "multi ecc sram num=0x%x\n",
+ SEC_ECC_NUM(err_val));
+ dev_err(dev, "multi ecc sram addr=0x%x\n",
+ SEC_ECC_ADDR(err_val));
+ }
+ }
+ errs++;
+ }
+
+ writel(err_sts, qm->io_base + SEC_CORE_INT_SOURCE);
+}
+
+static u32 sec_get_hw_err_status(struct hisi_qm *qm)
+{
+ return readl(qm->io_base + SEC_CORE_INT_STATUS);
+}
+
+static const struct hisi_qm_err_ini sec_err_ini = {
+ .hw_err_enable = sec_hw_error_enable,
+ .hw_err_disable = sec_hw_error_disable,
+ .get_dev_hw_err_status = sec_get_hw_err_status,
+ .log_dev_hw_err = sec_log_hw_error,
+ .err_info = {
+ .ce = QM_BASE_CE,
+ .nfe = QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT |
+ QM_ACC_WB_NOT_READY_TIMEOUT,
+ .fe = 0,
+ .msi = QM_DB_RANDOM_INVALID,
+ }
+};
+
static int sec_pf_probe_init(struct sec_dev *sec)
{
struct hisi_qm *qm = &sec->qm;
@@ -713,11 +716,13 @@ static int sec_pf_probe_init(struct sec_dev *sec)
return -EINVAL;
}
+ qm->err_ini = &sec_err_ini;
+
ret = sec_set_user_domain_and_cache(sec);
if (ret)
return ret;
- sec_hw_error_init(sec);
+ hisi_qm_dev_err_init(qm);
sec_debug_regs_clear(qm);
return 0;
@@ -750,12 +755,30 @@ static void sec_qm_uninit(struct hisi_qm *qm)
static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec)
{
+ int ret;
+
+ /*
+ * WQ_HIGHPRI: SEC request must be low delayed,
+ * so need a high priority workqueue.
+ * WQ_UNBOUND: SEC task is likely with long
+ * running CPU intensive workloads.
+ */
+ qm->wq = alloc_workqueue("%s", WQ_HIGHPRI |
+ WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus(),
+ pci_name(qm->pdev));
+ if (!qm->wq) {
+ pci_err(qm->pdev, "fail to alloc workqueue\n");
+ return -ENOMEM;
+ }
+
if (qm->fun_type == QM_HW_PF) {
qm->qp_base = SEC_PF_DEF_Q_BASE;
qm->qp_num = pf_q_num;
qm->debug.curr_qm_qp_num = pf_q_num;
- return sec_pf_probe_init(sec);
+ ret = sec_pf_probe_init(sec);
+ if (ret)
+ goto err_probe_uninit;
} else if (qm->fun_type == QM_HW_VF) {
/*
* have no way to get qm configure in VM in v1 hardware,
@@ -768,18 +791,43 @@ static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec)
qm->qp_num = SEC_QUEUE_NUM_V1 - SEC_PF_DEF_Q_NUM;
} else if (qm->ver == QM_HW_V2) {
/* v2 starts to support get vft by mailbox */
- return hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num);
+ ret = hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num);
+ if (ret)
+ goto err_probe_uninit;
}
} else {
- return -ENODEV;
+ ret = -ENODEV;
+ goto err_probe_uninit;
}
return 0;
+err_probe_uninit:
+ destroy_workqueue(qm->wq);
+ return ret;
}
-static void sec_probe_uninit(struct sec_dev *sec)
+static void sec_probe_uninit(struct hisi_qm *qm)
{
- sec_hw_error_uninit(sec);
+ hisi_qm_dev_err_uninit(qm);
+
+ destroy_workqueue(qm->wq);
+}
+
+static void sec_iommu_used_check(struct sec_dev *sec)
+{
+ struct iommu_domain *domain;
+ struct device *dev = &sec->qm.pdev->dev;
+
+ domain = iommu_get_domain_for_dev(dev);
+
+ /* Check if iommu is used */
+ sec->iommu_used = false;
+ if (domain) {
+ if (domain->type & __IOMMU_DOMAIN_PAGING)
+ sec->iommu_used = true;
+ dev_info(dev, "SMMU Opened, the iommu type = %u\n",
+ domain->type);
+ }
}
static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
@@ -795,6 +843,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pci_set_drvdata(pdev, sec);
sec->ctx_q_num = ctx_q_num;
+ sec_iommu_used_check(sec);
qm = &sec->qm;
@@ -820,7 +869,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (ret)
pci_warn(pdev, "Failed to init debugfs!\n");
- sec_add_to_list(sec);
+ hisi_qm_add_to_list(qm, &sec_devices);
ret = sec_register_to_crypto();
if (ret < 0) {
@@ -831,12 +880,12 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return 0;
err_remove_from_list:
- sec_remove_from_list(sec);
+ hisi_qm_del_from_list(qm, &sec_devices);
sec_debugfs_exit(sec);
hisi_qm_stop(qm);
err_probe_uninit:
- sec_probe_uninit(sec);
+ sec_probe_uninit(qm);
err_qm_uninit:
sec_qm_uninit(qm);
@@ -955,7 +1004,7 @@ static void sec_remove(struct pci_dev *pdev)
sec_unregister_from_crypto();
- sec_remove_from_list(sec);
+ hisi_qm_del_from_list(qm, &sec_devices);
if (qm->fun_type == QM_HW_PF && sec->num_vfs)
(void)sec_sriov_disable(pdev);
@@ -967,89 +1016,13 @@ static void sec_remove(struct pci_dev *pdev)
if (qm->fun_type == QM_HW_PF)
sec_debug_regs_clear(qm);
- sec_probe_uninit(sec);
+ sec_probe_uninit(qm);
sec_qm_uninit(qm);
}
-static void sec_log_hw_error(struct sec_dev *sec, u32 err_sts)
-{
- const struct sec_hw_error *errs = sec_hw_errors;
- struct device *dev = &sec->qm.pdev->dev;
- u32 err_val;
-
- while (errs->msg) {
- if (errs->int_msk & err_sts) {
- dev_err(dev, "%s [error status=0x%x] found\n",
- errs->msg, errs->int_msk);
-
- if (SEC_CORE_INT_STATUS_M_ECC & err_sts) {
- err_val = readl(sec->qm.io_base +
- SEC_CORE_SRAM_ECC_ERR_INFO);
- dev_err(dev, "multi ecc sram num=0x%x\n",
- SEC_ECC_NUM(err_val));
- dev_err(dev, "multi ecc sram addr=0x%x\n",
- SEC_ECC_ADDR(err_val));
- }
- }
- errs++;
- }
-}
-
-static pci_ers_result_t sec_hw_error_handle(struct sec_dev *sec)
-{
- u32 err_sts;
-
- /* read err sts */
- err_sts = readl(sec->qm.io_base + SEC_CORE_INT_STATUS);
- if (err_sts) {
- sec_log_hw_error(sec, err_sts);
-
- /* clear error interrupts */
- writel(err_sts, sec->qm.io_base + SEC_CORE_INT_SOURCE);
-
- return PCI_ERS_RESULT_NEED_RESET;
- }
-
- return PCI_ERS_RESULT_RECOVERED;
-}
-
-static pci_ers_result_t sec_process_hw_error(struct pci_dev *pdev)
-{
- struct sec_dev *sec = pci_get_drvdata(pdev);
- pci_ers_result_t qm_ret, sec_ret;
-
- if (!sec) {
- pci_err(pdev, "Can't recover error during device init\n");
- return PCI_ERS_RESULT_NONE;
- }
-
- /* log qm error */
- qm_ret = hisi_qm_hw_error_handle(&sec->qm);
-
- /* log sec error */
- sec_ret = sec_hw_error_handle(sec);
-
- return (qm_ret == PCI_ERS_RESULT_NEED_RESET ||
- sec_ret == PCI_ERS_RESULT_NEED_RESET) ?
- PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED;
-}
-
-static pci_ers_result_t sec_error_detected(struct pci_dev *pdev,
- pci_channel_state_t state)
-{
- if (pdev->is_virtfn)
- return PCI_ERS_RESULT_NONE;
-
- pci_info(pdev, "PCI error detected, state(=%d)!!\n", state);
- if (state == pci_channel_io_perm_failure)
- return PCI_ERS_RESULT_DISCONNECT;
-
- return sec_process_hw_error(pdev);
-}
-
static const struct pci_error_handlers sec_err_handler = {
- .error_detected = sec_error_detected,
+ .error_detected = hisi_qm_dev_err_detected,
};
static struct pci_driver sec_pci_driver = {
@@ -1078,6 +1051,7 @@ static int __init sec_init(void)
{
int ret;
+ hisi_qm_init_list(&sec_devices);
sec_register_debugfs();
ret = pci_register_driver(&sec_pci_driver);
diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h
index bc1db26598bb..82dc6f867171 100644
--- a/drivers/crypto/hisilicon/zip/zip.h
+++ b/drivers/crypto/hisilicon/zip/zip.h
@@ -68,7 +68,7 @@ struct hisi_zip_sqe {
u32 rsvd1[4];
};
-struct hisi_zip *find_zip_device(int node);
+int zip_create_qps(struct hisi_qp **qps, int ctx_num);
int hisi_zip_register_to_crypto(void);
void hisi_zip_unregister_from_crypto(void);
#endif
diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c
index 9815d5e3ccd0..369ec3220574 100644
--- a/drivers/crypto/hisilicon/zip/zip_crypto.c
+++ b/drivers/crypto/hisilicon/zip/zip_crypto.c
@@ -132,29 +132,25 @@ static void hisi_zip_fill_sqe(struct hisi_zip_sqe *sqe, u8 req_type,
sqe->dest_addr_h = upper_32_bits(d_addr);
}
-static int hisi_zip_create_qp(struct hisi_qm *qm, struct hisi_zip_qp_ctx *ctx,
- int alg_type, int req_type)
+static int hisi_zip_start_qp(struct hisi_qp *qp, struct hisi_zip_qp_ctx *ctx,
+ int alg_type, int req_type)
{
- struct hisi_qp *qp;
+ struct device *dev = &qp->qm->pdev->dev;
int ret;
- qp = hisi_qm_create_qp(qm, alg_type);
- if (IS_ERR(qp))
- return PTR_ERR(qp);
-
qp->req_type = req_type;
+ qp->alg_type = alg_type;
qp->qp_ctx = ctx;
- ctx->qp = qp;
ret = hisi_qm_start_qp(qp, 0);
- if (ret < 0)
- goto err_release_qp;
+ if (ret < 0) {
+ dev_err(dev, "start qp failed!\n");
+ return ret;
+ }
- return 0;
+ ctx->qp = qp;
-err_release_qp:
- hisi_qm_release_qp(qp);
- return ret;
+ return 0;
}
static void hisi_zip_release_qp(struct hisi_zip_qp_ctx *ctx)
@@ -165,34 +161,34 @@ static void hisi_zip_release_qp(struct hisi_zip_qp_ctx *ctx)
static int hisi_zip_ctx_init(struct hisi_zip_ctx *hisi_zip_ctx, u8 req_type)
{
+ struct hisi_qp *qps[HZIP_CTX_Q_NUM] = { NULL };
struct hisi_zip *hisi_zip;
- struct hisi_qm *qm;
int ret, i, j;
- /* find the proper zip device */
- hisi_zip = find_zip_device(cpu_to_node(smp_processor_id()));
- if (!hisi_zip) {
- pr_err("Failed to find a proper ZIP device!\n");
+ ret = zip_create_qps(qps, HZIP_CTX_Q_NUM);
+ if (ret) {
+ pr_err("Can not create zip qps!\n");
return -ENODEV;
}
- qm = &hisi_zip->qm;
+
+ hisi_zip = container_of(qps[0]->qm, struct hisi_zip, qm);
for (i = 0; i < HZIP_CTX_Q_NUM; i++) {
/* alg_type = 0 for compress, 1 for decompress in hw sqe */
- ret = hisi_zip_create_qp(qm, &hisi_zip_ctx->qp_ctx[i], i,
- req_type);
- if (ret)
- goto err;
+ ret = hisi_zip_start_qp(qps[i], &hisi_zip_ctx->qp_ctx[i], i,
+ req_type);
+ if (ret) {
+ for (j = i - 1; j >= 0; j--)
+ hisi_qm_stop_qp(hisi_zip_ctx->qp_ctx[j].qp);
+
+ hisi_qm_free_qps(qps, HZIP_CTX_Q_NUM);
+ return ret;
+ }
hisi_zip_ctx->qp_ctx[i].zip_dev = hisi_zip;
}
return 0;
-err:
- for (j = i - 1; j >= 0; j--)
- hisi_zip_release_qp(&hisi_zip_ctx->qp_ctx[j]);
-
- return ret;
}
static void hisi_zip_ctx_exit(struct hisi_zip_ctx *hisi_zip_ctx)
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index e1bab1a91333..fcc85d2dbd07 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -11,6 +11,7 @@
#include <linux/pci.h>
#include <linux/seq_file.h>
#include <linux/topology.h>
+#include <linux/uacce.h>
#include "zip.h"
#define PCI_DEVICE_ID_ZIP_PF 0xa250
@@ -60,13 +61,17 @@
#define HZIP_CORE_DEBUG_DECOMP_5 0x309000
#define HZIP_CORE_INT_SOURCE 0x3010A0
-#define HZIP_CORE_INT_MASK 0x3010A4
+#define HZIP_CORE_INT_MASK_REG 0x3010A4
#define HZIP_CORE_INT_STATUS 0x3010AC
#define HZIP_CORE_INT_STATUS_M_ECC BIT(1)
#define HZIP_CORE_SRAM_ECC_ERR_INFO 0x301148
-#define SRAM_ECC_ERR_NUM_SHIFT 16
-#define SRAM_ECC_ERR_ADDR_SHIFT 24
-#define HZIP_CORE_INT_DISABLE 0x000007FF
+#define HZIP_CORE_INT_RAS_CE_ENB 0x301160
+#define HZIP_CORE_INT_RAS_NFE_ENB 0x301164
+#define HZIP_CORE_INT_RAS_FE_ENB 0x301168
+#define HZIP_CORE_INT_RAS_NFE_ENABLE 0x7FE
+#define HZIP_SRAM_ECC_ERR_NUM_SHIFT 16
+#define HZIP_SRAM_ECC_ERR_ADDR_SHIFT 24
+#define HZIP_CORE_INT_MASK_ALL GENMASK(10, 0)
#define HZIP_COMP_CORE_NUM 2
#define HZIP_DECOMP_CORE_NUM 6
#define HZIP_CORE_NUM (HZIP_COMP_CORE_NUM + \
@@ -83,77 +88,7 @@
static const char hisi_zip_name[] = "hisi_zip";
static struct dentry *hzip_debugfs_root;
-static LIST_HEAD(hisi_zip_list);
-static DEFINE_MUTEX(hisi_zip_list_lock);
-
-struct hisi_zip_resource {
- struct hisi_zip *hzip;
- int distance;
- struct list_head list;
-};
-
-static void free_list(struct list_head *head)
-{
- struct hisi_zip_resource *res, *tmp;
-
- list_for_each_entry_safe(res, tmp, head, list) {
- list_del(&res->list);
- kfree(res);
- }
-}
-
-struct hisi_zip *find_zip_device(int node)
-{
- struct hisi_zip_resource *res, *tmp;
- struct hisi_zip *ret = NULL;
- struct hisi_zip *hisi_zip;
- struct list_head *n;
- struct device *dev;
- LIST_HEAD(head);
-
- mutex_lock(&hisi_zip_list_lock);
-
- if (IS_ENABLED(CONFIG_NUMA)) {
- list_for_each_entry(hisi_zip, &hisi_zip_list, list) {
- res = kzalloc(sizeof(*res), GFP_KERNEL);
- if (!res)
- goto err;
-
- dev = &hisi_zip->qm.pdev->dev;
- res->hzip = hisi_zip;
- res->distance = node_distance(dev_to_node(dev), node);
-
- n = &head;
- list_for_each_entry(tmp, &head, list) {
- if (res->distance < tmp->distance) {
- n = &tmp->list;
- break;
- }
- }
- list_add_tail(&res->list, n);
- }
-
- list_for_each_entry(tmp, &head, list) {
- if (hisi_qm_get_free_qp_num(&tmp->hzip->qm)) {
- ret = tmp->hzip;
- break;
- }
- }
-
- free_list(&head);
- } else {
- ret = list_first_entry(&hisi_zip_list, struct hisi_zip, list);
- }
-
- mutex_unlock(&hisi_zip_list_lock);
-
- return ret;
-
-err:
- free_list(&head);
- mutex_unlock(&hisi_zip_list_lock);
- return NULL;
-}
+static struct hisi_qm_list zip_devices;
struct hisi_zip_hw_error {
u32 int_msk;
@@ -297,9 +232,6 @@ static u32 pf_q_num = HZIP_PF_DEF_Q_NUM;
module_param_cb(pf_q_num, &pf_q_num_ops, &pf_q_num, 0444);
MODULE_PARM_DESC(pf_q_num, "Number of queues in PF(v1 1-4096, v2 1-1024)");
-static int uacce_mode;
-module_param(uacce_mode, int, 0);
-
static u32 vfs_num;
module_param(vfs_num, uint, 0444);
MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63)");
@@ -311,18 +243,11 @@ static const struct pci_device_id hisi_zip_dev_ids[] = {
};
MODULE_DEVICE_TABLE(pci, hisi_zip_dev_ids);
-static inline void hisi_zip_add_to_list(struct hisi_zip *hisi_zip)
+int zip_create_qps(struct hisi_qp **qps, int qp_num)
{
- mutex_lock(&hisi_zip_list_lock);
- list_add_tail(&hisi_zip->list, &hisi_zip_list);
- mutex_unlock(&hisi_zip_list_lock);
-}
+ int node = cpu_to_node(smp_processor_id());
-static inline void hisi_zip_remove_from_list(struct hisi_zip *hisi_zip)
-{
- mutex_lock(&hisi_zip_list_lock);
- list_del(&hisi_zip->list);
- mutex_unlock(&hisi_zip_list_lock);
+ return hisi_qm_alloc_qps_node(&zip_devices, qp_num, 0, node, qps);
}
static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip)
@@ -353,8 +278,14 @@ static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip)
writel(AXUSER_BASE, base + HZIP_BD_RUSER_32_63);
writel(AXUSER_BASE, base + HZIP_SGL_RUSER_32_63);
writel(AXUSER_BASE, base + HZIP_BD_WUSER_32_63);
- writel(AXUSER_BASE, base + HZIP_DATA_RUSER_32_63);
- writel(AXUSER_BASE, base + HZIP_DATA_WUSER_32_63);
+
+ if (hisi_zip->qm.use_sva) {
+ writel(AXUSER_BASE | AXUSER_SSV, base + HZIP_DATA_RUSER_32_63);
+ writel(AXUSER_BASE | AXUSER_SSV, base + HZIP_DATA_WUSER_32_63);
+ } else {
+ writel(AXUSER_BASE, base + HZIP_DATA_RUSER_32_63);
+ writel(AXUSER_BASE, base + HZIP_DATA_WUSER_32_63);
+ }
/* let's open all compression/decompression cores */
writel(DECOMP_CHECK_ENABLE | ALL_COMP_DECOMP_EN,
@@ -366,27 +297,32 @@ static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip)
FIELD_PREP(CQC_CACHE_WB_THRD, 1), base + QM_CACHE_CTL);
}
-static void hisi_zip_hw_error_set_state(struct hisi_zip *hisi_zip, bool state)
+static void hisi_zip_hw_error_enable(struct hisi_qm *qm)
{
- struct hisi_qm *qm = &hisi_zip->qm;
-
if (qm->ver == QM_HW_V1) {
- writel(HZIP_CORE_INT_DISABLE, qm->io_base + HZIP_CORE_INT_MASK);
+ writel(HZIP_CORE_INT_MASK_ALL,
+ qm->io_base + HZIP_CORE_INT_MASK_REG);
dev_info(&qm->pdev->dev, "Does not support hw error handle\n");
return;
}
- if (state) {
- /* clear ZIP hw error source if having */
- writel(HZIP_CORE_INT_DISABLE, hisi_zip->qm.io_base +
- HZIP_CORE_INT_SOURCE);
- /* enable ZIP hw error interrupts */
- writel(0, hisi_zip->qm.io_base + HZIP_CORE_INT_MASK);
- } else {
- /* disable ZIP hw error interrupts */
- writel(HZIP_CORE_INT_DISABLE,
- hisi_zip->qm.io_base + HZIP_CORE_INT_MASK);
- }
+ /* clear ZIP hw error source if having */
+ writel(HZIP_CORE_INT_MASK_ALL, qm->io_base + HZIP_CORE_INT_SOURCE);
+
+ /* configure error type */
+ writel(0x1, qm->io_base + HZIP_CORE_INT_RAS_CE_ENB);
+ writel(0x0, qm->io_base + HZIP_CORE_INT_RAS_FE_ENB);
+ writel(HZIP_CORE_INT_RAS_NFE_ENABLE,
+ qm->io_base + HZIP_CORE_INT_RAS_NFE_ENB);
+
+ /* enable ZIP hw error interrupts */
+ writel(0, qm->io_base + HZIP_CORE_INT_MASK_REG);
+}
+
+static void hisi_zip_hw_error_disable(struct hisi_qm *qm)
+{
+ /* disable ZIP hw error interrupts */
+ writel(HZIP_CORE_INT_MASK_ALL, qm->io_base + HZIP_CORE_INT_MASK_REG);
}
static inline struct hisi_qm *file_to_qm(struct ctrl_debug_file *file)
@@ -638,14 +574,53 @@ static void hisi_zip_debugfs_exit(struct hisi_zip *hisi_zip)
hisi_zip_debug_regs_clear(hisi_zip);
}
-static void hisi_zip_hw_error_init(struct hisi_zip *hisi_zip)
+static void hisi_zip_log_hw_error(struct hisi_qm *qm, u32 err_sts)
+{
+ const struct hisi_zip_hw_error *err = zip_hw_error;
+ struct device *dev = &qm->pdev->dev;
+ u32 err_val;
+
+ while (err->msg) {
+ if (err->int_msk & err_sts) {
+ dev_err(dev, "%s [error status=0x%x] found\n",
+ err->msg, err->int_msk);
+
+ if (err->int_msk & HZIP_CORE_INT_STATUS_M_ECC) {
+ err_val = readl(qm->io_base +
+ HZIP_CORE_SRAM_ECC_ERR_INFO);
+ dev_err(dev, "hisi-zip multi ecc sram num=0x%x\n",
+ ((err_val >>
+ HZIP_SRAM_ECC_ERR_NUM_SHIFT) & 0xFF));
+ dev_err(dev, "hisi-zip multi ecc sram addr=0x%x\n",
+ (err_val >>
+ HZIP_SRAM_ECC_ERR_ADDR_SHIFT));
+ }
+ }
+ err++;
+ }
+
+ writel(err_sts, qm->io_base + HZIP_CORE_INT_SOURCE);
+}
+
+static u32 hisi_zip_get_hw_err_status(struct hisi_qm *qm)
{
- hisi_qm_hw_error_init(&hisi_zip->qm, QM_BASE_CE,
- QM_BASE_NFE | QM_ACC_WB_NOT_READY_TIMEOUT, 0,
- QM_DB_RANDOM_INVALID);
- hisi_zip_hw_error_set_state(hisi_zip, true);
+ return readl(qm->io_base + HZIP_CORE_INT_STATUS);
}
+static const struct hisi_qm_err_ini hisi_zip_err_ini = {
+ .hw_err_enable = hisi_zip_hw_error_enable,
+ .hw_err_disable = hisi_zip_hw_error_disable,
+ .get_dev_hw_err_status = hisi_zip_get_hw_err_status,
+ .log_dev_hw_err = hisi_zip_log_hw_error,
+ .err_info = {
+ .ce = QM_BASE_CE,
+ .nfe = QM_BASE_NFE |
+ QM_ACC_WB_NOT_READY_TIMEOUT,
+ .fe = 0,
+ .msi = QM_DB_RANDOM_INVALID,
+ }
+};
+
static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip)
{
struct hisi_qm *qm = &hisi_zip->qm;
@@ -671,8 +646,10 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip)
return -EINVAL;
}
+ qm->err_ini = &hisi_zip_err_ini;
+
hisi_zip_set_user_domain_and_cache(hisi_zip);
- hisi_zip_hw_error_init(hisi_zip);
+ hisi_qm_dev_err_init(qm);
hisi_zip_debug_regs_clear(hisi_zip);
return 0;
@@ -791,27 +768,15 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pci_set_drvdata(pdev, hisi_zip);
qm = &hisi_zip->qm;
+ qm->use_dma_api = true;
qm->pdev = pdev;
qm->ver = rev_id;
+ qm->algs = "zlib\ngzip";
qm->sqe_size = HZIP_SQE_SIZE;
qm->dev_name = hisi_zip_name;
qm->fun_type = (pdev->device == PCI_DEVICE_ID_ZIP_PF) ? QM_HW_PF :
QM_HW_VF;
- switch (uacce_mode) {
- case 0:
- qm->use_dma_api = true;
- break;
- case 1:
- qm->use_dma_api = false;
- break;
- case 2:
- qm->use_dma_api = true;
- break;
- default:
- return -EINVAL;
- }
-
ret = hisi_qm_init(qm);
if (ret) {
dev_err(&pdev->dev, "Failed to init qm!\n");
@@ -849,7 +814,13 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (ret)
dev_err(&pdev->dev, "Failed to init debugfs (%d)!\n", ret);
- hisi_zip_add_to_list(hisi_zip);
+ hisi_qm_add_to_list(qm, &zip_devices);
+
+ if (qm->uacce) {
+ ret = uacce_register(qm->uacce);
+ if (ret)
+ goto err_qm_uninit;
+ }
if (qm->fun_type == QM_HW_PF && vfs_num > 0) {
ret = hisi_zip_sriov_enable(pdev, vfs_num);
@@ -860,7 +831,7 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return 0;
err_remove_from_list:
- hisi_zip_remove_from_list(hisi_zip);
+ hisi_qm_del_from_list(qm, &zip_devices);
hisi_zip_debugfs_exit(hisi_zip);
hisi_qm_stop(qm);
err_qm_uninit:
@@ -887,92 +858,13 @@ static void hisi_zip_remove(struct pci_dev *pdev)
hisi_zip_debugfs_exit(hisi_zip);
hisi_qm_stop(qm);
- if (qm->fun_type == QM_HW_PF)
- hisi_zip_hw_error_set_state(hisi_zip, false);
-
+ hisi_qm_dev_err_uninit(qm);
hisi_qm_uninit(qm);
- hisi_zip_remove_from_list(hisi_zip);
-}
-
-static void hisi_zip_log_hw_error(struct hisi_zip *hisi_zip, u32 err_sts)
-{
- const struct hisi_zip_hw_error *err = zip_hw_error;
- struct device *dev = &hisi_zip->qm.pdev->dev;
- u32 err_val;
-
- while (err->msg) {
- if (err->int_msk & err_sts) {
- dev_warn(dev, "%s [error status=0x%x] found\n",
- err->msg, err->int_msk);
-
- if (HZIP_CORE_INT_STATUS_M_ECC & err->int_msk) {
- err_val = readl(hisi_zip->qm.io_base +
- HZIP_CORE_SRAM_ECC_ERR_INFO);
- dev_warn(dev, "hisi-zip multi ecc sram num=0x%x\n",
- ((err_val >> SRAM_ECC_ERR_NUM_SHIFT) &
- 0xFF));
- dev_warn(dev, "hisi-zip multi ecc sram addr=0x%x\n",
- (err_val >> SRAM_ECC_ERR_ADDR_SHIFT));
- }
- }
- err++;
- }
-}
-
-static pci_ers_result_t hisi_zip_hw_error_handle(struct hisi_zip *hisi_zip)
-{
- u32 err_sts;
-
- /* read err sts */
- err_sts = readl(hisi_zip->qm.io_base + HZIP_CORE_INT_STATUS);
-
- if (err_sts) {
- hisi_zip_log_hw_error(hisi_zip, err_sts);
- /* clear error interrupts */
- writel(err_sts, hisi_zip->qm.io_base + HZIP_CORE_INT_SOURCE);
-
- return PCI_ERS_RESULT_NEED_RESET;
- }
-
- return PCI_ERS_RESULT_RECOVERED;
-}
-
-static pci_ers_result_t hisi_zip_process_hw_error(struct pci_dev *pdev)
-{
- struct hisi_zip *hisi_zip = pci_get_drvdata(pdev);
- struct device *dev = &pdev->dev;
- pci_ers_result_t qm_ret, zip_ret;
-
- if (!hisi_zip) {
- dev_err(dev,
- "Can't recover ZIP-error occurred during device init\n");
- return PCI_ERS_RESULT_NONE;
- }
-
- qm_ret = hisi_qm_hw_error_handle(&hisi_zip->qm);
-
- zip_ret = hisi_zip_hw_error_handle(hisi_zip);
-
- return (qm_ret == PCI_ERS_RESULT_NEED_RESET ||
- zip_ret == PCI_ERS_RESULT_NEED_RESET) ?
- PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED;
-}
-
-static pci_ers_result_t hisi_zip_error_detected(struct pci_dev *pdev,
- pci_channel_state_t state)
-{
- if (pdev->is_virtfn)
- return PCI_ERS_RESULT_NONE;
-
- dev_info(&pdev->dev, "PCI error detected, state(=%d)!!\n", state);
- if (state == pci_channel_io_perm_failure)
- return PCI_ERS_RESULT_DISCONNECT;
-
- return hisi_zip_process_hw_error(pdev);
+ hisi_qm_del_from_list(qm, &zip_devices);
}
static const struct pci_error_handlers hisi_zip_err_handler = {
- .error_detected = hisi_zip_error_detected,
+ .error_detected = hisi_qm_dev_err_detected,
};
static struct pci_driver hisi_zip_pci_driver = {
@@ -1002,6 +894,7 @@ static int __init hisi_zip_init(void)
{
int ret;
+ hisi_qm_init_list(&zip_devices);
hisi_zip_register_debugfs();
ret = pci_register_driver(&hisi_zip_pci_driver);
@@ -1010,12 +903,10 @@ static int __init hisi_zip_init(void)
goto err_pci;
}
- if (uacce_mode == 0 || uacce_mode == 2) {
- ret = hisi_zip_register_to_crypto();
- if (ret < 0) {
- pr_err("Failed to register driver to crypto.\n");
- goto err_crypto;
- }
+ ret = hisi_zip_register_to_crypto();
+ if (ret < 0) {
+ pr_err("Failed to register driver to crypto.\n");
+ goto err_crypto;
}
return 0;
@@ -1030,8 +921,7 @@ err_pci:
static void __exit hisi_zip_exit(void)
{
- if (uacce_mode == 0 || uacce_mode == 2)
- hisi_zip_unregister_from_crypto();
+ hisi_zip_unregister_from_crypto();
pci_unregister_driver(&hisi_zip_pci_driver);
hisi_zip_unregister_debugfs();
}
diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c
index 25d5227f74a1..0e25fc3087f3 100644
--- a/drivers/crypto/img-hash.c
+++ b/drivers/crypto/img-hash.c
@@ -103,7 +103,7 @@ struct img_hash_request_ctx {
struct ahash_request fallback_req;
/* Zero length buffer must remain last member of struct */
- u8 buffer[0] __aligned(sizeof(u32));
+ u8 buffer[] __aligned(sizeof(u32));
};
struct img_hash_ctx {
diff --git a/drivers/crypto/marvell/Kconfig b/drivers/crypto/marvell/Kconfig
new file mode 100644
index 000000000000..13063384f958
--- /dev/null
+++ b/drivers/crypto/marvell/Kconfig
@@ -0,0 +1,37 @@
+#
+# Marvell crypto drivers configuration
+#
+
+config CRYPTO_DEV_MARVELL
+ tristate
+
+config CRYPTO_DEV_MARVELL_CESA
+ tristate "Marvell's Cryptographic Engine driver"
+ depends on PLAT_ORION || ARCH_MVEBU
+ select CRYPTO_LIB_AES
+ select CRYPTO_LIB_DES
+ select CRYPTO_SKCIPHER
+ select CRYPTO_HASH
+ select SRAM
+ select CRYPTO_DEV_MARVELL
+ help
+ This driver allows you to utilize the Cryptographic Engines and
+ Security Accelerator (CESA) which can be found on MVEBU and ORION
+ platforms.
+ This driver supports CPU offload through DMA transfers.
+
+config CRYPTO_DEV_OCTEONTX_CPT
+ tristate "Support for Marvell OcteonTX CPT driver"
+ depends on ARCH_THUNDER || COMPILE_TEST
+ depends on PCI_MSI && 64BIT
+ depends on CRYPTO_LIB_AES
+ select CRYPTO_SKCIPHER
+ select CRYPTO_HASH
+ select CRYPTO_AEAD
+ select CRYPTO_DEV_MARVELL
+ help
+ This driver allows you to utilize the Marvell Cryptographic
+ Accelerator Unit(CPT) found in OcteonTX series of processors.
+
+ To compile this driver as module, choose M here:
+ the modules will be called octeontx-cpt and octeontx-cptvf
diff --git a/drivers/crypto/marvell/Makefile b/drivers/crypto/marvell/Makefile
index b27cab65e696..6c6a1519b0f1 100644
--- a/drivers/crypto/marvell/Makefile
+++ b/drivers/crypto/marvell/Makefile
@@ -1,3 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell-cesa.o
-marvell-cesa-objs := cesa.o cipher.o hash.o tdma.o
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += cesa/
+obj-$(CONFIG_CRYPTO_DEV_OCTEONTX_CPT) += octeontx/
diff --git a/drivers/crypto/marvell/cesa/Makefile b/drivers/crypto/marvell/cesa/Makefile
new file mode 100644
index 000000000000..b27cab65e696
--- /dev/null
+++ b/drivers/crypto/marvell/cesa/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell-cesa.o
+marvell-cesa-objs := cesa.o cipher.o hash.o tdma.o
diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa/cesa.c
index 8a5f0b0bdf77..8a5f0b0bdf77 100644
--- a/drivers/crypto/marvell/cesa.c
+++ b/drivers/crypto/marvell/cesa/cesa.c
diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa/cesa.h
index f1ed3b85c0d2..e8632d5f343f 100644
--- a/drivers/crypto/marvell/cesa.h
+++ b/drivers/crypto/marvell/cesa/cesa.h
@@ -436,7 +436,7 @@ struct mv_cesa_dev {
* @queue: fifo of the pending crypto requests
* @load: engine load counter, useful for load balancing
* @chain: list of the current tdma descriptors being processed
- * by this engine.
+ * by this engine.
* @complete_queue: fifo of the processed requests by the engine
*
* Structure storing CESA engine information.
@@ -467,7 +467,7 @@ struct mv_cesa_engine {
* @step: launch the crypto operation on the next chunk
* @cleanup: cleanup the crypto request (release associated data)
* @complete: complete the request, i.e copy result or context from sram when
- * needed.
+ * needed.
*/
struct mv_cesa_req_ops {
int (*process)(struct crypto_async_request *req, u32 status);
@@ -734,6 +734,7 @@ static inline struct mv_cesa_engine *mv_cesa_select_engine(int weight)
for (i = 0; i < cesa_dev->caps->nengines; i++) {
struct mv_cesa_engine *engine = cesa_dev->engines + i;
u32 load = atomic_read(&engine->load);
+
if (load < min_load) {
min_load = load;
selected = engine;
diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cesa/cipher.c
index c24f34a48cef..f133c2ccb5ae 100644
--- a/drivers/crypto/marvell/cipher.c
+++ b/drivers/crypto/marvell/cesa/cipher.c
@@ -106,8 +106,8 @@ static void mv_cesa_skcipher_std_step(struct skcipher_request *req)
mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE);
writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG);
- BUG_ON(readl(engine->regs + CESA_SA_CMD) &
- CESA_SA_CMD_EN_CESA_SA_ACCL0);
+ WARN_ON(readl(engine->regs + CESA_SA_CMD) &
+ CESA_SA_CMD_EN_CESA_SA_ACCL0);
writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD);
}
@@ -178,6 +178,7 @@ static inline void mv_cesa_skcipher_prepare(struct crypto_async_request *req,
{
struct skcipher_request *skreq = skcipher_request_cast(req);
struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(skreq);
+
creq->base.engine = engine;
if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ)
@@ -336,7 +337,8 @@ static int mv_cesa_skcipher_dma_req_init(struct skcipher_request *req,
do {
struct mv_cesa_op_ctx *op;
- op = mv_cesa_dma_add_op(&basereq->chain, op_templ, skip_ctx, flags);
+ op = mv_cesa_dma_add_op(&basereq->chain, op_templ, skip_ctx,
+ flags);
if (IS_ERR(op)) {
ret = PTR_ERR(op);
goto err_free_tdma;
@@ -365,9 +367,10 @@ static int mv_cesa_skcipher_dma_req_init(struct skcipher_request *req,
} while (mv_cesa_skcipher_req_iter_next_op(&iter));
/* Add output data for IV */
- ret = mv_cesa_dma_add_result_op(&basereq->chain, CESA_SA_CFG_SRAM_OFFSET,
- CESA_SA_DATA_SRAM_OFFSET,
- CESA_TDMA_SRC_IN_SRAM, flags);
+ ret = mv_cesa_dma_add_result_op(&basereq->chain,
+ CESA_SA_CFG_SRAM_OFFSET,
+ CESA_SA_DATA_SRAM_OFFSET,
+ CESA_TDMA_SRC_IN_SRAM, flags);
if (ret)
goto err_free_tdma;
diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/cesa/hash.c
index a2b35fb0fb89..b971284332b6 100644
--- a/drivers/crypto/marvell/hash.c
+++ b/drivers/crypto/marvell/cesa/hash.c
@@ -141,9 +141,11 @@ static int mv_cesa_ahash_pad_req(struct mv_cesa_ahash_req *creq, u8 *buf)
if (creq->algo_le) {
__le64 bits = cpu_to_le64(creq->len << 3);
+
memcpy(buf + padlen, &bits, sizeof(bits));
} else {
__be64 bits = cpu_to_be64(creq->len << 3);
+
memcpy(buf + padlen, &bits, sizeof(bits));
}
@@ -168,7 +170,8 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req)
if (!sreq->offset) {
digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(req));
for (i = 0; i < digsize / 4; i++)
- writel_relaxed(creq->state[i], engine->regs + CESA_IVDIG(i));
+ writel_relaxed(creq->state[i],
+ engine->regs + CESA_IVDIG(i));
}
if (creq->cache_ptr)
@@ -245,8 +248,8 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req)
mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE);
writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG);
- BUG_ON(readl(engine->regs + CESA_SA_CMD) &
- CESA_SA_CMD_EN_CESA_SA_ACCL0);
+ WARN_ON(readl(engine->regs + CESA_SA_CMD) &
+ CESA_SA_CMD_EN_CESA_SA_ACCL0);
writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD);
}
@@ -329,11 +332,12 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req)
digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq));
if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ &&
- (creq->base.chain.last->flags & CESA_TDMA_TYPE_MSK) == CESA_TDMA_RESULT) {
+ (creq->base.chain.last->flags & CESA_TDMA_TYPE_MSK) ==
+ CESA_TDMA_RESULT) {
__le32 *data = NULL;
/*
- * Result is already in the correct endianess when the SA is
+ * Result is already in the correct endianness when the SA is
* used
*/
data = creq->base.chain.last->op->ctx.hash.hash;
@@ -347,9 +351,9 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req)
CESA_IVDIG(i));
if (creq->last_req) {
/*
- * Hardware's MD5 digest is in little endian format, but
- * SHA in big endian format
- */
+ * Hardware's MD5 digest is in little endian format, but
+ * SHA in big endian format
+ */
if (creq->algo_le) {
__le32 *result = (void *)ahashreq->result;
@@ -439,7 +443,8 @@ static bool mv_cesa_ahash_cache_req(struct ahash_request *req)
struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
bool cached = false;
- if (creq->cache_ptr + req->nbytes < CESA_MAX_HASH_BLOCK_SIZE && !creq->last_req) {
+ if (creq->cache_ptr + req->nbytes < CESA_MAX_HASH_BLOCK_SIZE &&
+ !creq->last_req) {
cached = true;
if (!req->nbytes)
@@ -648,7 +653,8 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
if (!mv_cesa_ahash_req_iter_next_op(&iter))
break;
- op = mv_cesa_dma_add_frag(&basereq->chain, &creq->op_tmpl,
+ op = mv_cesa_dma_add_frag(&basereq->chain,
+ &creq->op_tmpl,
frag_len, flags);
if (IS_ERR(op)) {
ret = PTR_ERR(op);
@@ -920,7 +926,7 @@ struct ahash_alg mv_md5_alg = {
.cra_ctxsize = sizeof(struct mv_cesa_hash_ctx),
.cra_init = mv_cesa_ahash_cra_init,
.cra_module = THIS_MODULE,
- }
+ }
}
};
@@ -990,7 +996,7 @@ struct ahash_alg mv_sha1_alg = {
.cra_ctxsize = sizeof(struct mv_cesa_hash_ctx),
.cra_init = mv_cesa_ahash_cra_init,
.cra_module = THIS_MODULE,
- }
+ }
}
};
@@ -1063,7 +1069,7 @@ struct ahash_alg mv_sha256_alg = {
.cra_ctxsize = sizeof(struct mv_cesa_hash_ctx),
.cra_init = mv_cesa_ahash_cra_init,
.cra_module = THIS_MODULE,
- }
+ }
}
};
@@ -1297,7 +1303,7 @@ struct ahash_alg mv_ahmac_md5_alg = {
.cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx),
.cra_init = mv_cesa_ahmac_cra_init,
.cra_module = THIS_MODULE,
- }
+ }
}
};
@@ -1367,7 +1373,7 @@ struct ahash_alg mv_ahmac_sha1_alg = {
.cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx),
.cra_init = mv_cesa_ahmac_cra_init,
.cra_module = THIS_MODULE,
- }
+ }
}
};
@@ -1437,6 +1443,6 @@ struct ahash_alg mv_ahmac_sha256_alg = {
.cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx),
.cra_init = mv_cesa_ahmac_cra_init,
.cra_module = THIS_MODULE,
- }
+ }
}
};
diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/cesa/tdma.c
index 45939d53e8d6..b81ee276fe0e 100644
--- a/drivers/crypto/marvell/tdma.c
+++ b/drivers/crypto/marvell/cesa/tdma.c
@@ -50,8 +50,8 @@ void mv_cesa_dma_step(struct mv_cesa_req *dreq)
engine->regs + CESA_SA_CFG);
writel_relaxed(dreq->chain.first->cur_dma,
engine->regs + CESA_TDMA_NEXT_ADDR);
- BUG_ON(readl(engine->regs + CESA_SA_CMD) &
- CESA_SA_CMD_EN_CESA_SA_ACCL0);
+ WARN_ON(readl(engine->regs + CESA_SA_CMD) &
+ CESA_SA_CMD_EN_CESA_SA_ACCL0);
writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD);
}
@@ -175,8 +175,10 @@ int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status)
break;
}
- /* Save the last request in error to engine->req, so that the core
- * knows which request was fautly */
+ /*
+ * Save the last request in error to engine->req, so that the core
+ * knows which request was fautly
+ */
if (res) {
spin_lock_bh(&engine->lock);
engine->req = req;
diff --git a/drivers/crypto/marvell/octeontx/Makefile b/drivers/crypto/marvell/octeontx/Makefile
new file mode 100644
index 000000000000..5e956fe1a85b
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_CRYPTO_DEV_OCTEONTX_CPT) += octeontx-cpt.o octeontx-cptvf.o
+
+octeontx-cpt-objs := otx_cptpf_main.o otx_cptpf_mbox.o otx_cptpf_ucode.o
+octeontx-cptvf-objs := otx_cptvf_main.o otx_cptvf_mbox.o otx_cptvf_reqmgr.o \
+ otx_cptvf_algs.o
diff --git a/drivers/crypto/marvell/octeontx/otx_cpt_common.h b/drivers/crypto/marvell/octeontx/otx_cpt_common.h
new file mode 100644
index 000000000000..ca704a7a265f
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx/otx_cpt_common.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Marvell OcteonTX CPT driver
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __OTX_CPT_COMMON_H
+#define __OTX_CPT_COMMON_H
+
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+
+#define OTX_CPT_MAX_MBOX_DATA_STR_SIZE 64
+
+enum otx_cptpf_type {
+ OTX_CPT_AE = 2,
+ OTX_CPT_SE = 3,
+ BAD_OTX_CPTPF_TYPE,
+};
+
+enum otx_cptvf_type {
+ OTX_CPT_AE_TYPES = 1,
+ OTX_CPT_SE_TYPES = 2,
+ BAD_OTX_CPTVF_TYPE,
+};
+
+/* VF-PF message opcodes */
+enum otx_cpt_mbox_opcode {
+ OTX_CPT_MSG_VF_UP = 1,
+ OTX_CPT_MSG_VF_DOWN,
+ OTX_CPT_MSG_READY,
+ OTX_CPT_MSG_QLEN,
+ OTX_CPT_MSG_QBIND_GRP,
+ OTX_CPT_MSG_VQ_PRIORITY,
+ OTX_CPT_MSG_PF_TYPE,
+ OTX_CPT_MSG_ACK,
+ OTX_CPT_MSG_NACK
+};
+
+/* OcteonTX CPT mailbox structure */
+struct otx_cpt_mbox {
+ u64 msg; /* Message type MBOX[0] */
+ u64 data;/* Data MBOX[1] */
+};
+
+#endif /* __OTX_CPT_COMMON_H */
diff --git a/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h
new file mode 100644
index 000000000000..b8bdb9f134f3
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h
@@ -0,0 +1,824 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Marvell OcteonTX CPT driver
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __OTX_CPT_HW_TYPES_H
+#define __OTX_CPT_HW_TYPES_H
+
+#include <linux/types.h>
+
+/* Device IDs */
+#define OTX_CPT_PCI_PF_DEVICE_ID 0xa040
+#define OTX_CPT_PCI_VF_DEVICE_ID 0xa041
+
+#define OTX_CPT_PCI_PF_SUBSYS_ID 0xa340
+#define OTX_CPT_PCI_VF_SUBSYS_ID 0xa341
+
+/* Configuration and status registers are in BAR0 on OcteonTX platform */
+#define OTX_CPT_PF_PCI_CFG_BAR 0
+#define OTX_CPT_VF_PCI_CFG_BAR 0
+
+#define OTX_CPT_BAR_E_CPTX_VFX_BAR0_OFFSET(a, b) \
+ (0x000020000000ll + 0x1000000000ll * (a) + 0x100000ll * (b))
+#define OTX_CPT_BAR_E_CPTX_VFX_BAR0_SIZE 0x400000
+
+/* Mailbox interrupts offset */
+#define OTX_CPT_PF_MBOX_INT 3
+#define OTX_CPT_PF_INT_VEC_E_MBOXX(x, a) ((x) + (a))
+/* Number of MSIX supported in PF */
+#define OTX_CPT_PF_MSIX_VECTORS 4
+/* Maximum supported microcode groups */
+#define OTX_CPT_MAX_ENGINE_GROUPS 8
+
+/* CPT instruction size in bytes */
+#define OTX_CPT_INST_SIZE 64
+/* CPT queue next chunk pointer size in bytes */
+#define OTX_CPT_NEXT_CHUNK_PTR_SIZE 8
+
+/* OcteonTX CPT VF MSIX vectors and their offsets */
+#define OTX_CPT_VF_MSIX_VECTORS 2
+#define OTX_CPT_VF_INTR_MBOX_MASK BIT(0)
+#define OTX_CPT_VF_INTR_DOVF_MASK BIT(1)
+#define OTX_CPT_VF_INTR_IRDE_MASK BIT(2)
+#define OTX_CPT_VF_INTR_NWRP_MASK BIT(3)
+#define OTX_CPT_VF_INTR_SERR_MASK BIT(4)
+
+/* OcteonTX CPT PF registers */
+#define OTX_CPT_PF_CONSTANTS (0x0ll)
+#define OTX_CPT_PF_RESET (0x100ll)
+#define OTX_CPT_PF_DIAG (0x120ll)
+#define OTX_CPT_PF_BIST_STATUS (0x160ll)
+#define OTX_CPT_PF_ECC0_CTL (0x200ll)
+#define OTX_CPT_PF_ECC0_FLIP (0x210ll)
+#define OTX_CPT_PF_ECC0_INT (0x220ll)
+#define OTX_CPT_PF_ECC0_INT_W1S (0x230ll)
+#define OTX_CPT_PF_ECC0_ENA_W1S (0x240ll)
+#define OTX_CPT_PF_ECC0_ENA_W1C (0x250ll)
+#define OTX_CPT_PF_MBOX_INTX(b) (0x400ll | (u64)(b) << 3)
+#define OTX_CPT_PF_MBOX_INT_W1SX(b) (0x420ll | (u64)(b) << 3)
+#define OTX_CPT_PF_MBOX_ENA_W1CX(b) (0x440ll | (u64)(b) << 3)
+#define OTX_CPT_PF_MBOX_ENA_W1SX(b) (0x460ll | (u64)(b) << 3)
+#define OTX_CPT_PF_EXEC_INT (0x500ll)
+#define OTX_CPT_PF_EXEC_INT_W1S (0x520ll)
+#define OTX_CPT_PF_EXEC_ENA_W1C (0x540ll)
+#define OTX_CPT_PF_EXEC_ENA_W1S (0x560ll)
+#define OTX_CPT_PF_GX_EN(b) (0x600ll | (u64)(b) << 3)
+#define OTX_CPT_PF_EXEC_INFO (0x700ll)
+#define OTX_CPT_PF_EXEC_BUSY (0x800ll)
+#define OTX_CPT_PF_EXEC_INFO0 (0x900ll)
+#define OTX_CPT_PF_EXEC_INFO1 (0x910ll)
+#define OTX_CPT_PF_INST_REQ_PC (0x10000ll)
+#define OTX_CPT_PF_INST_LATENCY_PC (0x10020ll)
+#define OTX_CPT_PF_RD_REQ_PC (0x10040ll)
+#define OTX_CPT_PF_RD_LATENCY_PC (0x10060ll)
+#define OTX_CPT_PF_RD_UC_PC (0x10080ll)
+#define OTX_CPT_PF_ACTIVE_CYCLES_PC (0x10100ll)
+#define OTX_CPT_PF_EXE_CTL (0x4000000ll)
+#define OTX_CPT_PF_EXE_STATUS (0x4000008ll)
+#define OTX_CPT_PF_EXE_CLK (0x4000010ll)
+#define OTX_CPT_PF_EXE_DBG_CTL (0x4000018ll)
+#define OTX_CPT_PF_EXE_DBG_DATA (0x4000020ll)
+#define OTX_CPT_PF_EXE_BIST_STATUS (0x4000028ll)
+#define OTX_CPT_PF_EXE_REQ_TIMER (0x4000030ll)
+#define OTX_CPT_PF_EXE_MEM_CTL (0x4000038ll)
+#define OTX_CPT_PF_EXE_PERF_CTL (0x4001000ll)
+#define OTX_CPT_PF_EXE_DBG_CNTX(b) (0x4001100ll | (u64)(b) << 3)
+#define OTX_CPT_PF_EXE_PERF_EVENT_CNT (0x4001180ll)
+#define OTX_CPT_PF_EXE_EPCI_INBX_CNT(b) (0x4001200ll | (u64)(b) << 3)
+#define OTX_CPT_PF_EXE_EPCI_OUTBX_CNT(b) (0x4001240ll | (u64)(b) << 3)
+#define OTX_CPT_PF_ENGX_UCODE_BASE(b) (0x4002000ll | (u64)(b) << 3)
+#define OTX_CPT_PF_QX_CTL(b) (0x8000000ll | (u64)(b) << 20)
+#define OTX_CPT_PF_QX_GMCTL(b) (0x8000020ll | (u64)(b) << 20)
+#define OTX_CPT_PF_QX_CTL2(b) (0x8000100ll | (u64)(b) << 20)
+#define OTX_CPT_PF_VFX_MBOXX(b, c) (0x8001000ll | (u64)(b) << 20 | \
+ (u64)(c) << 8)
+
+/* OcteonTX CPT VF registers */
+#define OTX_CPT_VQX_CTL(b) (0x100ll | (u64)(b) << 20)
+#define OTX_CPT_VQX_SADDR(b) (0x200ll | (u64)(b) << 20)
+#define OTX_CPT_VQX_DONE_WAIT(b) (0x400ll | (u64)(b) << 20)
+#define OTX_CPT_VQX_INPROG(b) (0x410ll | (u64)(b) << 20)
+#define OTX_CPT_VQX_DONE(b) (0x420ll | (u64)(b) << 20)
+#define OTX_CPT_VQX_DONE_ACK(b) (0x440ll | (u64)(b) << 20)
+#define OTX_CPT_VQX_DONE_INT_W1S(b) (0x460ll | (u64)(b) << 20)
+#define OTX_CPT_VQX_DONE_INT_W1C(b) (0x468ll | (u64)(b) << 20)
+#define OTX_CPT_VQX_DONE_ENA_W1S(b) (0x470ll | (u64)(b) << 20)
+#define OTX_CPT_VQX_DONE_ENA_W1C(b) (0x478ll | (u64)(b) << 20)
+#define OTX_CPT_VQX_MISC_INT(b) (0x500ll | (u64)(b) << 20)
+#define OTX_CPT_VQX_MISC_INT_W1S(b) (0x508ll | (u64)(b) << 20)
+#define OTX_CPT_VQX_MISC_ENA_W1S(b) (0x510ll | (u64)(b) << 20)
+#define OTX_CPT_VQX_MISC_ENA_W1C(b) (0x518ll | (u64)(b) << 20)
+#define OTX_CPT_VQX_DOORBELL(b) (0x600ll | (u64)(b) << 20)
+#define OTX_CPT_VFX_PF_MBOXX(b, c) (0x1000ll | ((b) << 20) | ((c) << 3))
+
+/*
+ * Enumeration otx_cpt_ucode_error_code_e
+ *
+ * Enumerates ucode errors
+ */
+enum otx_cpt_ucode_error_code_e {
+ CPT_NO_UCODE_ERROR = 0x00,
+ ERR_OPCODE_UNSUPPORTED = 0x01,
+
+ /* Scatter gather */
+ ERR_SCATTER_GATHER_WRITE_LENGTH = 0x02,
+ ERR_SCATTER_GATHER_LIST = 0x03,
+ ERR_SCATTER_GATHER_NOT_SUPPORTED = 0x04,
+
+};
+
+/*
+ * Enumeration otx_cpt_comp_e
+ *
+ * CPT OcteonTX Completion Enumeration
+ * Enumerates the values of CPT_RES_S[COMPCODE].
+ */
+enum otx_cpt_comp_e {
+ CPT_COMP_E_NOTDONE = 0x00,
+ CPT_COMP_E_GOOD = 0x01,
+ CPT_COMP_E_FAULT = 0x02,
+ CPT_COMP_E_SWERR = 0x03,
+ CPT_COMP_E_HWERR = 0x04,
+ CPT_COMP_E_LAST_ENTRY = 0x05
+};
+
+/*
+ * Enumeration otx_cpt_vf_int_vec_e
+ *
+ * CPT OcteonTX VF MSI-X Vector Enumeration
+ * Enumerates the MSI-X interrupt vectors.
+ */
+enum otx_cpt_vf_int_vec_e {
+ CPT_VF_INT_VEC_E_MISC = 0x00,
+ CPT_VF_INT_VEC_E_DONE = 0x01
+};
+
+/*
+ * Structure cpt_inst_s
+ *
+ * CPT Instruction Structure
+ * This structure specifies the instruction layout. Instructions are
+ * stored in memory as little-endian unless CPT()_PF_Q()_CTL[INST_BE] is set.
+ * cpt_inst_s_s
+ * Word 0
+ * doneint:1 Done interrupt.
+ * 0 = No interrupts related to this instruction.
+ * 1 = When the instruction completes, CPT()_VQ()_DONE[DONE] will be
+ * incremented,and based on the rules described there an interrupt may
+ * occur.
+ * Word 1
+ * res_addr [127: 64] Result IOVA.
+ * If nonzero, specifies where to write CPT_RES_S.
+ * If zero, no result structure will be written.
+ * Address must be 16-byte aligned.
+ * Bits <63:49> are ignored by hardware; software should use a
+ * sign-extended bit <48> for forward compatibility.
+ * Word 2
+ * grp:10 [171:162] If [WQ_PTR] is nonzero, the SSO guest-group to use when
+ * CPT submits work SSO.
+ * For the SSO to not discard the add-work request, FPA_PF_MAP() must map
+ * [GRP] and CPT()_PF_Q()_GMCTL[GMID] as valid.
+ * tt:2 [161:160] If [WQ_PTR] is nonzero, the SSO tag type to use when CPT
+ * submits work to SSO
+ * tag:32 [159:128] If [WQ_PTR] is nonzero, the SSO tag to use when CPT
+ * submits work to SSO.
+ * Word 3
+ * wq_ptr [255:192] If [WQ_PTR] is nonzero, it is a pointer to a
+ * work-queue entry that CPT submits work to SSO after all context,
+ * output data, and result write operations are visible to other
+ * CNXXXX units and the cores. Bits <2:0> must be zero.
+ * Bits <63:49> are ignored by hardware; software should
+ * use a sign-extended bit <48> for forward compatibility.
+ * Internal:
+ * Bits <63:49>, <2:0> are ignored by hardware, treated as always 0x0.
+ * Word 4
+ * ei0; [319:256] Engine instruction word 0. Passed to the AE/SE.
+ * Word 5
+ * ei1; [383:320] Engine instruction word 1. Passed to the AE/SE.
+ * Word 6
+ * ei2; [447:384] Engine instruction word 1. Passed to the AE/SE.
+ * Word 7
+ * ei3; [511:448] Engine instruction word 1. Passed to the AE/SE.
+ *
+ */
+union otx_cpt_inst_s {
+ u64 u[8];
+
+ struct {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+ u64 reserved_17_63:47;
+ u64 doneint:1;
+ u64 reserved_0_15:16;
+#else /* Word 0 - Little Endian */
+ u64 reserved_0_15:16;
+ u64 doneint:1;
+ u64 reserved_17_63:47;
+#endif /* Word 0 - End */
+ u64 res_addr;
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 2 - Big Endian */
+ u64 reserved_172_191:20;
+ u64 grp:10;
+ u64 tt:2;
+ u64 tag:32;
+#else /* Word 2 - Little Endian */
+ u64 tag:32;
+ u64 tt:2;
+ u64 grp:10;
+ u64 reserved_172_191:20;
+#endif /* Word 2 - End */
+ u64 wq_ptr;
+ u64 ei0;
+ u64 ei1;
+ u64 ei2;
+ u64 ei3;
+ } s;
+};
+
+/*
+ * Structure cpt_res_s
+ *
+ * CPT Result Structure
+ * The CPT coprocessor writes the result structure after it completes a
+ * CPT_INST_S instruction. The result structure is exactly 16 bytes, and
+ * each instruction completion produces exactly one result structure.
+ *
+ * This structure is stored in memory as little-endian unless
+ * CPT()_PF_Q()_CTL[INST_BE] is set.
+ * cpt_res_s_s
+ * Word 0
+ * doneint:1 [16:16] Done interrupt. This bit is copied from the
+ * corresponding instruction's CPT_INST_S[DONEINT].
+ * compcode:8 [7:0] Indicates completion/error status of the CPT coprocessor
+ * for the associated instruction, as enumerated by CPT_COMP_E.
+ * Core software may write the memory location containing [COMPCODE] to
+ * 0x0 before ringing the doorbell, and then poll for completion by
+ * checking for a nonzero value.
+ * Once the core observes a nonzero [COMPCODE] value in this case,the CPT
+ * coprocessor will have also completed L2/DRAM write operations.
+ * Word 1
+ * reserved
+ *
+ */
+union otx_cpt_res_s {
+ u64 u[2];
+ struct {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+ u64 reserved_17_63:47;
+ u64 doneint:1;
+ u64 reserved_8_15:8;
+ u64 compcode:8;
+#else /* Word 0 - Little Endian */
+ u64 compcode:8;
+ u64 reserved_8_15:8;
+ u64 doneint:1;
+ u64 reserved_17_63:47;
+#endif /* Word 0 - End */
+ u64 reserved_64_127;
+ } s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_pf_bist_status
+ *
+ * CPT PF Control Bist Status Register
+ * This register has the BIST status of memories. Each bit is the BIST result
+ * of an individual memory (per bit, 0 = pass and 1 = fail).
+ * otx_cptx_pf_bist_status_s
+ * Word0
+ * bstatus [29:0](RO/H) BIST status. One bit per memory, enumerated by
+ * CPT_RAMS_E.
+ */
+union otx_cptx_pf_bist_status {
+ u64 u;
+ struct otx_cptx_pf_bist_status_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+ u64 reserved_30_63:34;
+ u64 bstatus:30;
+#else /* Word 0 - Little Endian */
+ u64 bstatus:30;
+ u64 reserved_30_63:34;
+#endif /* Word 0 - End */
+ } s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_pf_constants
+ *
+ * CPT PF Constants Register
+ * This register contains implementation-related parameters of CPT in CNXXXX.
+ * otx_cptx_pf_constants_s
+ * Word 0
+ * reserved_40_63:24 [63:40] Reserved.
+ * epcis:8 [39:32](RO) Number of EPCI busses.
+ * grps:8 [31:24](RO) Number of engine groups implemented.
+ * ae:8 [23:16](RO/H) Number of AEs. In CNXXXX, for CPT0 returns 0x0,
+ * for CPT1 returns 0x18, or less if there are fuse-disables.
+ * se:8 [15:8](RO/H) Number of SEs. In CNXXXX, for CPT0 returns 0x30,
+ * or less if there are fuse-disables, for CPT1 returns 0x0.
+ * vq:8 [7:0](RO) Number of VQs.
+ */
+union otx_cptx_pf_constants {
+ u64 u;
+ struct otx_cptx_pf_constants_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+ u64 reserved_40_63:24;
+ u64 epcis:8;
+ u64 grps:8;
+ u64 ae:8;
+ u64 se:8;
+ u64 vq:8;
+#else /* Word 0 - Little Endian */
+ u64 vq:8;
+ u64 se:8;
+ u64 ae:8;
+ u64 grps:8;
+ u64 epcis:8;
+ u64 reserved_40_63:24;
+#endif /* Word 0 - End */
+ } s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_pf_exe_bist_status
+ *
+ * CPT PF Engine Bist Status Register
+ * This register has the BIST status of each engine. Each bit is the
+ * BIST result of an individual engine (per bit, 0 = pass and 1 = fail).
+ * otx_cptx_pf_exe_bist_status_s
+ * Word0
+ * reserved_48_63:16 [63:48] reserved
+ * bstatus:48 [47:0](RO/H) BIST status. One bit per engine.
+ *
+ */
+union otx_cptx_pf_exe_bist_status {
+ u64 u;
+ struct otx_cptx_pf_exe_bist_status_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+ u64 reserved_48_63:16;
+ u64 bstatus:48;
+#else /* Word 0 - Little Endian */
+ u64 bstatus:48;
+ u64 reserved_48_63:16;
+#endif /* Word 0 - End */
+ } s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_pf_q#_ctl
+ *
+ * CPT Queue Control Register
+ * This register configures queues. This register should be changed only
+ * when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]).
+ * otx_cptx_pf_qx_ctl_s
+ * Word0
+ * reserved_60_63:4 [63:60] reserved.
+ * aura:12; [59:48](R/W) Guest-aura for returning this queue's
+ * instruction-chunk buffers to FPA. Only used when [INST_FREE] is set.
+ * For the FPA to not discard the request, FPA_PF_MAP() must map
+ * [AURA] and CPT()_PF_Q()_GMCTL[GMID] as valid.
+ * reserved_45_47:3 [47:45] reserved.
+ * size:13 [44:32](R/W) Command-buffer size, in number of 64-bit words per
+ * command buffer segment. Must be 8*n + 1, where n is the number of
+ * instructions per buffer segment.
+ * reserved_11_31:21 [31:11] Reserved.
+ * cont_err:1 [10:10](R/W) Continue on error.
+ * 0 = When CPT()_VQ()_MISC_INT[NWRP], CPT()_VQ()_MISC_INT[IRDE] or
+ * CPT()_VQ()_MISC_INT[DOVF] are set by hardware or software via
+ * CPT()_VQ()_MISC_INT_W1S, then CPT()_VQ()_CTL[ENA] is cleared. Due to
+ * pipelining, additional instructions may have been processed between the
+ * instruction causing the error and the next instruction in the disabled
+ * queue (the instruction at CPT()_VQ()_SADDR).
+ * 1 = Ignore errors and continue processing instructions.
+ * For diagnostic use only.
+ * inst_free:1 [9:9](R/W) Instruction FPA free. When set, when CPT reaches the
+ * end of an instruction chunk, that chunk will be freed to the FPA.
+ * inst_be:1 [8:8](R/W) Instruction big-endian control. When set, instructions,
+ * instruction next chunk pointers, and result structures are stored in
+ * big-endian format in memory.
+ * iqb_ldwb:1 [7:7](R/W) Instruction load don't write back.
+ * 0 = The hardware issues NCB transient load (LDT) towards the cache,
+ * which if the line hits and is is dirty will cause the line to be
+ * written back before being replaced.
+ * 1 = The hardware issues NCB LDWB read-and-invalidate command towards
+ * the cache when fetching the last word of instructions; as a result the
+ * line will not be written back when replaced. This improves
+ * performance, but software must not read the instructions after they are
+ * posted to the hardware. Reads that do not consume the last word of a
+ * cache line always use LDI.
+ * reserved_4_6:3 [6:4] Reserved.
+ * grp:3; [3:1](R/W) Engine group.
+ * pri:1; [0:0](R/W) Queue priority.
+ * 1 = This queue has higher priority. Round-robin between higher
+ * priority queues.
+ * 0 = This queue has lower priority. Round-robin between lower
+ * priority queues.
+ */
+union otx_cptx_pf_qx_ctl {
+ u64 u;
+ struct otx_cptx_pf_qx_ctl_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+ u64 reserved_60_63:4;
+ u64 aura:12;
+ u64 reserved_45_47:3;
+ u64 size:13;
+ u64 reserved_11_31:21;
+ u64 cont_err:1;
+ u64 inst_free:1;
+ u64 inst_be:1;
+ u64 iqb_ldwb:1;
+ u64 reserved_4_6:3;
+ u64 grp:3;
+ u64 pri:1;
+#else /* Word 0 - Little Endian */
+ u64 pri:1;
+ u64 grp:3;
+ u64 reserved_4_6:3;
+ u64 iqb_ldwb:1;
+ u64 inst_be:1;
+ u64 inst_free:1;
+ u64 cont_err:1;
+ u64 reserved_11_31:21;
+ u64 size:13;
+ u64 reserved_45_47:3;
+ u64 aura:12;
+ u64 reserved_60_63:4;
+#endif /* Word 0 - End */
+ } s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_vq#_saddr
+ *
+ * CPT Queue Starting Buffer Address Registers
+ * These registers set the instruction buffer starting address.
+ * otx_cptx_vqx_saddr_s
+ * Word0
+ * reserved_49_63:15 [63:49] Reserved.
+ * ptr:43 [48:6](R/W/H) Instruction buffer IOVA <48:6> (64-byte aligned).
+ * When written, it is the initial buffer starting address; when read,
+ * it is the next read pointer to be requested from L2C. The PTR field
+ * is overwritten with the next pointer each time that the command buffer
+ * segment is exhausted. New commands will then be read from the newly
+ * specified command buffer pointer.
+ * reserved_0_5:6 [5:0] Reserved.
+ *
+ */
+union otx_cptx_vqx_saddr {
+ u64 u;
+ struct otx_cptx_vqx_saddr_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+ u64 reserved_49_63:15;
+ u64 ptr:43;
+ u64 reserved_0_5:6;
+#else /* Word 0 - Little Endian */
+ u64 reserved_0_5:6;
+ u64 ptr:43;
+ u64 reserved_49_63:15;
+#endif /* Word 0 - End */
+ } s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_vq#_misc_ena_w1s
+ *
+ * CPT Queue Misc Interrupt Enable Set Register
+ * This register sets interrupt enable bits.
+ * otx_cptx_vqx_misc_ena_w1s_s
+ * Word0
+ * reserved_5_63:59 [63:5] Reserved.
+ * swerr:1 [4:4](R/W1S/H) Reads or sets enable for
+ * CPT(0..1)_VQ(0..63)_MISC_INT[SWERR].
+ * nwrp:1 [3:3](R/W1S/H) Reads or sets enable for
+ * CPT(0..1)_VQ(0..63)_MISC_INT[NWRP].
+ * irde:1 [2:2](R/W1S/H) Reads or sets enable for
+ * CPT(0..1)_VQ(0..63)_MISC_INT[IRDE].
+ * dovf:1 [1:1](R/W1S/H) Reads or sets enable for
+ * CPT(0..1)_VQ(0..63)_MISC_INT[DOVF].
+ * mbox:1 [0:0](R/W1S/H) Reads or sets enable for
+ * CPT(0..1)_VQ(0..63)_MISC_INT[MBOX].
+ *
+ */
+union otx_cptx_vqx_misc_ena_w1s {
+ u64 u;
+ struct otx_cptx_vqx_misc_ena_w1s_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+ u64 reserved_5_63:59;
+ u64 swerr:1;
+ u64 nwrp:1;
+ u64 irde:1;
+ u64 dovf:1;
+ u64 mbox:1;
+#else /* Word 0 - Little Endian */
+ u64 mbox:1;
+ u64 dovf:1;
+ u64 irde:1;
+ u64 nwrp:1;
+ u64 swerr:1;
+ u64 reserved_5_63:59;
+#endif /* Word 0 - End */
+ } s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_vq#_doorbell
+ *
+ * CPT Queue Doorbell Registers
+ * Doorbells for the CPT instruction queues.
+ * otx_cptx_vqx_doorbell_s
+ * Word0
+ * reserved_20_63:44 [63:20] Reserved.
+ * dbell_cnt:20 [19:0](R/W/H) Number of instruction queue 64-bit words to add
+ * to the CPT instruction doorbell count. Readback value is the the
+ * current number of pending doorbell requests. If counter overflows
+ * CPT()_VQ()_MISC_INT[DBELL_DOVF] is set. To reset the count back to
+ * zero, write one to clear CPT()_VQ()_MISC_INT_ENA_W1C[DBELL_DOVF],
+ * then write a value of 2^20 minus the read [DBELL_CNT], then write one
+ * to CPT()_VQ()_MISC_INT_W1C[DBELL_DOVF] and
+ * CPT()_VQ()_MISC_INT_ENA_W1S[DBELL_DOVF]. Must be a multiple of 8.
+ * All CPT instructions are 8 words and require a doorbell count of
+ * multiple of 8.
+ */
+union otx_cptx_vqx_doorbell {
+ u64 u;
+ struct otx_cptx_vqx_doorbell_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+ u64 reserved_20_63:44;
+ u64 dbell_cnt:20;
+#else /* Word 0 - Little Endian */
+ u64 dbell_cnt:20;
+ u64 reserved_20_63:44;
+#endif /* Word 0 - End */
+ } s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_vq#_inprog
+ *
+ * CPT Queue In Progress Count Registers
+ * These registers contain the per-queue instruction in flight registers.
+ * otx_cptx_vqx_inprog_s
+ * Word0
+ * reserved_8_63:56 [63:8] Reserved.
+ * inflight:8 [7:0](RO/H) Inflight count. Counts the number of instructions
+ * for the VF for which CPT is fetching, executing or responding to
+ * instructions. However this does not include any interrupts that are
+ * awaiting software handling (CPT()_VQ()_DONE[DONE] != 0x0).
+ * A queue may not be reconfigured until:
+ * 1. CPT()_VQ()_CTL[ENA] is cleared by software.
+ * 2. [INFLIGHT] is polled until equals to zero.
+ */
+union otx_cptx_vqx_inprog {
+ u64 u;
+ struct otx_cptx_vqx_inprog_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+ u64 reserved_8_63:56;
+ u64 inflight:8;
+#else /* Word 0 - Little Endian */
+ u64 inflight:8;
+ u64 reserved_8_63:56;
+#endif /* Word 0 - End */
+ } s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_vq#_misc_int
+ *
+ * CPT Queue Misc Interrupt Register
+ * These registers contain the per-queue miscellaneous interrupts.
+ * otx_cptx_vqx_misc_int_s
+ * Word 0
+ * reserved_5_63:59 [63:5] Reserved.
+ * swerr:1 [4:4](R/W1C/H) Software error from engines.
+ * nwrp:1 [3:3](R/W1C/H) NCB result write response error.
+ * irde:1 [2:2](R/W1C/H) Instruction NCB read response error.
+ * dovf:1 [1:1](R/W1C/H) Doorbell overflow.
+ * mbox:1 [0:0](R/W1C/H) PF to VF mailbox interrupt. Set when
+ * CPT()_VF()_PF_MBOX(0) is written.
+ *
+ */
+union otx_cptx_vqx_misc_int {
+ u64 u;
+ struct otx_cptx_vqx_misc_int_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+ u64 reserved_5_63:59;
+ u64 swerr:1;
+ u64 nwrp:1;
+ u64 irde:1;
+ u64 dovf:1;
+ u64 mbox:1;
+#else /* Word 0 - Little Endian */
+ u64 mbox:1;
+ u64 dovf:1;
+ u64 irde:1;
+ u64 nwrp:1;
+ u64 swerr:1;
+ u64 reserved_5_63:59;
+#endif /* Word 0 - End */
+ } s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_vq#_done_ack
+ *
+ * CPT Queue Done Count Ack Registers
+ * This register is written by software to acknowledge interrupts.
+ * otx_cptx_vqx_done_ack_s
+ * Word0
+ * reserved_20_63:44 [63:20] Reserved.
+ * done_ack:20 [19:0](R/W/H) Number of decrements to CPT()_VQ()_DONE[DONE].
+ * Reads CPT()_VQ()_DONE[DONE]. Written by software to acknowledge
+ * interrupts. If CPT()_VQ()_DONE[DONE] is still nonzero the interrupt
+ * will be re-sent if the conditions described in CPT()_VQ()_DONE[DONE]
+ * are satisfied.
+ *
+ */
+union otx_cptx_vqx_done_ack {
+ u64 u;
+ struct otx_cptx_vqx_done_ack_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+ u64 reserved_20_63:44;
+ u64 done_ack:20;
+#else /* Word 0 - Little Endian */
+ u64 done_ack:20;
+ u64 reserved_20_63:44;
+#endif /* Word 0 - End */
+ } s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_vq#_done
+ *
+ * CPT Queue Done Count Registers
+ * These registers contain the per-queue instruction done count.
+ * cptx_vqx_done_s
+ * Word0
+ * reserved_20_63:44 [63:20] Reserved.
+ * done:20 [19:0](R/W/H) Done count. When CPT_INST_S[DONEINT] set and that
+ * instruction completes, CPT()_VQ()_DONE[DONE] is incremented when the
+ * instruction finishes. Write to this field are for diagnostic use only;
+ * instead software writes CPT()_VQ()_DONE_ACK with the number of
+ * decrements for this field.
+ * Interrupts are sent as follows:
+ * * When CPT()_VQ()_DONE[DONE] = 0, then no results are pending, the
+ * interrupt coalescing timer is held to zero, and an interrupt is not
+ * sent.
+ * * When CPT()_VQ()_DONE[DONE] != 0, then the interrupt coalescing timer
+ * counts. If the counter is >= CPT()_VQ()_DONE_WAIT[TIME_WAIT]*1024, or
+ * CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT], i.e. enough
+ * time has passed or enough results have arrived, then the interrupt is
+ * sent.
+ * * When CPT()_VQ()_DONE_ACK is written (or CPT()_VQ()_DONE is written
+ * but this is not typical), the interrupt coalescing timer restarts.
+ * Note after decrementing this interrupt equation is recomputed,
+ * for example if CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT]
+ * and because the timer is zero, the interrupt will be resent immediately.
+ * (This covers the race case between software acknowledging an interrupt
+ * and a result returning.)
+ * * When CPT()_VQ()_DONE_ENA_W1S[DONE] = 0, interrupts are not sent,
+ * but the counting described above still occurs.
+ * Since CPT instructions complete out-of-order, if software is using
+ * completion interrupts the suggested scheme is to request a DONEINT on
+ * each request, and when an interrupt arrives perform a "greedy" scan for
+ * completions; even if a later command is acknowledged first this will
+ * not result in missing a completion.
+ * Software is responsible for making sure [DONE] does not overflow;
+ * for example by insuring there are not more than 2^20-1 instructions in
+ * flight that may request interrupts.
+ *
+ */
+union otx_cptx_vqx_done {
+ u64 u;
+ struct otx_cptx_vqx_done_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+ u64 reserved_20_63:44;
+ u64 done:20;
+#else /* Word 0 - Little Endian */
+ u64 done:20;
+ u64 reserved_20_63:44;
+#endif /* Word 0 - End */
+ } s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_vq#_done_wait
+ *
+ * CPT Queue Done Interrupt Coalescing Wait Registers
+ * Specifies the per queue interrupt coalescing settings.
+ * cptx_vqx_done_wait_s
+ * Word0
+ * reserved_48_63:16 [63:48] Reserved.
+ * time_wait:16; [47:32](R/W) Time hold-off. When CPT()_VQ()_DONE[DONE] = 0
+ * or CPT()_VQ()_DONE_ACK is written a timer is cleared. When the timer
+ * reaches [TIME_WAIT]*1024 then interrupt coalescing ends.
+ * see CPT()_VQ()_DONE[DONE]. If 0x0, time coalescing is disabled.
+ * reserved_20_31:12 [31:20] Reserved.
+ * num_wait:20 [19:0](R/W) Number of messages hold-off.
+ * When CPT()_VQ()_DONE[DONE] >= [NUM_WAIT] then interrupt coalescing ends
+ * see CPT()_VQ()_DONE[DONE]. If 0x0, same behavior as 0x1.
+ *
+ */
+union otx_cptx_vqx_done_wait {
+ u64 u;
+ struct otx_cptx_vqx_done_wait_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+ u64 reserved_48_63:16;
+ u64 time_wait:16;
+ u64 reserved_20_31:12;
+ u64 num_wait:20;
+#else /* Word 0 - Little Endian */
+ u64 num_wait:20;
+ u64 reserved_20_31:12;
+ u64 time_wait:16;
+ u64 reserved_48_63:16;
+#endif /* Word 0 - End */
+ } s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_vq#_done_ena_w1s
+ *
+ * CPT Queue Done Interrupt Enable Set Registers
+ * Write 1 to these registers will enable the DONEINT interrupt for the queue.
+ * cptx_vqx_done_ena_w1s_s
+ * Word0
+ * reserved_1_63:63 [63:1] Reserved.
+ * done:1 [0:0](R/W1S/H) Write 1 will enable DONEINT for this queue.
+ * Write 0 has no effect. Read will return the enable bit.
+ */
+union otx_cptx_vqx_done_ena_w1s {
+ u64 u;
+ struct otx_cptx_vqx_done_ena_w1s_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+ u64 reserved_1_63:63;
+ u64 done:1;
+#else /* Word 0 - Little Endian */
+ u64 done:1;
+ u64 reserved_1_63:63;
+#endif /* Word 0 - End */
+ } s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_vq#_ctl
+ *
+ * CPT VF Queue Control Registers
+ * This register configures queues. This register should be changed (other than
+ * clearing [ENA]) only when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]).
+ * cptx_vqx_ctl_s
+ * Word0
+ * reserved_1_63:63 [63:1] Reserved.
+ * ena:1 [0:0](R/W/H) Enables the logical instruction queue.
+ * See also CPT()_PF_Q()_CTL[CONT_ERR] and CPT()_VQ()_INPROG[INFLIGHT].
+ * 1 = Queue is enabled.
+ * 0 = Queue is disabled.
+ */
+union otx_cptx_vqx_ctl {
+ u64 u;
+ struct otx_cptx_vqx_ctl_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+ u64 reserved_1_63:63;
+ u64 ena:1;
+#else /* Word 0 - Little Endian */
+ u64 ena:1;
+ u64 reserved_1_63:63;
+#endif /* Word 0 - End */
+ } s;
+};
+
+/*
+ * Error Address/Error Codes
+ *
+ * In the event of a severe error, microcode writes an 8-byte Error Code
+ * value (ECODE) to host memory at the Rptr address specified by the host
+ * system (in the 64-byte request).
+ *
+ * Word0
+ * [63:56](R) 8-bit completion code
+ * [55:48](R) Number of the core that reported the severe error
+ * [47:0] Lower 6 bytes of M-Inst word2. Used to assist in uniquely
+ * identifying which specific instruction caused the error. This assumes
+ * that each instruction has a unique result location (RPTR), at least
+ * for a given period of time.
+ */
+union otx_cpt_error_code {
+ u64 u;
+ struct otx_cpt_error_code_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+ uint64_t ccode:8;
+ uint64_t coreid:8;
+ uint64_t rptr6:48;
+#else /* Word 0 - Little Endian */
+ uint64_t rptr6:48;
+ uint64_t coreid:8;
+ uint64_t ccode:8;
+#endif /* Word 0 - End */
+ } s;
+};
+
+#endif /*__OTX_CPT_HW_TYPES_H */
diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf.h b/drivers/crypto/marvell/octeontx/otx_cptpf.h
new file mode 100644
index 000000000000..73cd0a9bc563
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx/otx_cptpf.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Marvell OcteonTX CPT driver
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __OTX_CPTPF_H
+#define __OTX_CPTPF_H
+
+#include <linux/types.h>
+#include <linux/device.h>
+#include "otx_cptpf_ucode.h"
+
+/*
+ * OcteonTX CPT device structure
+ */
+struct otx_cpt_device {
+ void __iomem *reg_base; /* Register start address */
+ struct pci_dev *pdev; /* Pci device handle */
+ struct otx_cpt_eng_grps eng_grps;/* Engine groups information */
+ struct list_head list;
+ u8 pf_type; /* PF type SE or AE */
+ u8 max_vfs; /* Maximum number of VFs supported by the CPT */
+ u8 vfs_enabled; /* Number of enabled VFs */
+};
+
+void otx_cpt_mbox_intr_handler(struct otx_cpt_device *cpt, int mbx);
+void otx_cpt_disable_all_cores(struct otx_cpt_device *cpt);
+
+#endif /* __OTX_CPTPF_H */
diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_main.c b/drivers/crypto/marvell/octeontx/otx_cptpf_main.c
new file mode 100644
index 000000000000..200fb3303db0
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx/otx_cptpf_main.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTX CPT driver
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "otx_cpt_common.h"
+#include "otx_cptpf.h"
+
+#define DRV_NAME "octeontx-cpt"
+#define DRV_VERSION "1.0"
+
+static void otx_cpt_disable_mbox_interrupts(struct otx_cpt_device *cpt)
+{
+ /* Disable mbox(0) interrupts for all VFs */
+ writeq(~0ull, cpt->reg_base + OTX_CPT_PF_MBOX_ENA_W1CX(0));
+}
+
+static void otx_cpt_enable_mbox_interrupts(struct otx_cpt_device *cpt)
+{
+ /* Enable mbox(0) interrupts for all VFs */
+ writeq(~0ull, cpt->reg_base + OTX_CPT_PF_MBOX_ENA_W1SX(0));
+}
+
+static irqreturn_t otx_cpt_mbx0_intr_handler(int __always_unused irq,
+ void *cpt)
+{
+ otx_cpt_mbox_intr_handler(cpt, 0);
+
+ return IRQ_HANDLED;
+}
+
+static void otx_cpt_reset(struct otx_cpt_device *cpt)
+{
+ writeq(1, cpt->reg_base + OTX_CPT_PF_RESET);
+}
+
+static void otx_cpt_find_max_enabled_cores(struct otx_cpt_device *cpt)
+{
+ union otx_cptx_pf_constants pf_cnsts = {0};
+
+ pf_cnsts.u = readq(cpt->reg_base + OTX_CPT_PF_CONSTANTS);
+ cpt->eng_grps.avail.max_se_cnt = pf_cnsts.s.se;
+ cpt->eng_grps.avail.max_ae_cnt = pf_cnsts.s.ae;
+}
+
+static u32 otx_cpt_check_bist_status(struct otx_cpt_device *cpt)
+{
+ union otx_cptx_pf_bist_status bist_sts = {0};
+
+ bist_sts.u = readq(cpt->reg_base + OTX_CPT_PF_BIST_STATUS);
+ return bist_sts.u;
+}
+
+static u64 otx_cpt_check_exe_bist_status(struct otx_cpt_device *cpt)
+{
+ union otx_cptx_pf_exe_bist_status bist_sts = {0};
+
+ bist_sts.u = readq(cpt->reg_base + OTX_CPT_PF_EXE_BIST_STATUS);
+ return bist_sts.u;
+}
+
+static int otx_cpt_device_init(struct otx_cpt_device *cpt)
+{
+ struct device *dev = &cpt->pdev->dev;
+ u16 sdevid;
+ u64 bist;
+
+ /* Reset the PF when probed first */
+ otx_cpt_reset(cpt);
+ mdelay(100);
+
+ pci_read_config_word(cpt->pdev, PCI_SUBSYSTEM_ID, &sdevid);
+
+ /* Check BIST status */
+ bist = (u64)otx_cpt_check_bist_status(cpt);
+ if (bist) {
+ dev_err(dev, "RAM BIST failed with code 0x%llx", bist);
+ return -ENODEV;
+ }
+
+ bist = otx_cpt_check_exe_bist_status(cpt);
+ if (bist) {
+ dev_err(dev, "Engine BIST failed with code 0x%llx", bist);
+ return -ENODEV;
+ }
+
+ /* Get max enabled cores */
+ otx_cpt_find_max_enabled_cores(cpt);
+
+ if ((sdevid == OTX_CPT_PCI_PF_SUBSYS_ID) &&
+ (cpt->eng_grps.avail.max_se_cnt == 0)) {
+ cpt->pf_type = OTX_CPT_AE;
+ } else if ((sdevid == OTX_CPT_PCI_PF_SUBSYS_ID) &&
+ (cpt->eng_grps.avail.max_ae_cnt == 0)) {
+ cpt->pf_type = OTX_CPT_SE;
+ }
+
+ /* Get max VQs/VFs supported by the device */
+ cpt->max_vfs = pci_sriov_get_totalvfs(cpt->pdev);
+
+ /* Disable all cores */
+ otx_cpt_disable_all_cores(cpt);
+
+ return 0;
+}
+
+static int otx_cpt_register_interrupts(struct otx_cpt_device *cpt)
+{
+ struct device *dev = &cpt->pdev->dev;
+ u32 mbox_int_idx = OTX_CPT_PF_MBOX_INT;
+ u32 num_vec = OTX_CPT_PF_MSIX_VECTORS;
+ int ret;
+
+ /* Enable MSI-X */
+ ret = pci_alloc_irq_vectors(cpt->pdev, num_vec, num_vec, PCI_IRQ_MSIX);
+ if (ret < 0) {
+ dev_err(&cpt->pdev->dev,
+ "Request for #%d msix vectors failed\n",
+ num_vec);
+ return ret;
+ }
+
+ /* Register mailbox interrupt handlers */
+ ret = request_irq(pci_irq_vector(cpt->pdev,
+ OTX_CPT_PF_INT_VEC_E_MBOXX(mbox_int_idx, 0)),
+ otx_cpt_mbx0_intr_handler, 0, "CPT Mbox0", cpt);
+ if (ret) {
+ dev_err(dev, "Request irq failed\n");
+ pci_free_irq_vectors(cpt->pdev);
+ return ret;
+ }
+ /* Enable mailbox interrupt */
+ otx_cpt_enable_mbox_interrupts(cpt);
+ return 0;
+}
+
+static void otx_cpt_unregister_interrupts(struct otx_cpt_device *cpt)
+{
+ u32 mbox_int_idx = OTX_CPT_PF_MBOX_INT;
+
+ otx_cpt_disable_mbox_interrupts(cpt);
+ free_irq(pci_irq_vector(cpt->pdev,
+ OTX_CPT_PF_INT_VEC_E_MBOXX(mbox_int_idx, 0)),
+ cpt);
+ pci_free_irq_vectors(cpt->pdev);
+}
+
+
+static int otx_cpt_sriov_configure(struct pci_dev *pdev, int numvfs)
+{
+ struct otx_cpt_device *cpt = pci_get_drvdata(pdev);
+ int ret = 0;
+
+ if (numvfs > cpt->max_vfs)
+ numvfs = cpt->max_vfs;
+
+ if (numvfs > 0) {
+ ret = otx_cpt_try_create_default_eng_grps(cpt->pdev,
+ &cpt->eng_grps,
+ cpt->pf_type);
+ if (ret)
+ return ret;
+
+ cpt->vfs_enabled = numvfs;
+ ret = pci_enable_sriov(pdev, numvfs);
+ if (ret) {
+ cpt->vfs_enabled = 0;
+ return ret;
+ }
+ otx_cpt_set_eng_grps_is_rdonly(&cpt->eng_grps, true);
+ try_module_get(THIS_MODULE);
+ ret = numvfs;
+ } else {
+ pci_disable_sriov(pdev);
+ otx_cpt_set_eng_grps_is_rdonly(&cpt->eng_grps, false);
+ module_put(THIS_MODULE);
+ cpt->vfs_enabled = 0;
+ }
+ dev_notice(&cpt->pdev->dev, "VFs enabled: %d\n", ret);
+
+ return ret;
+}
+
+static int otx_cpt_probe(struct pci_dev *pdev,
+ const struct pci_device_id __always_unused *ent)
+{
+ struct device *dev = &pdev->dev;
+ struct otx_cpt_device *cpt;
+ int err;
+
+ cpt = devm_kzalloc(dev, sizeof(*cpt), GFP_KERNEL);
+ if (!cpt)
+ return -ENOMEM;
+
+ pci_set_drvdata(pdev, cpt);
+ cpt->pdev = pdev;
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(dev, "Failed to enable PCI device\n");
+ goto err_clear_drvdata;
+ }
+
+ err = pci_request_regions(pdev, DRV_NAME);
+ if (err) {
+ dev_err(dev, "PCI request regions failed 0x%x\n", err);
+ goto err_disable_device;
+ }
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+ if (err) {
+ dev_err(dev, "Unable to get usable DMA configuration\n");
+ goto err_release_regions;
+ }
+
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
+ if (err) {
+ dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n");
+ goto err_release_regions;
+ }
+
+ /* MAP PF's configuration registers */
+ cpt->reg_base = pci_iomap(pdev, OTX_CPT_PF_PCI_CFG_BAR, 0);
+ if (!cpt->reg_base) {
+ dev_err(dev, "Cannot map config register space, aborting\n");
+ err = -ENOMEM;
+ goto err_release_regions;
+ }
+
+ /* CPT device HW initialization */
+ err = otx_cpt_device_init(cpt);
+ if (err)
+ goto err_unmap_region;
+
+ /* Register interrupts */
+ err = otx_cpt_register_interrupts(cpt);
+ if (err)
+ goto err_unmap_region;
+
+ /* Initialize engine groups */
+ err = otx_cpt_init_eng_grps(pdev, &cpt->eng_grps, cpt->pf_type);
+ if (err)
+ goto err_unregister_interrupts;
+
+ return 0;
+
+err_unregister_interrupts:
+ otx_cpt_unregister_interrupts(cpt);
+err_unmap_region:
+ pci_iounmap(pdev, cpt->reg_base);
+err_release_regions:
+ pci_release_regions(pdev);
+err_disable_device:
+ pci_disable_device(pdev);
+err_clear_drvdata:
+ pci_set_drvdata(pdev, NULL);
+
+ return err;
+}
+
+static void otx_cpt_remove(struct pci_dev *pdev)
+{
+ struct otx_cpt_device *cpt = pci_get_drvdata(pdev);
+
+ if (!cpt)
+ return;
+
+ /* Disable VFs */
+ pci_disable_sriov(pdev);
+ /* Cleanup engine groups */
+ otx_cpt_cleanup_eng_grps(pdev, &cpt->eng_grps);
+ /* Disable CPT PF interrupts */
+ otx_cpt_unregister_interrupts(cpt);
+ /* Disengage SE and AE cores from all groups */
+ otx_cpt_disable_all_cores(cpt);
+ pci_iounmap(pdev, cpt->reg_base);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+}
+
+/* Supported devices */
+static const struct pci_device_id otx_cpt_id_table[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OTX_CPT_PCI_PF_DEVICE_ID) },
+ { 0, } /* end of table */
+};
+
+static struct pci_driver otx_cpt_pci_driver = {
+ .name = DRV_NAME,
+ .id_table = otx_cpt_id_table,
+ .probe = otx_cpt_probe,
+ .remove = otx_cpt_remove,
+ .sriov_configure = otx_cpt_sriov_configure
+};
+
+module_pci_driver(otx_cpt_pci_driver);
+
+MODULE_AUTHOR("Marvell International Ltd.");
+MODULE_DESCRIPTION("Marvell OcteonTX CPT Physical Function Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRV_VERSION);
+MODULE_DEVICE_TABLE(pci, otx_cpt_id_table);
diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c b/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c
new file mode 100644
index 000000000000..a6774232e9a3
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTX CPT driver
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "otx_cpt_common.h"
+#include "otx_cptpf.h"
+
+static char *get_mbox_opcode_str(int msg_opcode)
+{
+ char *str = "Unknown";
+
+ switch (msg_opcode) {
+ case OTX_CPT_MSG_VF_UP:
+ str = "UP";
+ break;
+
+ case OTX_CPT_MSG_VF_DOWN:
+ str = "DOWN";
+ break;
+
+ case OTX_CPT_MSG_READY:
+ str = "READY";
+ break;
+
+ case OTX_CPT_MSG_QLEN:
+ str = "QLEN";
+ break;
+
+ case OTX_CPT_MSG_QBIND_GRP:
+ str = "QBIND_GRP";
+ break;
+
+ case OTX_CPT_MSG_VQ_PRIORITY:
+ str = "VQ_PRIORITY";
+ break;
+
+ case OTX_CPT_MSG_PF_TYPE:
+ str = "PF_TYPE";
+ break;
+
+ case OTX_CPT_MSG_ACK:
+ str = "ACK";
+ break;
+
+ case OTX_CPT_MSG_NACK:
+ str = "NACK";
+ break;
+ }
+
+ return str;
+}
+
+static void dump_mbox_msg(struct otx_cpt_mbox *mbox_msg, int vf_id)
+{
+ char raw_data_str[OTX_CPT_MAX_MBOX_DATA_STR_SIZE];
+
+ hex_dump_to_buffer(mbox_msg, sizeof(struct otx_cpt_mbox), 16, 8,
+ raw_data_str, OTX_CPT_MAX_MBOX_DATA_STR_SIZE, false);
+ if (vf_id >= 0)
+ pr_debug("MBOX opcode %s received from VF%d raw_data %s",
+ get_mbox_opcode_str(mbox_msg->msg), vf_id,
+ raw_data_str);
+ else
+ pr_debug("MBOX opcode %s received from PF raw_data %s",
+ get_mbox_opcode_str(mbox_msg->msg), raw_data_str);
+}
+
+static void otx_cpt_send_msg_to_vf(struct otx_cpt_device *cpt, int vf,
+ struct otx_cpt_mbox *mbx)
+{
+ /* Writing mbox(0) causes interrupt */
+ writeq(mbx->data, cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 1));
+ writeq(mbx->msg, cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 0));
+}
+
+/*
+ * ACKs VF's mailbox message
+ * @vf: VF to which ACK to be sent
+ */
+static void otx_cpt_mbox_send_ack(struct otx_cpt_device *cpt, int vf,
+ struct otx_cpt_mbox *mbx)
+{
+ mbx->data = 0ull;
+ mbx->msg = OTX_CPT_MSG_ACK;
+ otx_cpt_send_msg_to_vf(cpt, vf, mbx);
+}
+
+/* NACKs VF's mailbox message that PF is not able to complete the action */
+static void otx_cptpf_mbox_send_nack(struct otx_cpt_device *cpt, int vf,
+ struct otx_cpt_mbox *mbx)
+{
+ mbx->data = 0ull;
+ mbx->msg = OTX_CPT_MSG_NACK;
+ otx_cpt_send_msg_to_vf(cpt, vf, mbx);
+}
+
+static void otx_cpt_clear_mbox_intr(struct otx_cpt_device *cpt, u32 vf)
+{
+ /* W1C for the VF */
+ writeq(1ull << vf, cpt->reg_base + OTX_CPT_PF_MBOX_INTX(0));
+}
+
+/*
+ * Configure QLEN/Chunk sizes for VF
+ */
+static void otx_cpt_cfg_qlen_for_vf(struct otx_cpt_device *cpt, int vf,
+ u32 size)
+{
+ union otx_cptx_pf_qx_ctl pf_qx_ctl;
+
+ pf_qx_ctl.u = readq(cpt->reg_base + OTX_CPT_PF_QX_CTL(vf));
+ pf_qx_ctl.s.size = size;
+ pf_qx_ctl.s.cont_err = true;
+ writeq(pf_qx_ctl.u, cpt->reg_base + OTX_CPT_PF_QX_CTL(vf));
+}
+
+/*
+ * Configure VQ priority
+ */
+static void otx_cpt_cfg_vq_priority(struct otx_cpt_device *cpt, int vf, u32 pri)
+{
+ union otx_cptx_pf_qx_ctl pf_qx_ctl;
+
+ pf_qx_ctl.u = readq(cpt->reg_base + OTX_CPT_PF_QX_CTL(vf));
+ pf_qx_ctl.s.pri = pri;
+ writeq(pf_qx_ctl.u, cpt->reg_base + OTX_CPT_PF_QX_CTL(vf));
+}
+
+static int otx_cpt_bind_vq_to_grp(struct otx_cpt_device *cpt, u8 q, u8 grp)
+{
+ struct device *dev = &cpt->pdev->dev;
+ struct otx_cpt_eng_grp_info *eng_grp;
+ union otx_cptx_pf_qx_ctl pf_qx_ctl;
+ struct otx_cpt_ucode *ucode;
+
+ if (q >= cpt->max_vfs) {
+ dev_err(dev, "Requested queue %d is > than maximum avail %d",
+ q, cpt->max_vfs);
+ return -EINVAL;
+ }
+
+ if (grp >= OTX_CPT_MAX_ENGINE_GROUPS) {
+ dev_err(dev, "Requested group %d is > than maximum avail %d",
+ grp, OTX_CPT_MAX_ENGINE_GROUPS);
+ return -EINVAL;
+ }
+
+ eng_grp = &cpt->eng_grps.grp[grp];
+ if (!eng_grp->is_enabled) {
+ dev_err(dev, "Requested engine group %d is disabled", grp);
+ return -EINVAL;
+ }
+
+ pf_qx_ctl.u = readq(cpt->reg_base + OTX_CPT_PF_QX_CTL(q));
+ pf_qx_ctl.s.grp = grp;
+ writeq(pf_qx_ctl.u, cpt->reg_base + OTX_CPT_PF_QX_CTL(q));
+
+ if (eng_grp->mirror.is_ena)
+ ucode = &eng_grp->g->grp[eng_grp->mirror.idx].ucode[0];
+ else
+ ucode = &eng_grp->ucode[0];
+
+ if (otx_cpt_uc_supports_eng_type(ucode, OTX_CPT_SE_TYPES))
+ return OTX_CPT_SE_TYPES;
+ else if (otx_cpt_uc_supports_eng_type(ucode, OTX_CPT_AE_TYPES))
+ return OTX_CPT_AE_TYPES;
+ else
+ return BAD_OTX_CPTVF_TYPE;
+}
+
+/* Interrupt handler to handle mailbox messages from VFs */
+static void otx_cpt_handle_mbox_intr(struct otx_cpt_device *cpt, int vf)
+{
+ int vftype = 0;
+ struct otx_cpt_mbox mbx = {};
+ struct device *dev = &cpt->pdev->dev;
+ /*
+ * MBOX[0] contains msg
+ * MBOX[1] contains data
+ */
+ mbx.msg = readq(cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 0));
+ mbx.data = readq(cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 1));
+
+ dump_mbox_msg(&mbx, vf);
+
+ switch (mbx.msg) {
+ case OTX_CPT_MSG_VF_UP:
+ mbx.msg = OTX_CPT_MSG_VF_UP;
+ mbx.data = cpt->vfs_enabled;
+ otx_cpt_send_msg_to_vf(cpt, vf, &mbx);
+ break;
+ case OTX_CPT_MSG_READY:
+ mbx.msg = OTX_CPT_MSG_READY;
+ mbx.data = vf;
+ otx_cpt_send_msg_to_vf(cpt, vf, &mbx);
+ break;
+ case OTX_CPT_MSG_VF_DOWN:
+ /* First msg in VF teardown sequence */
+ otx_cpt_mbox_send_ack(cpt, vf, &mbx);
+ break;
+ case OTX_CPT_MSG_QLEN:
+ otx_cpt_cfg_qlen_for_vf(cpt, vf, mbx.data);
+ otx_cpt_mbox_send_ack(cpt, vf, &mbx);
+ break;
+ case OTX_CPT_MSG_QBIND_GRP:
+ vftype = otx_cpt_bind_vq_to_grp(cpt, vf, (u8)mbx.data);
+ if ((vftype != OTX_CPT_AE_TYPES) &&
+ (vftype != OTX_CPT_SE_TYPES)) {
+ dev_err(dev, "VF%d binding to eng group %llu failed",
+ vf, mbx.data);
+ otx_cptpf_mbox_send_nack(cpt, vf, &mbx);
+ } else {
+ mbx.msg = OTX_CPT_MSG_QBIND_GRP;
+ mbx.data = vftype;
+ otx_cpt_send_msg_to_vf(cpt, vf, &mbx);
+ }
+ break;
+ case OTX_CPT_MSG_PF_TYPE:
+ mbx.msg = OTX_CPT_MSG_PF_TYPE;
+ mbx.data = cpt->pf_type;
+ otx_cpt_send_msg_to_vf(cpt, vf, &mbx);
+ break;
+ case OTX_CPT_MSG_VQ_PRIORITY:
+ otx_cpt_cfg_vq_priority(cpt, vf, mbx.data);
+ otx_cpt_mbox_send_ack(cpt, vf, &mbx);
+ break;
+ default:
+ dev_err(&cpt->pdev->dev, "Invalid msg from VF%d, msg 0x%llx\n",
+ vf, mbx.msg);
+ break;
+ }
+}
+
+void otx_cpt_mbox_intr_handler (struct otx_cpt_device *cpt, int mbx)
+{
+ u64 intr;
+ u8 vf;
+
+ intr = readq(cpt->reg_base + OTX_CPT_PF_MBOX_INTX(0));
+ pr_debug("PF interrupt mbox%d mask 0x%llx\n", mbx, intr);
+ for (vf = 0; vf < cpt->max_vfs; vf++) {
+ if (intr & (1ULL << vf)) {
+ otx_cpt_handle_mbox_intr(cpt, vf);
+ otx_cpt_clear_mbox_intr(cpt, vf);
+ }
+ }
+}
diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c
new file mode 100644
index 000000000000..d04baa319592
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c
@@ -0,0 +1,1686 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTX CPT driver
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/ctype.h>
+#include <linux/firmware.h>
+#include "otx_cpt_common.h"
+#include "otx_cptpf_ucode.h"
+#include "otx_cptpf.h"
+
+#define CSR_DELAY 30
+/* Tar archive defines */
+#define TAR_MAGIC "ustar"
+#define TAR_MAGIC_LEN 6
+#define TAR_BLOCK_LEN 512
+#define REGTYPE '0'
+#define AREGTYPE '\0'
+
+/* tar header as defined in POSIX 1003.1-1990. */
+struct tar_hdr_t {
+ char name[100];
+ char mode[8];
+ char uid[8];
+ char gid[8];
+ char size[12];
+ char mtime[12];
+ char chksum[8];
+ char typeflag;
+ char linkname[100];
+ char magic[6];
+ char version[2];
+ char uname[32];
+ char gname[32];
+ char devmajor[8];
+ char devminor[8];
+ char prefix[155];
+};
+
+struct tar_blk_t {
+ union {
+ struct tar_hdr_t hdr;
+ char block[TAR_BLOCK_LEN];
+ };
+};
+
+struct tar_arch_info_t {
+ struct list_head ucodes;
+ const struct firmware *fw;
+};
+
+static struct otx_cpt_bitmap get_cores_bmap(struct device *dev,
+ struct otx_cpt_eng_grp_info *eng_grp)
+{
+ struct otx_cpt_bitmap bmap = { {0} };
+ bool found = false;
+ int i;
+
+ if (eng_grp->g->engs_num > OTX_CPT_MAX_ENGINES) {
+ dev_err(dev, "unsupported number of engines %d on octeontx",
+ eng_grp->g->engs_num);
+ return bmap;
+ }
+
+ for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) {
+ if (eng_grp->engs[i].type) {
+ bitmap_or(bmap.bits, bmap.bits,
+ eng_grp->engs[i].bmap,
+ eng_grp->g->engs_num);
+ bmap.size = eng_grp->g->engs_num;
+ found = true;
+ }
+ }
+
+ if (!found)
+ dev_err(dev, "No engines reserved for engine group %d",
+ eng_grp->idx);
+ return bmap;
+}
+
+static int is_eng_type(int val, int eng_type)
+{
+ return val & (1 << eng_type);
+}
+
+static int dev_supports_eng_type(struct otx_cpt_eng_grps *eng_grps,
+ int eng_type)
+{
+ return is_eng_type(eng_grps->eng_types_supported, eng_type);
+}
+
+static void set_ucode_filename(struct otx_cpt_ucode *ucode,
+ const char *filename)
+{
+ strlcpy(ucode->filename, filename, OTX_CPT_UCODE_NAME_LENGTH);
+}
+
+static char *get_eng_type_str(int eng_type)
+{
+ char *str = "unknown";
+
+ switch (eng_type) {
+ case OTX_CPT_SE_TYPES:
+ str = "SE";
+ break;
+
+ case OTX_CPT_AE_TYPES:
+ str = "AE";
+ break;
+ }
+ return str;
+}
+
+static char *get_ucode_type_str(int ucode_type)
+{
+ char *str = "unknown";
+
+ switch (ucode_type) {
+ case (1 << OTX_CPT_SE_TYPES):
+ str = "SE";
+ break;
+
+ case (1 << OTX_CPT_AE_TYPES):
+ str = "AE";
+ break;
+ }
+ return str;
+}
+
+static int get_ucode_type(struct otx_cpt_ucode_hdr *ucode_hdr, int *ucode_type)
+{
+ char tmp_ver_str[OTX_CPT_UCODE_VER_STR_SZ];
+ u32 i, val = 0;
+ u8 nn;
+
+ strlcpy(tmp_ver_str, ucode_hdr->ver_str, OTX_CPT_UCODE_VER_STR_SZ);
+ for (i = 0; i < strlen(tmp_ver_str); i++)
+ tmp_ver_str[i] = tolower(tmp_ver_str[i]);
+
+ nn = ucode_hdr->ver_num.nn;
+ if (strnstr(tmp_ver_str, "se-", OTX_CPT_UCODE_VER_STR_SZ) &&
+ (nn == OTX_CPT_SE_UC_TYPE1 || nn == OTX_CPT_SE_UC_TYPE2 ||
+ nn == OTX_CPT_SE_UC_TYPE3))
+ val |= 1 << OTX_CPT_SE_TYPES;
+ if (strnstr(tmp_ver_str, "ae", OTX_CPT_UCODE_VER_STR_SZ) &&
+ nn == OTX_CPT_AE_UC_TYPE)
+ val |= 1 << OTX_CPT_AE_TYPES;
+
+ *ucode_type = val;
+
+ if (!val)
+ return -EINVAL;
+ if (is_eng_type(val, OTX_CPT_AE_TYPES) &&
+ is_eng_type(val, OTX_CPT_SE_TYPES))
+ return -EINVAL;
+ return 0;
+}
+
+static int is_mem_zero(const char *ptr, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++) {
+ if (ptr[i])
+ return 0;
+ }
+ return 1;
+}
+
+static int cpt_set_ucode_base(struct otx_cpt_eng_grp_info *eng_grp, void *obj)
+{
+ struct otx_cpt_device *cpt = (struct otx_cpt_device *) obj;
+ dma_addr_t dma_addr;
+ struct otx_cpt_bitmap bmap;
+ int i;
+
+ bmap = get_cores_bmap(&cpt->pdev->dev, eng_grp);
+ if (!bmap.size)
+ return -EINVAL;
+
+ if (eng_grp->mirror.is_ena)
+ dma_addr =
+ eng_grp->g->grp[eng_grp->mirror.idx].ucode[0].align_dma;
+ else
+ dma_addr = eng_grp->ucode[0].align_dma;
+
+ /*
+ * Set UCODE_BASE only for the cores which are not used,
+ * other cores should have already valid UCODE_BASE set
+ */
+ for_each_set_bit(i, bmap.bits, bmap.size)
+ if (!eng_grp->g->eng_ref_cnt[i])
+ writeq((u64) dma_addr, cpt->reg_base +
+ OTX_CPT_PF_ENGX_UCODE_BASE(i));
+ return 0;
+}
+
+static int cpt_detach_and_disable_cores(struct otx_cpt_eng_grp_info *eng_grp,
+ void *obj)
+{
+ struct otx_cpt_device *cpt = (struct otx_cpt_device *) obj;
+ struct otx_cpt_bitmap bmap = { {0} };
+ int timeout = 10;
+ int i, busy;
+ u64 reg;
+
+ bmap = get_cores_bmap(&cpt->pdev->dev, eng_grp);
+ if (!bmap.size)
+ return -EINVAL;
+
+ /* Detach the cores from group */
+ reg = readq(cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx));
+ for_each_set_bit(i, bmap.bits, bmap.size) {
+ if (reg & (1ull << i)) {
+ eng_grp->g->eng_ref_cnt[i]--;
+ reg &= ~(1ull << i);
+ }
+ }
+ writeq(reg, cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx));
+
+ /* Wait for cores to become idle */
+ do {
+ busy = 0;
+ usleep_range(10000, 20000);
+ if (timeout-- < 0)
+ return -EBUSY;
+
+ reg = readq(cpt->reg_base + OTX_CPT_PF_EXEC_BUSY);
+ for_each_set_bit(i, bmap.bits, bmap.size)
+ if (reg & (1ull << i)) {
+ busy = 1;
+ break;
+ }
+ } while (busy);
+
+ /* Disable the cores only if they are not used anymore */
+ reg = readq(cpt->reg_base + OTX_CPT_PF_EXE_CTL);
+ for_each_set_bit(i, bmap.bits, bmap.size)
+ if (!eng_grp->g->eng_ref_cnt[i])
+ reg &= ~(1ull << i);
+ writeq(reg, cpt->reg_base + OTX_CPT_PF_EXE_CTL);
+
+ return 0;
+}
+
+static int cpt_attach_and_enable_cores(struct otx_cpt_eng_grp_info *eng_grp,
+ void *obj)
+{
+ struct otx_cpt_device *cpt = (struct otx_cpt_device *) obj;
+ struct otx_cpt_bitmap bmap;
+ u64 reg;
+ int i;
+
+ bmap = get_cores_bmap(&cpt->pdev->dev, eng_grp);
+ if (!bmap.size)
+ return -EINVAL;
+
+ /* Attach the cores to the group */
+ reg = readq(cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx));
+ for_each_set_bit(i, bmap.bits, bmap.size) {
+ if (!(reg & (1ull << i))) {
+ eng_grp->g->eng_ref_cnt[i]++;
+ reg |= 1ull << i;
+ }
+ }
+ writeq(reg, cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx));
+
+ /* Enable the cores */
+ reg = readq(cpt->reg_base + OTX_CPT_PF_EXE_CTL);
+ for_each_set_bit(i, bmap.bits, bmap.size)
+ reg |= 1ull << i;
+ writeq(reg, cpt->reg_base + OTX_CPT_PF_EXE_CTL);
+
+ return 0;
+}
+
+static int process_tar_file(struct device *dev,
+ struct tar_arch_info_t *tar_arch, char *filename,
+ const u8 *data, u32 size)
+{
+ struct tar_ucode_info_t *tar_info;
+ struct otx_cpt_ucode_hdr *ucode_hdr;
+ int ucode_type, ucode_size;
+
+ /*
+ * If size is less than microcode header size then don't report
+ * an error because it might not be microcode file, just process
+ * next file from archive
+ */
+ if (size < sizeof(struct otx_cpt_ucode_hdr))
+ return 0;
+
+ ucode_hdr = (struct otx_cpt_ucode_hdr *) data;
+ /*
+ * If microcode version can't be found don't report an error
+ * because it might not be microcode file, just process next file
+ */
+ if (get_ucode_type(ucode_hdr, &ucode_type))
+ return 0;
+
+ ucode_size = ntohl(ucode_hdr->code_length) * 2;
+ if (!ucode_size || (size < round_up(ucode_size, 16) +
+ sizeof(struct otx_cpt_ucode_hdr) + OTX_CPT_UCODE_SIGN_LEN)) {
+ dev_err(dev, "Ucode %s invalid size", filename);
+ return -EINVAL;
+ }
+
+ tar_info = kzalloc(sizeof(struct tar_ucode_info_t), GFP_KERNEL);
+ if (!tar_info)
+ return -ENOMEM;
+
+ tar_info->ucode_ptr = data;
+ set_ucode_filename(&tar_info->ucode, filename);
+ memcpy(tar_info->ucode.ver_str, ucode_hdr->ver_str,
+ OTX_CPT_UCODE_VER_STR_SZ);
+ tar_info->ucode.ver_num = ucode_hdr->ver_num;
+ tar_info->ucode.type = ucode_type;
+ tar_info->ucode.size = ucode_size;
+ list_add_tail(&tar_info->list, &tar_arch->ucodes);
+
+ return 0;
+}
+
+static void release_tar_archive(struct tar_arch_info_t *tar_arch)
+{
+ struct tar_ucode_info_t *curr, *temp;
+
+ if (!tar_arch)
+ return;
+
+ list_for_each_entry_safe(curr, temp, &tar_arch->ucodes, list) {
+ list_del(&curr->list);
+ kfree(curr);
+ }
+
+ if (tar_arch->fw)
+ release_firmware(tar_arch->fw);
+ kfree(tar_arch);
+}
+
+static struct tar_ucode_info_t *get_uc_from_tar_archive(
+ struct tar_arch_info_t *tar_arch,
+ int ucode_type)
+{
+ struct tar_ucode_info_t *curr, *uc_found = NULL;
+
+ list_for_each_entry(curr, &tar_arch->ucodes, list) {
+ if (!is_eng_type(curr->ucode.type, ucode_type))
+ continue;
+
+ if (!uc_found) {
+ uc_found = curr;
+ continue;
+ }
+
+ switch (ucode_type) {
+ case OTX_CPT_AE_TYPES:
+ break;
+
+ case OTX_CPT_SE_TYPES:
+ if (uc_found->ucode.ver_num.nn == OTX_CPT_SE_UC_TYPE2 ||
+ (uc_found->ucode.ver_num.nn == OTX_CPT_SE_UC_TYPE3
+ && curr->ucode.ver_num.nn == OTX_CPT_SE_UC_TYPE1))
+ uc_found = curr;
+ break;
+ }
+ }
+
+ return uc_found;
+}
+
+static void print_tar_dbg_info(struct tar_arch_info_t *tar_arch,
+ char *tar_filename)
+{
+ struct tar_ucode_info_t *curr;
+
+ pr_debug("Tar archive filename %s", tar_filename);
+ pr_debug("Tar archive pointer %p, size %ld", tar_arch->fw->data,
+ tar_arch->fw->size);
+ list_for_each_entry(curr, &tar_arch->ucodes, list) {
+ pr_debug("Ucode filename %s", curr->ucode.filename);
+ pr_debug("Ucode version string %s", curr->ucode.ver_str);
+ pr_debug("Ucode version %d.%d.%d.%d",
+ curr->ucode.ver_num.nn, curr->ucode.ver_num.xx,
+ curr->ucode.ver_num.yy, curr->ucode.ver_num.zz);
+ pr_debug("Ucode type (%d) %s", curr->ucode.type,
+ get_ucode_type_str(curr->ucode.type));
+ pr_debug("Ucode size %d", curr->ucode.size);
+ pr_debug("Ucode ptr %p\n", curr->ucode_ptr);
+ }
+}
+
+static struct tar_arch_info_t *load_tar_archive(struct device *dev,
+ char *tar_filename)
+{
+ struct tar_arch_info_t *tar_arch = NULL;
+ struct tar_blk_t *tar_blk;
+ unsigned int cur_size;
+ size_t tar_offs = 0;
+ size_t tar_size;
+ int ret;
+
+ tar_arch = kzalloc(sizeof(struct tar_arch_info_t), GFP_KERNEL);
+ if (!tar_arch)
+ return NULL;
+
+ INIT_LIST_HEAD(&tar_arch->ucodes);
+
+ /* Load tar archive */
+ ret = request_firmware(&tar_arch->fw, tar_filename, dev);
+ if (ret)
+ goto release_tar_arch;
+
+ if (tar_arch->fw->size < TAR_BLOCK_LEN) {
+ dev_err(dev, "Invalid tar archive %s ", tar_filename);
+ goto release_tar_arch;
+ }
+
+ tar_size = tar_arch->fw->size;
+ tar_blk = (struct tar_blk_t *) tar_arch->fw->data;
+ if (strncmp(tar_blk->hdr.magic, TAR_MAGIC, TAR_MAGIC_LEN - 1)) {
+ dev_err(dev, "Unsupported format of tar archive %s",
+ tar_filename);
+ goto release_tar_arch;
+ }
+
+ while (1) {
+ /* Read current file size */
+ ret = kstrtouint(tar_blk->hdr.size, 8, &cur_size);
+ if (ret)
+ goto release_tar_arch;
+
+ if (tar_offs + cur_size > tar_size ||
+ tar_offs + 2*TAR_BLOCK_LEN > tar_size) {
+ dev_err(dev, "Invalid tar archive %s ", tar_filename);
+ goto release_tar_arch;
+ }
+
+ tar_offs += TAR_BLOCK_LEN;
+ if (tar_blk->hdr.typeflag == REGTYPE ||
+ tar_blk->hdr.typeflag == AREGTYPE) {
+ ret = process_tar_file(dev, tar_arch,
+ tar_blk->hdr.name,
+ &tar_arch->fw->data[tar_offs],
+ cur_size);
+ if (ret)
+ goto release_tar_arch;
+ }
+
+ tar_offs += (cur_size/TAR_BLOCK_LEN) * TAR_BLOCK_LEN;
+ if (cur_size % TAR_BLOCK_LEN)
+ tar_offs += TAR_BLOCK_LEN;
+
+ /* Check for the end of the archive */
+ if (tar_offs + 2*TAR_BLOCK_LEN > tar_size) {
+ dev_err(dev, "Invalid tar archive %s ", tar_filename);
+ goto release_tar_arch;
+ }
+
+ if (is_mem_zero(&tar_arch->fw->data[tar_offs],
+ 2*TAR_BLOCK_LEN))
+ break;
+
+ /* Read next block from tar archive */
+ tar_blk = (struct tar_blk_t *) &tar_arch->fw->data[tar_offs];
+ }
+
+ print_tar_dbg_info(tar_arch, tar_filename);
+ return tar_arch;
+release_tar_arch:
+ release_tar_archive(tar_arch);
+ return NULL;
+}
+
+static struct otx_cpt_engs_rsvd *find_engines_by_type(
+ struct otx_cpt_eng_grp_info *eng_grp,
+ int eng_type)
+{
+ int i;
+
+ for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) {
+ if (!eng_grp->engs[i].type)
+ continue;
+
+ if (eng_grp->engs[i].type == eng_type)
+ return &eng_grp->engs[i];
+ }
+ return NULL;
+}
+
+int otx_cpt_uc_supports_eng_type(struct otx_cpt_ucode *ucode, int eng_type)
+{
+ return is_eng_type(ucode->type, eng_type);
+}
+EXPORT_SYMBOL_GPL(otx_cpt_uc_supports_eng_type);
+
+int otx_cpt_eng_grp_has_eng_type(struct otx_cpt_eng_grp_info *eng_grp,
+ int eng_type)
+{
+ struct otx_cpt_engs_rsvd *engs;
+
+ engs = find_engines_by_type(eng_grp, eng_type);
+
+ return (engs != NULL ? 1 : 0);
+}
+EXPORT_SYMBOL_GPL(otx_cpt_eng_grp_has_eng_type);
+
+static void print_ucode_info(struct otx_cpt_eng_grp_info *eng_grp,
+ char *buf, int size)
+{
+ if (eng_grp->mirror.is_ena) {
+ scnprintf(buf, size, "%s (shared with engine_group%d)",
+ eng_grp->g->grp[eng_grp->mirror.idx].ucode[0].ver_str,
+ eng_grp->mirror.idx);
+ } else {
+ scnprintf(buf, size, "%s", eng_grp->ucode[0].ver_str);
+ }
+}
+
+static void print_engs_info(struct otx_cpt_eng_grp_info *eng_grp,
+ char *buf, int size, int idx)
+{
+ struct otx_cpt_engs_rsvd *mirrored_engs = NULL;
+ struct otx_cpt_engs_rsvd *engs;
+ int len, i;
+
+ buf[0] = '\0';
+ for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) {
+ engs = &eng_grp->engs[i];
+ if (!engs->type)
+ continue;
+ if (idx != -1 && idx != i)
+ continue;
+
+ if (eng_grp->mirror.is_ena)
+ mirrored_engs = find_engines_by_type(
+ &eng_grp->g->grp[eng_grp->mirror.idx],
+ engs->type);
+ if (i > 0 && idx == -1) {
+ len = strlen(buf);
+ scnprintf(buf+len, size-len, ", ");
+ }
+
+ len = strlen(buf);
+ scnprintf(buf+len, size-len, "%d %s ", mirrored_engs ?
+ engs->count + mirrored_engs->count : engs->count,
+ get_eng_type_str(engs->type));
+ if (mirrored_engs) {
+ len = strlen(buf);
+ scnprintf(buf+len, size-len,
+ "(%d shared with engine_group%d) ",
+ engs->count <= 0 ? engs->count +
+ mirrored_engs->count : mirrored_engs->count,
+ eng_grp->mirror.idx);
+ }
+ }
+}
+
+static void print_ucode_dbg_info(struct otx_cpt_ucode *ucode)
+{
+ pr_debug("Ucode info");
+ pr_debug("Ucode version string %s", ucode->ver_str);
+ pr_debug("Ucode version %d.%d.%d.%d", ucode->ver_num.nn,
+ ucode->ver_num.xx, ucode->ver_num.yy, ucode->ver_num.zz);
+ pr_debug("Ucode type %s", get_ucode_type_str(ucode->type));
+ pr_debug("Ucode size %d", ucode->size);
+ pr_debug("Ucode virt address %16.16llx", (u64)ucode->align_va);
+ pr_debug("Ucode phys address %16.16llx\n", ucode->align_dma);
+}
+
+static void cpt_print_engines_mask(struct otx_cpt_eng_grp_info *eng_grp,
+ struct device *dev, char *buf, int size)
+{
+ struct otx_cpt_bitmap bmap;
+ u32 mask[2];
+
+ bmap = get_cores_bmap(dev, eng_grp);
+ if (!bmap.size) {
+ scnprintf(buf, size, "unknown");
+ return;
+ }
+ bitmap_to_arr32(mask, bmap.bits, bmap.size);
+ scnprintf(buf, size, "%8.8x %8.8x", mask[1], mask[0]);
+}
+
+
+static void print_dbg_info(struct device *dev,
+ struct otx_cpt_eng_grps *eng_grps)
+{
+ char engs_info[2*OTX_CPT_UCODE_NAME_LENGTH];
+ struct otx_cpt_eng_grp_info *mirrored_grp;
+ char engs_mask[OTX_CPT_UCODE_NAME_LENGTH];
+ struct otx_cpt_eng_grp_info *grp;
+ struct otx_cpt_engs_rsvd *engs;
+ u32 mask[4];
+ int i, j;
+
+ pr_debug("Engine groups global info");
+ pr_debug("max SE %d, max AE %d",
+ eng_grps->avail.max_se_cnt, eng_grps->avail.max_ae_cnt);
+ pr_debug("free SE %d", eng_grps->avail.se_cnt);
+ pr_debug("free AE %d", eng_grps->avail.ae_cnt);
+
+ for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) {
+ grp = &eng_grps->grp[i];
+ pr_debug("engine_group%d, state %s", i, grp->is_enabled ?
+ "enabled" : "disabled");
+ if (grp->is_enabled) {
+ mirrored_grp = &eng_grps->grp[grp->mirror.idx];
+ pr_debug("Ucode0 filename %s, version %s",
+ grp->mirror.is_ena ?
+ mirrored_grp->ucode[0].filename :
+ grp->ucode[0].filename,
+ grp->mirror.is_ena ?
+ mirrored_grp->ucode[0].ver_str :
+ grp->ucode[0].ver_str);
+ }
+
+ for (j = 0; j < OTX_CPT_MAX_ETYPES_PER_GRP; j++) {
+ engs = &grp->engs[j];
+ if (engs->type) {
+ print_engs_info(grp, engs_info,
+ 2*OTX_CPT_UCODE_NAME_LENGTH, j);
+ pr_debug("Slot%d: %s", j, engs_info);
+ bitmap_to_arr32(mask, engs->bmap,
+ eng_grps->engs_num);
+ pr_debug("Mask: %8.8x %8.8x %8.8x %8.8x",
+ mask[3], mask[2], mask[1], mask[0]);
+ } else
+ pr_debug("Slot%d not used", j);
+ }
+ if (grp->is_enabled) {
+ cpt_print_engines_mask(grp, dev, engs_mask,
+ OTX_CPT_UCODE_NAME_LENGTH);
+ pr_debug("Cmask: %s", engs_mask);
+ }
+ }
+}
+
+static int update_engines_avail_count(struct device *dev,
+ struct otx_cpt_engs_available *avail,
+ struct otx_cpt_engs_rsvd *engs, int val)
+{
+ switch (engs->type) {
+ case OTX_CPT_SE_TYPES:
+ avail->se_cnt += val;
+ break;
+
+ case OTX_CPT_AE_TYPES:
+ avail->ae_cnt += val;
+ break;
+
+ default:
+ dev_err(dev, "Invalid engine type %d\n", engs->type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int update_engines_offset(struct device *dev,
+ struct otx_cpt_engs_available *avail,
+ struct otx_cpt_engs_rsvd *engs)
+{
+ switch (engs->type) {
+ case OTX_CPT_SE_TYPES:
+ engs->offset = 0;
+ break;
+
+ case OTX_CPT_AE_TYPES:
+ engs->offset = avail->max_se_cnt;
+ break;
+
+ default:
+ dev_err(dev, "Invalid engine type %d\n", engs->type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int release_engines(struct device *dev, struct otx_cpt_eng_grp_info *grp)
+{
+ int i, ret = 0;
+
+ for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) {
+ if (!grp->engs[i].type)
+ continue;
+
+ if (grp->engs[i].count > 0) {
+ ret = update_engines_avail_count(dev, &grp->g->avail,
+ &grp->engs[i],
+ grp->engs[i].count);
+ if (ret)
+ return ret;
+ }
+
+ grp->engs[i].type = 0;
+ grp->engs[i].count = 0;
+ grp->engs[i].offset = 0;
+ grp->engs[i].ucode = NULL;
+ bitmap_zero(grp->engs[i].bmap, grp->g->engs_num);
+ }
+
+ return 0;
+}
+
+static int do_reserve_engines(struct device *dev,
+ struct otx_cpt_eng_grp_info *grp,
+ struct otx_cpt_engines *req_engs)
+{
+ struct otx_cpt_engs_rsvd *engs = NULL;
+ int i, ret;
+
+ for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) {
+ if (!grp->engs[i].type) {
+ engs = &grp->engs[i];
+ break;
+ }
+ }
+
+ if (!engs)
+ return -ENOMEM;
+
+ engs->type = req_engs->type;
+ engs->count = req_engs->count;
+
+ ret = update_engines_offset(dev, &grp->g->avail, engs);
+ if (ret)
+ return ret;
+
+ if (engs->count > 0) {
+ ret = update_engines_avail_count(dev, &grp->g->avail, engs,
+ -engs->count);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int check_engines_availability(struct device *dev,
+ struct otx_cpt_eng_grp_info *grp,
+ struct otx_cpt_engines *req_eng)
+{
+ int avail_cnt = 0;
+
+ switch (req_eng->type) {
+ case OTX_CPT_SE_TYPES:
+ avail_cnt = grp->g->avail.se_cnt;
+ break;
+
+ case OTX_CPT_AE_TYPES:
+ avail_cnt = grp->g->avail.ae_cnt;
+ break;
+
+ default:
+ dev_err(dev, "Invalid engine type %d\n", req_eng->type);
+ return -EINVAL;
+ }
+
+ if (avail_cnt < req_eng->count) {
+ dev_err(dev,
+ "Error available %s engines %d < than requested %d",
+ get_eng_type_str(req_eng->type),
+ avail_cnt, req_eng->count);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static int reserve_engines(struct device *dev, struct otx_cpt_eng_grp_info *grp,
+ struct otx_cpt_engines *req_engs, int req_cnt)
+{
+ int i, ret;
+
+ /* Validate if a number of requested engines is available */
+ for (i = 0; i < req_cnt; i++) {
+ ret = check_engines_availability(dev, grp, &req_engs[i]);
+ if (ret)
+ return ret;
+ }
+
+ /* Reserve requested engines for this engine group */
+ for (i = 0; i < req_cnt; i++) {
+ ret = do_reserve_engines(dev, grp, &req_engs[i]);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static ssize_t eng_grp_info_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ char ucode_info[2*OTX_CPT_UCODE_NAME_LENGTH];
+ char engs_info[2*OTX_CPT_UCODE_NAME_LENGTH];
+ char engs_mask[OTX_CPT_UCODE_NAME_LENGTH];
+ struct otx_cpt_eng_grp_info *eng_grp;
+ int ret;
+
+ eng_grp = container_of(attr, struct otx_cpt_eng_grp_info, info_attr);
+ mutex_lock(&eng_grp->g->lock);
+
+ print_engs_info(eng_grp, engs_info, 2*OTX_CPT_UCODE_NAME_LENGTH, -1);
+ print_ucode_info(eng_grp, ucode_info, 2*OTX_CPT_UCODE_NAME_LENGTH);
+ cpt_print_engines_mask(eng_grp, dev, engs_mask,
+ OTX_CPT_UCODE_NAME_LENGTH);
+ ret = scnprintf(buf, PAGE_SIZE,
+ "Microcode : %s\nEngines: %s\nEngines mask: %s\n",
+ ucode_info, engs_info, engs_mask);
+
+ mutex_unlock(&eng_grp->g->lock);
+ return ret;
+}
+
+static int create_sysfs_eng_grps_info(struct device *dev,
+ struct otx_cpt_eng_grp_info *eng_grp)
+{
+ int ret;
+
+ eng_grp->info_attr.show = eng_grp_info_show;
+ eng_grp->info_attr.store = NULL;
+ eng_grp->info_attr.attr.name = eng_grp->sysfs_info_name;
+ eng_grp->info_attr.attr.mode = 0440;
+ sysfs_attr_init(&eng_grp->info_attr.attr);
+ ret = device_create_file(dev, &eng_grp->info_attr);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static void ucode_unload(struct device *dev, struct otx_cpt_ucode *ucode)
+{
+ if (ucode->va) {
+ dma_free_coherent(dev, ucode->size + OTX_CPT_UCODE_ALIGNMENT,
+ ucode->va, ucode->dma);
+ ucode->va = NULL;
+ ucode->align_va = NULL;
+ ucode->dma = 0;
+ ucode->align_dma = 0;
+ ucode->size = 0;
+ }
+
+ memset(&ucode->ver_str, 0, OTX_CPT_UCODE_VER_STR_SZ);
+ memset(&ucode->ver_num, 0, sizeof(struct otx_cpt_ucode_ver_num));
+ set_ucode_filename(ucode, "");
+ ucode->type = 0;
+}
+
+static int copy_ucode_to_dma_mem(struct device *dev,
+ struct otx_cpt_ucode *ucode,
+ const u8 *ucode_data)
+{
+ u32 i;
+
+ /* Allocate DMAable space */
+ ucode->va = dma_alloc_coherent(dev, ucode->size +
+ OTX_CPT_UCODE_ALIGNMENT,
+ &ucode->dma, GFP_KERNEL);
+ if (!ucode->va) {
+ dev_err(dev, "Unable to allocate space for microcode");
+ return -ENOMEM;
+ }
+ ucode->align_va = PTR_ALIGN(ucode->va, OTX_CPT_UCODE_ALIGNMENT);
+ ucode->align_dma = PTR_ALIGN(ucode->dma, OTX_CPT_UCODE_ALIGNMENT);
+
+ memcpy((void *) ucode->align_va, (void *) ucode_data +
+ sizeof(struct otx_cpt_ucode_hdr), ucode->size);
+
+ /* Byte swap 64-bit */
+ for (i = 0; i < (ucode->size / 8); i++)
+ ((u64 *)ucode->align_va)[i] =
+ cpu_to_be64(((u64 *)ucode->align_va)[i]);
+ /* Ucode needs 16-bit swap */
+ for (i = 0; i < (ucode->size / 2); i++)
+ ((u16 *)ucode->align_va)[i] =
+ cpu_to_be16(((u16 *)ucode->align_va)[i]);
+ return 0;
+}
+
+static int ucode_load(struct device *dev, struct otx_cpt_ucode *ucode,
+ const char *ucode_filename)
+{
+ struct otx_cpt_ucode_hdr *ucode_hdr;
+ const struct firmware *fw;
+ int ret;
+
+ set_ucode_filename(ucode, ucode_filename);
+ ret = request_firmware(&fw, ucode->filename, dev);
+ if (ret)
+ return ret;
+
+ ucode_hdr = (struct otx_cpt_ucode_hdr *) fw->data;
+ memcpy(ucode->ver_str, ucode_hdr->ver_str, OTX_CPT_UCODE_VER_STR_SZ);
+ ucode->ver_num = ucode_hdr->ver_num;
+ ucode->size = ntohl(ucode_hdr->code_length) * 2;
+ if (!ucode->size || (fw->size < round_up(ucode->size, 16)
+ + sizeof(struct otx_cpt_ucode_hdr) + OTX_CPT_UCODE_SIGN_LEN)) {
+ dev_err(dev, "Ucode %s invalid size", ucode_filename);
+ ret = -EINVAL;
+ goto release_fw;
+ }
+
+ ret = get_ucode_type(ucode_hdr, &ucode->type);
+ if (ret) {
+ dev_err(dev, "Microcode %s unknown type 0x%x", ucode->filename,
+ ucode->type);
+ goto release_fw;
+ }
+
+ ret = copy_ucode_to_dma_mem(dev, ucode, fw->data);
+ if (ret)
+ goto release_fw;
+
+ print_ucode_dbg_info(ucode);
+release_fw:
+ release_firmware(fw);
+ return ret;
+}
+
+static int enable_eng_grp(struct otx_cpt_eng_grp_info *eng_grp,
+ void *obj)
+{
+ int ret;
+
+ ret = cpt_set_ucode_base(eng_grp, obj);
+ if (ret)
+ return ret;
+
+ ret = cpt_attach_and_enable_cores(eng_grp, obj);
+ return ret;
+}
+
+static int disable_eng_grp(struct device *dev,
+ struct otx_cpt_eng_grp_info *eng_grp,
+ void *obj)
+{
+ int i, ret;
+
+ ret = cpt_detach_and_disable_cores(eng_grp, obj);
+ if (ret)
+ return ret;
+
+ /* Unload ucode used by this engine group */
+ ucode_unload(dev, &eng_grp->ucode[0]);
+
+ for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) {
+ if (!eng_grp->engs[i].type)
+ continue;
+
+ eng_grp->engs[i].ucode = &eng_grp->ucode[0];
+ }
+
+ ret = cpt_set_ucode_base(eng_grp, obj);
+
+ return ret;
+}
+
+static void setup_eng_grp_mirroring(struct otx_cpt_eng_grp_info *dst_grp,
+ struct otx_cpt_eng_grp_info *src_grp)
+{
+ /* Setup fields for engine group which is mirrored */
+ src_grp->mirror.is_ena = false;
+ src_grp->mirror.idx = 0;
+ src_grp->mirror.ref_count++;
+
+ /* Setup fields for mirroring engine group */
+ dst_grp->mirror.is_ena = true;
+ dst_grp->mirror.idx = src_grp->idx;
+ dst_grp->mirror.ref_count = 0;
+}
+
+static void remove_eng_grp_mirroring(struct otx_cpt_eng_grp_info *dst_grp)
+{
+ struct otx_cpt_eng_grp_info *src_grp;
+
+ if (!dst_grp->mirror.is_ena)
+ return;
+
+ src_grp = &dst_grp->g->grp[dst_grp->mirror.idx];
+
+ src_grp->mirror.ref_count--;
+ dst_grp->mirror.is_ena = false;
+ dst_grp->mirror.idx = 0;
+ dst_grp->mirror.ref_count = 0;
+}
+
+static void update_requested_engs(struct otx_cpt_eng_grp_info *mirrored_eng_grp,
+ struct otx_cpt_engines *engs, int engs_cnt)
+{
+ struct otx_cpt_engs_rsvd *mirrored_engs;
+ int i;
+
+ for (i = 0; i < engs_cnt; i++) {
+ mirrored_engs = find_engines_by_type(mirrored_eng_grp,
+ engs[i].type);
+ if (!mirrored_engs)
+ continue;
+
+ /*
+ * If mirrored group has this type of engines attached then
+ * there are 3 scenarios possible:
+ * 1) mirrored_engs.count == engs[i].count then all engines
+ * from mirrored engine group will be shared with this engine
+ * group
+ * 2) mirrored_engs.count > engs[i].count then only a subset of
+ * engines from mirrored engine group will be shared with this
+ * engine group
+ * 3) mirrored_engs.count < engs[i].count then all engines
+ * from mirrored engine group will be shared with this group
+ * and additional engines will be reserved for exclusively use
+ * by this engine group
+ */
+ engs[i].count -= mirrored_engs->count;
+ }
+}
+
+static struct otx_cpt_eng_grp_info *find_mirrored_eng_grp(
+ struct otx_cpt_eng_grp_info *grp)
+{
+ struct otx_cpt_eng_grps *eng_grps = grp->g;
+ int i;
+
+ for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) {
+ if (!eng_grps->grp[i].is_enabled)
+ continue;
+ if (eng_grps->grp[i].ucode[0].type)
+ continue;
+ if (grp->idx == i)
+ continue;
+ if (!strncasecmp(eng_grps->grp[i].ucode[0].ver_str,
+ grp->ucode[0].ver_str,
+ OTX_CPT_UCODE_VER_STR_SZ))
+ return &eng_grps->grp[i];
+ }
+
+ return NULL;
+}
+
+static struct otx_cpt_eng_grp_info *find_unused_eng_grp(
+ struct otx_cpt_eng_grps *eng_grps)
+{
+ int i;
+
+ for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) {
+ if (!eng_grps->grp[i].is_enabled)
+ return &eng_grps->grp[i];
+ }
+ return NULL;
+}
+
+static int eng_grp_update_masks(struct device *dev,
+ struct otx_cpt_eng_grp_info *eng_grp)
+{
+ struct otx_cpt_engs_rsvd *engs, *mirrored_engs;
+ struct otx_cpt_bitmap tmp_bmap = { {0} };
+ int i, j, cnt, max_cnt;
+ int bit;
+
+ for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) {
+ engs = &eng_grp->engs[i];
+ if (!engs->type)
+ continue;
+ if (engs->count <= 0)
+ continue;
+
+ switch (engs->type) {
+ case OTX_CPT_SE_TYPES:
+ max_cnt = eng_grp->g->avail.max_se_cnt;
+ break;
+
+ case OTX_CPT_AE_TYPES:
+ max_cnt = eng_grp->g->avail.max_ae_cnt;
+ break;
+
+ default:
+ dev_err(dev, "Invalid engine type %d", engs->type);
+ return -EINVAL;
+ }
+
+ cnt = engs->count;
+ WARN_ON(engs->offset + max_cnt > OTX_CPT_MAX_ENGINES);
+ bitmap_zero(tmp_bmap.bits, eng_grp->g->engs_num);
+ for (j = engs->offset; j < engs->offset + max_cnt; j++) {
+ if (!eng_grp->g->eng_ref_cnt[j]) {
+ bitmap_set(tmp_bmap.bits, j, 1);
+ cnt--;
+ if (!cnt)
+ break;
+ }
+ }
+
+ if (cnt)
+ return -ENOSPC;
+
+ bitmap_copy(engs->bmap, tmp_bmap.bits, eng_grp->g->engs_num);
+ }
+
+ if (!eng_grp->mirror.is_ena)
+ return 0;
+
+ for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) {
+ engs = &eng_grp->engs[i];
+ if (!engs->type)
+ continue;
+
+ mirrored_engs = find_engines_by_type(
+ &eng_grp->g->grp[eng_grp->mirror.idx],
+ engs->type);
+ WARN_ON(!mirrored_engs && engs->count <= 0);
+ if (!mirrored_engs)
+ continue;
+
+ bitmap_copy(tmp_bmap.bits, mirrored_engs->bmap,
+ eng_grp->g->engs_num);
+ if (engs->count < 0) {
+ bit = find_first_bit(mirrored_engs->bmap,
+ eng_grp->g->engs_num);
+ bitmap_clear(tmp_bmap.bits, bit, -engs->count);
+ }
+ bitmap_or(engs->bmap, engs->bmap, tmp_bmap.bits,
+ eng_grp->g->engs_num);
+ }
+ return 0;
+}
+
+static int delete_engine_group(struct device *dev,
+ struct otx_cpt_eng_grp_info *eng_grp)
+{
+ int i, ret;
+
+ if (!eng_grp->is_enabled)
+ return -EINVAL;
+
+ if (eng_grp->mirror.ref_count) {
+ dev_err(dev, "Can't delete engine_group%d as it is used by:",
+ eng_grp->idx);
+ for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) {
+ if (eng_grp->g->grp[i].mirror.is_ena &&
+ eng_grp->g->grp[i].mirror.idx == eng_grp->idx)
+ dev_err(dev, "engine_group%d", i);
+ }
+ return -EINVAL;
+ }
+
+ /* Removing engine group mirroring if enabled */
+ remove_eng_grp_mirroring(eng_grp);
+
+ /* Disable engine group */
+ ret = disable_eng_grp(dev, eng_grp, eng_grp->g->obj);
+ if (ret)
+ return ret;
+
+ /* Release all engines held by this engine group */
+ ret = release_engines(dev, eng_grp);
+ if (ret)
+ return ret;
+
+ device_remove_file(dev, &eng_grp->info_attr);
+ eng_grp->is_enabled = false;
+
+ return 0;
+}
+
+static int validate_1_ucode_scenario(struct device *dev,
+ struct otx_cpt_eng_grp_info *eng_grp,
+ struct otx_cpt_engines *engs, int engs_cnt)
+{
+ int i;
+
+ /* Verify that ucode loaded supports requested engine types */
+ for (i = 0; i < engs_cnt; i++) {
+ if (!otx_cpt_uc_supports_eng_type(&eng_grp->ucode[0],
+ engs[i].type)) {
+ dev_err(dev,
+ "Microcode %s does not support %s engines",
+ eng_grp->ucode[0].filename,
+ get_eng_type_str(engs[i].type));
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static void update_ucode_ptrs(struct otx_cpt_eng_grp_info *eng_grp)
+{
+ struct otx_cpt_ucode *ucode;
+
+ if (eng_grp->mirror.is_ena)
+ ucode = &eng_grp->g->grp[eng_grp->mirror.idx].ucode[0];
+ else
+ ucode = &eng_grp->ucode[0];
+ WARN_ON(!eng_grp->engs[0].type);
+ eng_grp->engs[0].ucode = ucode;
+}
+
+static int create_engine_group(struct device *dev,
+ struct otx_cpt_eng_grps *eng_grps,
+ struct otx_cpt_engines *engs, int engs_cnt,
+ void *ucode_data[], int ucodes_cnt,
+ bool use_uc_from_tar_arch)
+{
+ struct otx_cpt_eng_grp_info *mirrored_eng_grp;
+ struct tar_ucode_info_t *tar_info;
+ struct otx_cpt_eng_grp_info *eng_grp;
+ int i, ret = 0;
+
+ if (ucodes_cnt > OTX_CPT_MAX_ETYPES_PER_GRP)
+ return -EINVAL;
+
+ /* Validate if requested engine types are supported by this device */
+ for (i = 0; i < engs_cnt; i++)
+ if (!dev_supports_eng_type(eng_grps, engs[i].type)) {
+ dev_err(dev, "Device does not support %s engines",
+ get_eng_type_str(engs[i].type));
+ return -EPERM;
+ }
+
+ /* Find engine group which is not used */
+ eng_grp = find_unused_eng_grp(eng_grps);
+ if (!eng_grp) {
+ dev_err(dev, "Error all engine groups are being used");
+ return -ENOSPC;
+ }
+
+ /* Load ucode */
+ for (i = 0; i < ucodes_cnt; i++) {
+ if (use_uc_from_tar_arch) {
+ tar_info = (struct tar_ucode_info_t *) ucode_data[i];
+ eng_grp->ucode[i] = tar_info->ucode;
+ ret = copy_ucode_to_dma_mem(dev, &eng_grp->ucode[i],
+ tar_info->ucode_ptr);
+ } else
+ ret = ucode_load(dev, &eng_grp->ucode[i],
+ (char *) ucode_data[i]);
+ if (ret)
+ goto err_ucode_unload;
+ }
+
+ /* Validate scenario where 1 ucode is used */
+ ret = validate_1_ucode_scenario(dev, eng_grp, engs, engs_cnt);
+ if (ret)
+ goto err_ucode_unload;
+
+ /* Check if this group mirrors another existing engine group */
+ mirrored_eng_grp = find_mirrored_eng_grp(eng_grp);
+ if (mirrored_eng_grp) {
+ /* Setup mirroring */
+ setup_eng_grp_mirroring(eng_grp, mirrored_eng_grp);
+
+ /*
+ * Update count of requested engines because some
+ * of them might be shared with mirrored group
+ */
+ update_requested_engs(mirrored_eng_grp, engs, engs_cnt);
+ }
+
+ /* Reserve engines */
+ ret = reserve_engines(dev, eng_grp, engs, engs_cnt);
+ if (ret)
+ goto err_ucode_unload;
+
+ /* Update ucode pointers used by engines */
+ update_ucode_ptrs(eng_grp);
+
+ /* Update engine masks used by this group */
+ ret = eng_grp_update_masks(dev, eng_grp);
+ if (ret)
+ goto err_release_engs;
+
+ /* Create sysfs entry for engine group info */
+ ret = create_sysfs_eng_grps_info(dev, eng_grp);
+ if (ret)
+ goto err_release_engs;
+
+ /* Enable engine group */
+ ret = enable_eng_grp(eng_grp, eng_grps->obj);
+ if (ret)
+ goto err_release_engs;
+
+ /*
+ * If this engine group mirrors another engine group
+ * then we need to unload ucode as we will use ucode
+ * from mirrored engine group
+ */
+ if (eng_grp->mirror.is_ena)
+ ucode_unload(dev, &eng_grp->ucode[0]);
+
+ eng_grp->is_enabled = true;
+ if (eng_grp->mirror.is_ena)
+ dev_info(dev,
+ "Engine_group%d: reuse microcode %s from group %d",
+ eng_grp->idx, mirrored_eng_grp->ucode[0].ver_str,
+ mirrored_eng_grp->idx);
+ else
+ dev_info(dev, "Engine_group%d: microcode loaded %s",
+ eng_grp->idx, eng_grp->ucode[0].ver_str);
+
+ return 0;
+
+err_release_engs:
+ release_engines(dev, eng_grp);
+err_ucode_unload:
+ ucode_unload(dev, &eng_grp->ucode[0]);
+ return ret;
+}
+
+static ssize_t ucode_load_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct otx_cpt_engines engs[OTX_CPT_MAX_ETYPES_PER_GRP] = { {0} };
+ char *ucode_filename[OTX_CPT_MAX_ETYPES_PER_GRP];
+ char tmp_buf[OTX_CPT_UCODE_NAME_LENGTH] = { 0 };
+ char *start, *val, *err_msg, *tmp;
+ struct otx_cpt_eng_grps *eng_grps;
+ int grp_idx = 0, ret = -EINVAL;
+ bool has_se, has_ie, has_ae;
+ int del_grp_idx = -1;
+ int ucode_idx = 0;
+
+ if (strlen(buf) > OTX_CPT_UCODE_NAME_LENGTH)
+ return -EINVAL;
+
+ eng_grps = container_of(attr, struct otx_cpt_eng_grps, ucode_load_attr);
+ err_msg = "Invalid engine group format";
+ strlcpy(tmp_buf, buf, OTX_CPT_UCODE_NAME_LENGTH);
+ start = tmp_buf;
+
+ has_se = has_ie = has_ae = false;
+
+ for (;;) {
+ val = strsep(&start, ";");
+ if (!val)
+ break;
+ val = strim(val);
+ if (!*val)
+ continue;
+
+ if (!strncasecmp(val, "engine_group", 12)) {
+ if (del_grp_idx != -1)
+ goto err_print;
+ tmp = strim(strsep(&val, ":"));
+ if (!val)
+ goto err_print;
+ if (strlen(tmp) != 13)
+ goto err_print;
+ if (kstrtoint((tmp + 12), 10, &del_grp_idx))
+ goto err_print;
+ val = strim(val);
+ if (strncasecmp(val, "null", 4))
+ goto err_print;
+ if (strlen(val) != 4)
+ goto err_print;
+ } else if (!strncasecmp(val, "se", 2) && strchr(val, ':')) {
+ if (has_se || ucode_idx)
+ goto err_print;
+ tmp = strim(strsep(&val, ":"));
+ if (!val)
+ goto err_print;
+ if (strlen(tmp) != 2)
+ goto err_print;
+ if (kstrtoint(strim(val), 10, &engs[grp_idx].count))
+ goto err_print;
+ engs[grp_idx++].type = OTX_CPT_SE_TYPES;
+ has_se = true;
+ } else if (!strncasecmp(val, "ae", 2) && strchr(val, ':')) {
+ if (has_ae || ucode_idx)
+ goto err_print;
+ tmp = strim(strsep(&val, ":"));
+ if (!val)
+ goto err_print;
+ if (strlen(tmp) != 2)
+ goto err_print;
+ if (kstrtoint(strim(val), 10, &engs[grp_idx].count))
+ goto err_print;
+ engs[grp_idx++].type = OTX_CPT_AE_TYPES;
+ has_ae = true;
+ } else {
+ if (ucode_idx > 1)
+ goto err_print;
+ if (!strlen(val))
+ goto err_print;
+ if (strnstr(val, " ", strlen(val)))
+ goto err_print;
+ ucode_filename[ucode_idx++] = val;
+ }
+ }
+
+ /* Validate input parameters */
+ if (del_grp_idx == -1) {
+ if (!(grp_idx && ucode_idx))
+ goto err_print;
+
+ if (ucode_idx > 1 && grp_idx < 2)
+ goto err_print;
+
+ if (grp_idx > OTX_CPT_MAX_ETYPES_PER_GRP) {
+ err_msg = "Error max 2 engine types can be attached";
+ goto err_print;
+ }
+
+ } else {
+ if (del_grp_idx < 0 ||
+ del_grp_idx >= OTX_CPT_MAX_ENGINE_GROUPS) {
+ dev_err(dev, "Invalid engine group index %d",
+ del_grp_idx);
+ ret = -EINVAL;
+ return ret;
+ }
+
+ if (!eng_grps->grp[del_grp_idx].is_enabled) {
+ dev_err(dev, "Error engine_group%d is not configured",
+ del_grp_idx);
+ ret = -EINVAL;
+ return ret;
+ }
+
+ if (grp_idx || ucode_idx)
+ goto err_print;
+ }
+
+ mutex_lock(&eng_grps->lock);
+
+ if (eng_grps->is_rdonly) {
+ dev_err(dev, "Disable VFs before modifying engine groups\n");
+ ret = -EACCES;
+ goto err_unlock;
+ }
+
+ if (del_grp_idx == -1)
+ /* create engine group */
+ ret = create_engine_group(dev, eng_grps, engs, grp_idx,
+ (void **) ucode_filename,
+ ucode_idx, false);
+ else
+ /* delete engine group */
+ ret = delete_engine_group(dev, &eng_grps->grp[del_grp_idx]);
+ if (ret)
+ goto err_unlock;
+
+ print_dbg_info(dev, eng_grps);
+err_unlock:
+ mutex_unlock(&eng_grps->lock);
+ return ret ? ret : count;
+err_print:
+ dev_err(dev, "%s\n", err_msg);
+
+ return ret;
+}
+
+int otx_cpt_try_create_default_eng_grps(struct pci_dev *pdev,
+ struct otx_cpt_eng_grps *eng_grps,
+ int pf_type)
+{
+ struct tar_ucode_info_t *tar_info[OTX_CPT_MAX_ETYPES_PER_GRP] = { 0 };
+ struct otx_cpt_engines engs[OTX_CPT_MAX_ETYPES_PER_GRP] = { {0} };
+ struct tar_arch_info_t *tar_arch = NULL;
+ char *tar_filename;
+ int i, ret = 0;
+
+ mutex_lock(&eng_grps->lock);
+
+ /*
+ * We don't create engine group for kernel crypto if attempt to create
+ * it was already made (when user enabled VFs for the first time)
+ */
+ if (eng_grps->is_first_try)
+ goto unlock_mutex;
+ eng_grps->is_first_try = true;
+
+ /* We create group for kcrypto only if no groups are configured */
+ for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++)
+ if (eng_grps->grp[i].is_enabled)
+ goto unlock_mutex;
+
+ switch (pf_type) {
+ case OTX_CPT_AE:
+ case OTX_CPT_SE:
+ tar_filename = OTX_CPT_UCODE_TAR_FILE_NAME;
+ break;
+
+ default:
+ dev_err(&pdev->dev, "Unknown PF type %d\n", pf_type);
+ ret = -EINVAL;
+ goto unlock_mutex;
+ }
+
+ tar_arch = load_tar_archive(&pdev->dev, tar_filename);
+ if (!tar_arch)
+ goto unlock_mutex;
+
+ /*
+ * If device supports SE engines and there is SE microcode in tar
+ * archive try to create engine group with SE engines for kernel
+ * crypto functionality (symmetric crypto)
+ */
+ tar_info[0] = get_uc_from_tar_archive(tar_arch, OTX_CPT_SE_TYPES);
+ if (tar_info[0] &&
+ dev_supports_eng_type(eng_grps, OTX_CPT_SE_TYPES)) {
+
+ engs[0].type = OTX_CPT_SE_TYPES;
+ engs[0].count = eng_grps->avail.max_se_cnt;
+
+ ret = create_engine_group(&pdev->dev, eng_grps, engs, 1,
+ (void **) tar_info, 1, true);
+ if (ret)
+ goto release_tar_arch;
+ }
+ /*
+ * If device supports AE engines and there is AE microcode in tar
+ * archive try to create engine group with AE engines for asymmetric
+ * crypto functionality.
+ */
+ tar_info[0] = get_uc_from_tar_archive(tar_arch, OTX_CPT_AE_TYPES);
+ if (tar_info[0] &&
+ dev_supports_eng_type(eng_grps, OTX_CPT_AE_TYPES)) {
+
+ engs[0].type = OTX_CPT_AE_TYPES;
+ engs[0].count = eng_grps->avail.max_ae_cnt;
+
+ ret = create_engine_group(&pdev->dev, eng_grps, engs, 1,
+ (void **) tar_info, 1, true);
+ if (ret)
+ goto release_tar_arch;
+ }
+
+ print_dbg_info(&pdev->dev, eng_grps);
+release_tar_arch:
+ release_tar_archive(tar_arch);
+unlock_mutex:
+ mutex_unlock(&eng_grps->lock);
+ return ret;
+}
+
+void otx_cpt_set_eng_grps_is_rdonly(struct otx_cpt_eng_grps *eng_grps,
+ bool is_rdonly)
+{
+ mutex_lock(&eng_grps->lock);
+
+ eng_grps->is_rdonly = is_rdonly;
+
+ mutex_unlock(&eng_grps->lock);
+}
+
+void otx_cpt_disable_all_cores(struct otx_cpt_device *cpt)
+{
+ int grp, timeout = 100;
+ u64 reg;
+
+ /* Disengage the cores from groups */
+ for (grp = 0; grp < OTX_CPT_MAX_ENGINE_GROUPS; grp++) {
+ writeq(0, cpt->reg_base + OTX_CPT_PF_GX_EN(grp));
+ udelay(CSR_DELAY);
+ }
+
+ reg = readq(cpt->reg_base + OTX_CPT_PF_EXEC_BUSY);
+ while (reg) {
+ udelay(CSR_DELAY);
+ reg = readq(cpt->reg_base + OTX_CPT_PF_EXEC_BUSY);
+ if (timeout--) {
+ dev_warn(&cpt->pdev->dev, "Cores still busy");
+ break;
+ }
+ }
+
+ /* Disable the cores */
+ writeq(0, cpt->reg_base + OTX_CPT_PF_EXE_CTL);
+}
+
+void otx_cpt_cleanup_eng_grps(struct pci_dev *pdev,
+ struct otx_cpt_eng_grps *eng_grps)
+{
+ struct otx_cpt_eng_grp_info *grp;
+ int i, j;
+
+ mutex_lock(&eng_grps->lock);
+ if (eng_grps->is_ucode_load_created) {
+ device_remove_file(&pdev->dev,
+ &eng_grps->ucode_load_attr);
+ eng_grps->is_ucode_load_created = false;
+ }
+
+ /* First delete all mirroring engine groups */
+ for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++)
+ if (eng_grps->grp[i].mirror.is_ena)
+ delete_engine_group(&pdev->dev, &eng_grps->grp[i]);
+
+ /* Delete remaining engine groups */
+ for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++)
+ delete_engine_group(&pdev->dev, &eng_grps->grp[i]);
+
+ /* Release memory */
+ for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) {
+ grp = &eng_grps->grp[i];
+ for (j = 0; j < OTX_CPT_MAX_ETYPES_PER_GRP; j++) {
+ kfree(grp->engs[j].bmap);
+ grp->engs[j].bmap = NULL;
+ }
+ }
+
+ mutex_unlock(&eng_grps->lock);
+}
+
+int otx_cpt_init_eng_grps(struct pci_dev *pdev,
+ struct otx_cpt_eng_grps *eng_grps, int pf_type)
+{
+ struct otx_cpt_eng_grp_info *grp;
+ int i, j, ret = 0;
+
+ mutex_init(&eng_grps->lock);
+ eng_grps->obj = pci_get_drvdata(pdev);
+ eng_grps->avail.se_cnt = eng_grps->avail.max_se_cnt;
+ eng_grps->avail.ae_cnt = eng_grps->avail.max_ae_cnt;
+
+ eng_grps->engs_num = eng_grps->avail.max_se_cnt +
+ eng_grps->avail.max_ae_cnt;
+ if (eng_grps->engs_num > OTX_CPT_MAX_ENGINES) {
+ dev_err(&pdev->dev,
+ "Number of engines %d > than max supported %d",
+ eng_grps->engs_num, OTX_CPT_MAX_ENGINES);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) {
+ grp = &eng_grps->grp[i];
+ grp->g = eng_grps;
+ grp->idx = i;
+
+ snprintf(grp->sysfs_info_name, OTX_CPT_UCODE_NAME_LENGTH,
+ "engine_group%d", i);
+ for (j = 0; j < OTX_CPT_MAX_ETYPES_PER_GRP; j++) {
+ grp->engs[j].bmap =
+ kcalloc(BITS_TO_LONGS(eng_grps->engs_num),
+ sizeof(long), GFP_KERNEL);
+ if (!grp->engs[j].bmap) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ }
+ }
+
+ switch (pf_type) {
+ case OTX_CPT_SE:
+ /* OcteonTX 83XX SE CPT PF has only SE engines attached */
+ eng_grps->eng_types_supported = 1 << OTX_CPT_SE_TYPES;
+ break;
+
+ case OTX_CPT_AE:
+ /* OcteonTX 83XX AE CPT PF has only AE engines attached */
+ eng_grps->eng_types_supported = 1 << OTX_CPT_AE_TYPES;
+ break;
+
+ default:
+ dev_err(&pdev->dev, "Unknown PF type %d\n", pf_type);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ eng_grps->ucode_load_attr.show = NULL;
+ eng_grps->ucode_load_attr.store = ucode_load_store;
+ eng_grps->ucode_load_attr.attr.name = "ucode_load";
+ eng_grps->ucode_load_attr.attr.mode = 0220;
+ sysfs_attr_init(&eng_grps->ucode_load_attr.attr);
+ ret = device_create_file(&pdev->dev,
+ &eng_grps->ucode_load_attr);
+ if (ret)
+ goto err;
+ eng_grps->is_ucode_load_created = true;
+
+ print_dbg_info(&pdev->dev, eng_grps);
+ return ret;
+err:
+ otx_cpt_cleanup_eng_grps(pdev, eng_grps);
+ return ret;
+}
diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h
new file mode 100644
index 000000000000..14f02b60d0c2
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Marvell OcteonTX CPT driver
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __OTX_CPTPF_UCODE_H
+#define __OTX_CPTPF_UCODE_H
+
+#include <linux/pci.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include "otx_cpt_hw_types.h"
+
+/* CPT ucode name maximum length */
+#define OTX_CPT_UCODE_NAME_LENGTH 64
+/*
+ * On OcteonTX 83xx platform, only one type of engines is allowed to be
+ * attached to an engine group.
+ */
+#define OTX_CPT_MAX_ETYPES_PER_GRP 1
+
+/* Default tar archive file names */
+#define OTX_CPT_UCODE_TAR_FILE_NAME "cpt8x-mc.tar"
+
+/* CPT ucode alignment */
+#define OTX_CPT_UCODE_ALIGNMENT 128
+
+/* CPT ucode signature size */
+#define OTX_CPT_UCODE_SIGN_LEN 256
+
+/* Microcode version string length */
+#define OTX_CPT_UCODE_VER_STR_SZ 44
+
+/* Maximum number of supported engines/cores on OcteonTX 83XX platform */
+#define OTX_CPT_MAX_ENGINES 64
+
+#define OTX_CPT_ENGS_BITMASK_LEN (OTX_CPT_MAX_ENGINES/(BITS_PER_BYTE * \
+ sizeof(unsigned long)))
+
+/* Microcode types */
+enum otx_cpt_ucode_type {
+ OTX_CPT_AE_UC_TYPE = 1, /* AE-MAIN */
+ OTX_CPT_SE_UC_TYPE1 = 20, /* SE-MAIN - combination of 21 and 22 */
+ OTX_CPT_SE_UC_TYPE2 = 21, /* Fast Path IPSec + AirCrypto */
+ OTX_CPT_SE_UC_TYPE3 = 22, /*
+ * Hash + HMAC + FlexiCrypto + RNG + Full
+ * Feature IPSec + AirCrypto + Kasumi
+ */
+};
+
+struct otx_cpt_bitmap {
+ unsigned long bits[OTX_CPT_ENGS_BITMASK_LEN];
+ int size;
+};
+
+struct otx_cpt_engines {
+ int type;
+ int count;
+};
+
+/* Microcode version number */
+struct otx_cpt_ucode_ver_num {
+ u8 nn;
+ u8 xx;
+ u8 yy;
+ u8 zz;
+};
+
+struct otx_cpt_ucode_hdr {
+ struct otx_cpt_ucode_ver_num ver_num;
+ u8 ver_str[OTX_CPT_UCODE_VER_STR_SZ];
+ u32 code_length;
+ u32 padding[3];
+};
+
+struct otx_cpt_ucode {
+ u8 ver_str[OTX_CPT_UCODE_VER_STR_SZ];/*
+ * ucode version in readable format
+ */
+ struct otx_cpt_ucode_ver_num ver_num;/* ucode version number */
+ char filename[OTX_CPT_UCODE_NAME_LENGTH]; /* ucode filename */
+ dma_addr_t dma; /* phys address of ucode image */
+ dma_addr_t align_dma; /* aligned phys address of ucode image */
+ void *va; /* virt address of ucode image */
+ void *align_va; /* aligned virt address of ucode image */
+ u32 size; /* ucode image size */
+ int type; /* ucode image type SE or AE */
+};
+
+struct tar_ucode_info_t {
+ struct list_head list;
+ struct otx_cpt_ucode ucode;/* microcode information */
+ const u8 *ucode_ptr; /* pointer to microcode in tar archive */
+};
+
+/* Maximum and current number of engines available for all engine groups */
+struct otx_cpt_engs_available {
+ int max_se_cnt;
+ int max_ae_cnt;
+ int se_cnt;
+ int ae_cnt;
+};
+
+/* Engines reserved to an engine group */
+struct otx_cpt_engs_rsvd {
+ int type; /* engine type */
+ int count; /* number of engines attached */
+ int offset; /* constant offset of engine type in the bitmap */
+ unsigned long *bmap; /* attached engines bitmap */
+ struct otx_cpt_ucode *ucode; /* ucode used by these engines */
+};
+
+struct otx_cpt_mirror_info {
+ int is_ena; /*
+ * is mirroring enabled, it is set only for engine
+ * group which mirrors another engine group
+ */
+ int idx; /*
+ * index of engine group which is mirrored by this
+ * group, set only for engine group which mirrors
+ * another group
+ */
+ int ref_count; /*
+ * number of times this engine group is mirrored by
+ * other groups, this is set only for engine group
+ * which is mirrored by other group(s)
+ */
+};
+
+struct otx_cpt_eng_grp_info {
+ struct otx_cpt_eng_grps *g; /* pointer to engine_groups structure */
+ struct device_attribute info_attr; /* group info entry attr */
+ /* engines attached */
+ struct otx_cpt_engs_rsvd engs[OTX_CPT_MAX_ETYPES_PER_GRP];
+ /* Microcode information */
+ struct otx_cpt_ucode ucode[OTX_CPT_MAX_ETYPES_PER_GRP];
+ /* sysfs info entry name */
+ char sysfs_info_name[OTX_CPT_UCODE_NAME_LENGTH];
+ /* engine group mirroring information */
+ struct otx_cpt_mirror_info mirror;
+ int idx; /* engine group index */
+ bool is_enabled; /*
+ * is engine group enabled, engine group is enabled
+ * when it has engines attached and ucode loaded
+ */
+};
+
+struct otx_cpt_eng_grps {
+ struct otx_cpt_eng_grp_info grp[OTX_CPT_MAX_ENGINE_GROUPS];
+ struct device_attribute ucode_load_attr;/* ucode load attr */
+ struct otx_cpt_engs_available avail;
+ struct mutex lock;
+ void *obj;
+ int engs_num; /* total number of engines supported */
+ int eng_types_supported; /* engine types supported SE, AE */
+ u8 eng_ref_cnt[OTX_CPT_MAX_ENGINES];/* engines reference count */
+ bool is_ucode_load_created; /* is ucode_load sysfs entry created */
+ bool is_first_try; /* is this first try to create kcrypto engine grp */
+ bool is_rdonly; /* do engine groups configuration can be modified */
+};
+
+int otx_cpt_init_eng_grps(struct pci_dev *pdev,
+ struct otx_cpt_eng_grps *eng_grps, int pf_type);
+void otx_cpt_cleanup_eng_grps(struct pci_dev *pdev,
+ struct otx_cpt_eng_grps *eng_grps);
+int otx_cpt_try_create_default_eng_grps(struct pci_dev *pdev,
+ struct otx_cpt_eng_grps *eng_grps,
+ int pf_type);
+void otx_cpt_set_eng_grps_is_rdonly(struct otx_cpt_eng_grps *eng_grps,
+ bool is_rdonly);
+int otx_cpt_uc_supports_eng_type(struct otx_cpt_ucode *ucode, int eng_type);
+int otx_cpt_eng_grp_has_eng_type(struct otx_cpt_eng_grp_info *eng_grp,
+ int eng_type);
+
+#endif /* __OTX_CPTPF_UCODE_H */
diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf.h b/drivers/crypto/marvell/octeontx/otx_cptvf.h
new file mode 100644
index 000000000000..dd02f21659af
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Marvell OcteonTX CPT driver
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __OTX_CPTVF_H
+#define __OTX_CPTVF_H
+
+#include <linux/list.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include "otx_cpt_common.h"
+#include "otx_cptvf_reqmgr.h"
+
+/* Flags to indicate the features supported */
+#define OTX_CPT_FLAG_DEVICE_READY BIT(1)
+#define otx_cpt_device_ready(cpt) ((cpt)->flags & OTX_CPT_FLAG_DEVICE_READY)
+/* Default command queue length */
+#define OTX_CPT_CMD_QLEN (4*2046)
+#define OTX_CPT_CMD_QCHUNK_SIZE 1023
+#define OTX_CPT_NUM_QS_PER_VF 1
+
+struct otx_cpt_cmd_chunk {
+ u8 *head;
+ dma_addr_t dma_addr;
+ u32 size; /* Chunk size, max OTX_CPT_INST_CHUNK_MAX_SIZE */
+ struct list_head nextchunk;
+};
+
+struct otx_cpt_cmd_queue {
+ u32 idx; /* Command queue host write idx */
+ u32 num_chunks; /* Number of command chunks */
+ struct otx_cpt_cmd_chunk *qhead;/*
+ * Command queue head, instructions
+ * are inserted here
+ */
+ struct otx_cpt_cmd_chunk *base;
+ struct list_head chead;
+};
+
+struct otx_cpt_cmd_qinfo {
+ u32 qchunksize; /* Command queue chunk size */
+ struct otx_cpt_cmd_queue queue[OTX_CPT_NUM_QS_PER_VF];
+};
+
+struct otx_cpt_pending_qinfo {
+ u32 num_queues; /* Number of queues supported */
+ struct otx_cpt_pending_queue queue[OTX_CPT_NUM_QS_PER_VF];
+};
+
+#define for_each_pending_queue(qinfo, q, i) \
+ for (i = 0, q = &qinfo->queue[i]; i < qinfo->num_queues; i++, \
+ q = &qinfo->queue[i])
+
+struct otx_cptvf_wqe {
+ struct tasklet_struct twork;
+ struct otx_cptvf *cptvf;
+};
+
+struct otx_cptvf_wqe_info {
+ struct otx_cptvf_wqe vq_wqe[OTX_CPT_NUM_QS_PER_VF];
+};
+
+struct otx_cptvf {
+ u16 flags; /* Flags to hold device status bits */
+ u8 vfid; /* Device Index 0...OTX_CPT_MAX_VF_NUM */
+ u8 num_vfs; /* Number of enabled VFs */
+ u8 vftype; /* VF type of SE_TYPE(2) or AE_TYPE(1) */
+ u8 vfgrp; /* VF group (0 - 8) */
+ u8 node; /* Operating node: Bits (46:44) in BAR0 address */
+ u8 priority; /*
+ * VF priority ring: 1-High proirity round
+ * robin ring;0-Low priority round robin ring;
+ */
+ struct pci_dev *pdev; /* Pci device handle */
+ void __iomem *reg_base; /* Register start address */
+ void *wqe_info; /* BH worker info */
+ /* MSI-X */
+ cpumask_var_t affinity_mask[OTX_CPT_VF_MSIX_VECTORS];
+ /* Command and Pending queues */
+ u32 qsize;
+ u32 num_queues;
+ struct otx_cpt_cmd_qinfo cqinfo; /* Command queue information */
+ struct otx_cpt_pending_qinfo pqinfo; /* Pending queue information */
+ /* VF-PF mailbox communication */
+ bool pf_acked;
+ bool pf_nacked;
+};
+
+int otx_cptvf_send_vf_up(struct otx_cptvf *cptvf);
+int otx_cptvf_send_vf_down(struct otx_cptvf *cptvf);
+int otx_cptvf_send_vf_to_grp_msg(struct otx_cptvf *cptvf, int group);
+int otx_cptvf_send_vf_priority_msg(struct otx_cptvf *cptvf);
+int otx_cptvf_send_vq_size_msg(struct otx_cptvf *cptvf);
+int otx_cptvf_check_pf_ready(struct otx_cptvf *cptvf);
+void otx_cptvf_handle_mbox_intr(struct otx_cptvf *cptvf);
+void otx_cptvf_write_vq_doorbell(struct otx_cptvf *cptvf, u32 val);
+
+#endif /* __OTX_CPTVF_H */
diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c
new file mode 100644
index 000000000000..946fb62949b2
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c
@@ -0,0 +1,1744 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTX CPT driver
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/aes.h>
+#include <crypto/authenc.h>
+#include <crypto/cryptd.h>
+#include <crypto/des.h>
+#include <crypto/internal/aead.h>
+#include <crypto/sha.h>
+#include <crypto/xts.h>
+#include <crypto/scatterwalk.h>
+#include <linux/rtnetlink.h>
+#include <linux/sort.h>
+#include <linux/module.h>
+#include "otx_cptvf.h"
+#include "otx_cptvf_algs.h"
+#include "otx_cptvf_reqmgr.h"
+
+#define CPT_MAX_VF_NUM 64
+/* Size of salt in AES GCM mode */
+#define AES_GCM_SALT_SIZE 4
+/* Size of IV in AES GCM mode */
+#define AES_GCM_IV_SIZE 8
+/* Size of ICV (Integrity Check Value) in AES GCM mode */
+#define AES_GCM_ICV_SIZE 16
+/* Offset of IV in AES GCM mode */
+#define AES_GCM_IV_OFFSET 8
+#define CONTROL_WORD_LEN 8
+#define KEY2_OFFSET 48
+#define DMA_MODE_FLAG(dma_mode) \
+ (((dma_mode) == OTX_CPT_DMA_GATHER_SCATTER) ? (1 << 7) : 0)
+
+/* Truncated SHA digest size */
+#define SHA1_TRUNC_DIGEST_SIZE 12
+#define SHA256_TRUNC_DIGEST_SIZE 16
+#define SHA384_TRUNC_DIGEST_SIZE 24
+#define SHA512_TRUNC_DIGEST_SIZE 32
+
+static DEFINE_MUTEX(mutex);
+static int is_crypto_registered;
+
+struct cpt_device_desc {
+ enum otx_cptpf_type pf_type;
+ struct pci_dev *dev;
+ int num_queues;
+};
+
+struct cpt_device_table {
+ atomic_t count;
+ struct cpt_device_desc desc[CPT_MAX_VF_NUM];
+};
+
+static struct cpt_device_table se_devices = {
+ .count = ATOMIC_INIT(0)
+};
+
+static struct cpt_device_table ae_devices = {
+ .count = ATOMIC_INIT(0)
+};
+
+static inline int get_se_device(struct pci_dev **pdev, int *cpu_num)
+{
+ int count, ret = 0;
+
+ count = atomic_read(&se_devices.count);
+ if (count < 1)
+ return -ENODEV;
+
+ *cpu_num = get_cpu();
+
+ if (se_devices.desc[0].pf_type == OTX_CPT_SE) {
+ /*
+ * On OcteonTX platform there is one CPT instruction queue bound
+ * to each VF. We get maximum performance if one CPT queue
+ * is available for each cpu otherwise CPT queues need to be
+ * shared between cpus.
+ */
+ if (*cpu_num >= count)
+ *cpu_num %= count;
+ *pdev = se_devices.desc[*cpu_num].dev;
+ } else {
+ pr_err("Unknown PF type %d\n", se_devices.desc[0].pf_type);
+ ret = -EINVAL;
+ }
+ put_cpu();
+
+ return ret;
+}
+
+static inline int validate_hmac_cipher_null(struct otx_cpt_req_info *cpt_req)
+{
+ struct otx_cpt_req_ctx *rctx;
+ struct aead_request *req;
+ struct crypto_aead *tfm;
+
+ req = container_of(cpt_req->areq, struct aead_request, base);
+ tfm = crypto_aead_reqtfm(req);
+ rctx = aead_request_ctx(req);
+ if (memcmp(rctx->fctx.hmac.s.hmac_calc,
+ rctx->fctx.hmac.s.hmac_recv,
+ crypto_aead_authsize(tfm)) != 0)
+ return -EBADMSG;
+
+ return 0;
+}
+
+static void otx_cpt_aead_callback(int status, void *arg1, void *arg2)
+{
+ struct otx_cpt_info_buffer *cpt_info = arg2;
+ struct crypto_async_request *areq = arg1;
+ struct otx_cpt_req_info *cpt_req;
+ struct pci_dev *pdev;
+
+ cpt_req = cpt_info->req;
+ if (!status) {
+ /*
+ * When selected cipher is NULL we need to manually
+ * verify whether calculated hmac value matches
+ * received hmac value
+ */
+ if (cpt_req->req_type == OTX_CPT_AEAD_ENC_DEC_NULL_REQ &&
+ !cpt_req->is_enc)
+ status = validate_hmac_cipher_null(cpt_req);
+ }
+ if (cpt_info) {
+ pdev = cpt_info->pdev;
+ do_request_cleanup(pdev, cpt_info);
+ }
+ if (areq)
+ areq->complete(areq, status);
+}
+
+static void output_iv_copyback(struct crypto_async_request *areq)
+{
+ struct otx_cpt_req_info *req_info;
+ struct skcipher_request *sreq;
+ struct crypto_skcipher *stfm;
+ struct otx_cpt_req_ctx *rctx;
+ struct otx_cpt_enc_ctx *ctx;
+ u32 start, ivsize;
+
+ sreq = container_of(areq, struct skcipher_request, base);
+ stfm = crypto_skcipher_reqtfm(sreq);
+ ctx = crypto_skcipher_ctx(stfm);
+ if (ctx->cipher_type == OTX_CPT_AES_CBC ||
+ ctx->cipher_type == OTX_CPT_DES3_CBC) {
+ rctx = skcipher_request_ctx(sreq);
+ req_info = &rctx->cpt_req;
+ ivsize = crypto_skcipher_ivsize(stfm);
+ start = sreq->cryptlen - ivsize;
+
+ if (req_info->is_enc) {
+ scatterwalk_map_and_copy(sreq->iv, sreq->dst, start,
+ ivsize, 0);
+ } else {
+ if (sreq->src != sreq->dst) {
+ scatterwalk_map_and_copy(sreq->iv, sreq->src,
+ start, ivsize, 0);
+ } else {
+ memcpy(sreq->iv, req_info->iv_out, ivsize);
+ kfree(req_info->iv_out);
+ }
+ }
+ }
+}
+
+static void otx_cpt_skcipher_callback(int status, void *arg1, void *arg2)
+{
+ struct otx_cpt_info_buffer *cpt_info = arg2;
+ struct crypto_async_request *areq = arg1;
+ struct pci_dev *pdev;
+
+ if (areq) {
+ if (!status)
+ output_iv_copyback(areq);
+ if (cpt_info) {
+ pdev = cpt_info->pdev;
+ do_request_cleanup(pdev, cpt_info);
+ }
+ areq->complete(areq, status);
+ }
+}
+
+static inline void update_input_data(struct otx_cpt_req_info *req_info,
+ struct scatterlist *inp_sg,
+ u32 nbytes, u32 *argcnt)
+{
+ req_info->req.dlen += nbytes;
+
+ while (nbytes) {
+ u32 len = min(nbytes, inp_sg->length);
+ u8 *ptr = sg_virt(inp_sg);
+
+ req_info->in[*argcnt].vptr = (void *)ptr;
+ req_info->in[*argcnt].size = len;
+ nbytes -= len;
+ ++(*argcnt);
+ inp_sg = sg_next(inp_sg);
+ }
+}
+
+static inline void update_output_data(struct otx_cpt_req_info *req_info,
+ struct scatterlist *outp_sg,
+ u32 offset, u32 nbytes, u32 *argcnt)
+{
+ req_info->rlen += nbytes;
+
+ while (nbytes) {
+ u32 len = min(nbytes, outp_sg->length - offset);
+ u8 *ptr = sg_virt(outp_sg);
+
+ req_info->out[*argcnt].vptr = (void *) (ptr + offset);
+ req_info->out[*argcnt].size = len;
+ nbytes -= len;
+ ++(*argcnt);
+ offset = 0;
+ outp_sg = sg_next(outp_sg);
+ }
+}
+
+static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc,
+ u32 *argcnt)
+{
+ struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req);
+ struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req);
+ struct otx_cpt_req_info *req_info = &rctx->cpt_req;
+ struct crypto_tfm *tfm = crypto_skcipher_tfm(stfm);
+ struct otx_cpt_enc_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct otx_cpt_fc_ctx *fctx = &rctx->fctx;
+ int ivsize = crypto_skcipher_ivsize(stfm);
+ u32 start = req->cryptlen - ivsize;
+ u64 *ctrl_flags = NULL;
+ gfp_t flags;
+
+ flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+ GFP_KERNEL : GFP_ATOMIC;
+ req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER;
+ req_info->ctrl.s.se_req = OTX_CPT_SE_CORE_REQ;
+
+ req_info->req.opcode.s.major = OTX_CPT_MAJOR_OP_FC |
+ DMA_MODE_FLAG(OTX_CPT_DMA_GATHER_SCATTER);
+ if (enc) {
+ req_info->req.opcode.s.minor = 2;
+ } else {
+ req_info->req.opcode.s.minor = 3;
+ if ((ctx->cipher_type == OTX_CPT_AES_CBC ||
+ ctx->cipher_type == OTX_CPT_DES3_CBC) &&
+ req->src == req->dst) {
+ req_info->iv_out = kmalloc(ivsize, flags);
+ if (!req_info->iv_out)
+ return -ENOMEM;
+
+ scatterwalk_map_and_copy(req_info->iv_out, req->src,
+ start, ivsize, 0);
+ }
+ }
+ /* Encryption data length */
+ req_info->req.param1 = req->cryptlen;
+ /* Authentication data length */
+ req_info->req.param2 = 0;
+
+ fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type;
+ fctx->enc.enc_ctrl.e.aes_key = ctx->key_type;
+ fctx->enc.enc_ctrl.e.iv_source = OTX_CPT_FROM_CPTR;
+
+ if (ctx->cipher_type == OTX_CPT_AES_XTS)
+ memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len * 2);
+ else
+ memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len);
+
+ memcpy(fctx->enc.encr_iv, req->iv, crypto_skcipher_ivsize(stfm));
+
+ ctrl_flags = (u64 *)&fctx->enc.enc_ctrl.flags;
+ *ctrl_flags = cpu_to_be64(*ctrl_flags);
+
+ /*
+ * Storing Packet Data Information in offset
+ * Control Word First 8 bytes
+ */
+ req_info->in[*argcnt].vptr = (u8 *)&rctx->ctrl_word;
+ req_info->in[*argcnt].size = CONTROL_WORD_LEN;
+ req_info->req.dlen += CONTROL_WORD_LEN;
+ ++(*argcnt);
+
+ req_info->in[*argcnt].vptr = (u8 *)fctx;
+ req_info->in[*argcnt].size = sizeof(struct otx_cpt_fc_ctx);
+ req_info->req.dlen += sizeof(struct otx_cpt_fc_ctx);
+
+ ++(*argcnt);
+
+ return 0;
+}
+
+static inline u32 create_input_list(struct skcipher_request *req, u32 enc,
+ u32 enc_iv_len)
+{
+ struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req);
+ struct otx_cpt_req_info *req_info = &rctx->cpt_req;
+ u32 argcnt = 0;
+ int ret;
+
+ ret = create_ctx_hdr(req, enc, &argcnt);
+ if (ret)
+ return ret;
+
+ update_input_data(req_info, req->src, req->cryptlen, &argcnt);
+ req_info->incnt = argcnt;
+
+ return 0;
+}
+
+static inline void create_output_list(struct skcipher_request *req,
+ u32 enc_iv_len)
+{
+ struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req);
+ struct otx_cpt_req_info *req_info = &rctx->cpt_req;
+ u32 argcnt = 0;
+
+ /*
+ * OUTPUT Buffer Processing
+ * AES encryption/decryption output would be
+ * received in the following format
+ *
+ * ------IV--------|------ENCRYPTED/DECRYPTED DATA-----|
+ * [ 16 Bytes/ [ Request Enc/Dec/ DATA Len AES CBC ]
+ */
+ update_output_data(req_info, req->dst, 0, req->cryptlen, &argcnt);
+ req_info->outcnt = argcnt;
+}
+
+static inline int cpt_enc_dec(struct skcipher_request *req, u32 enc)
+{
+ struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req);
+ struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req);
+ struct otx_cpt_req_info *req_info = &rctx->cpt_req;
+ u32 enc_iv_len = crypto_skcipher_ivsize(stfm);
+ struct pci_dev *pdev;
+ int status, cpu_num;
+
+ /* Validate that request doesn't exceed maximum CPT supported size */
+ if (req->cryptlen > OTX_CPT_MAX_REQ_SIZE)
+ return -E2BIG;
+
+ /* Clear control words */
+ rctx->ctrl_word.flags = 0;
+ rctx->fctx.enc.enc_ctrl.flags = 0;
+
+ status = create_input_list(req, enc, enc_iv_len);
+ if (status)
+ return status;
+ create_output_list(req, enc_iv_len);
+
+ status = get_se_device(&pdev, &cpu_num);
+ if (status)
+ return status;
+
+ req_info->callback = (void *)otx_cpt_skcipher_callback;
+ req_info->areq = &req->base;
+ req_info->req_type = OTX_CPT_ENC_DEC_REQ;
+ req_info->is_enc = enc;
+ req_info->is_trunc_hmac = false;
+ req_info->ctrl.s.grp = 0;
+
+ /*
+ * We perform an asynchronous send and once
+ * the request is completed the driver would
+ * intimate through registered call back functions
+ */
+ status = otx_cpt_do_request(pdev, req_info, cpu_num);
+
+ return status;
+}
+
+static int otx_cpt_skcipher_encrypt(struct skcipher_request *req)
+{
+ return cpt_enc_dec(req, true);
+}
+
+static int otx_cpt_skcipher_decrypt(struct skcipher_request *req)
+{
+ return cpt_enc_dec(req, false);
+}
+
+static int otx_cpt_skcipher_xts_setkey(struct crypto_skcipher *tfm,
+ const u8 *key, u32 keylen)
+{
+ struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm);
+ const u8 *key2 = key + (keylen / 2);
+ const u8 *key1 = key;
+ int ret;
+
+ ret = xts_check_key(crypto_skcipher_tfm(tfm), key, keylen);
+ if (ret)
+ return ret;
+ ctx->key_len = keylen;
+ memcpy(ctx->enc_key, key1, keylen / 2);
+ memcpy(ctx->enc_key + KEY2_OFFSET, key2, keylen / 2);
+ ctx->cipher_type = OTX_CPT_AES_XTS;
+ switch (ctx->key_len) {
+ case 2 * AES_KEYSIZE_128:
+ ctx->key_type = OTX_CPT_AES_128_BIT;
+ break;
+ case 2 * AES_KEYSIZE_256:
+ ctx->key_type = OTX_CPT_AES_256_BIT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int cpt_des_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ u32 keylen, u8 cipher_type)
+{
+ struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ if (keylen != DES3_EDE_KEY_SIZE)
+ return -EINVAL;
+
+ ctx->key_len = keylen;
+ ctx->cipher_type = cipher_type;
+
+ memcpy(ctx->enc_key, key, keylen);
+
+ return 0;
+}
+
+static int cpt_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ u32 keylen, u8 cipher_type)
+{
+ struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ switch (keylen) {
+ case AES_KEYSIZE_128:
+ ctx->key_type = OTX_CPT_AES_128_BIT;
+ break;
+ case AES_KEYSIZE_192:
+ ctx->key_type = OTX_CPT_AES_192_BIT;
+ break;
+ case AES_KEYSIZE_256:
+ ctx->key_type = OTX_CPT_AES_256_BIT;
+ break;
+ default:
+ return -EINVAL;
+ }
+ ctx->key_len = keylen;
+ ctx->cipher_type = cipher_type;
+
+ memcpy(ctx->enc_key, key, keylen);
+
+ return 0;
+}
+
+static int otx_cpt_skcipher_cbc_aes_setkey(struct crypto_skcipher *tfm,
+ const u8 *key, u32 keylen)
+{
+ return cpt_aes_setkey(tfm, key, keylen, OTX_CPT_AES_CBC);
+}
+
+static int otx_cpt_skcipher_ecb_aes_setkey(struct crypto_skcipher *tfm,
+ const u8 *key, u32 keylen)
+{
+ return cpt_aes_setkey(tfm, key, keylen, OTX_CPT_AES_ECB);
+}
+
+static int otx_cpt_skcipher_cfb_aes_setkey(struct crypto_skcipher *tfm,
+ const u8 *key, u32 keylen)
+{
+ return cpt_aes_setkey(tfm, key, keylen, OTX_CPT_AES_CFB);
+}
+
+static int otx_cpt_skcipher_cbc_des3_setkey(struct crypto_skcipher *tfm,
+ const u8 *key, u32 keylen)
+{
+ return cpt_des_setkey(tfm, key, keylen, OTX_CPT_DES3_CBC);
+}
+
+static int otx_cpt_skcipher_ecb_des3_setkey(struct crypto_skcipher *tfm,
+ const u8 *key, u32 keylen)
+{
+ return cpt_des_setkey(tfm, key, keylen, OTX_CPT_DES3_ECB);
+}
+
+static int otx_cpt_enc_dec_init(struct crypto_skcipher *tfm)
+{
+ struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ memset(ctx, 0, sizeof(*ctx));
+ /*
+ * Additional memory for skcipher_request is
+ * allocated since the cryptd daemon uses
+ * this memory for request_ctx information
+ */
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct otx_cpt_req_ctx) +
+ sizeof(struct skcipher_request));
+
+ return 0;
+}
+
+static int cpt_aead_init(struct crypto_aead *tfm, u8 cipher_type, u8 mac_type)
+{
+ struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm);
+
+ ctx->cipher_type = cipher_type;
+ ctx->mac_type = mac_type;
+
+ /*
+ * When selected cipher is NULL we use HMAC opcode instead of
+ * FLEXICRYPTO opcode therefore we don't need to use HASH algorithms
+ * for calculating ipad and opad
+ */
+ if (ctx->cipher_type != OTX_CPT_CIPHER_NULL) {
+ switch (ctx->mac_type) {
+ case OTX_CPT_SHA1:
+ ctx->hashalg = crypto_alloc_shash("sha1", 0,
+ CRYPTO_ALG_ASYNC);
+ if (IS_ERR(ctx->hashalg))
+ return PTR_ERR(ctx->hashalg);
+ break;
+
+ case OTX_CPT_SHA256:
+ ctx->hashalg = crypto_alloc_shash("sha256", 0,
+ CRYPTO_ALG_ASYNC);
+ if (IS_ERR(ctx->hashalg))
+ return PTR_ERR(ctx->hashalg);
+ break;
+
+ case OTX_CPT_SHA384:
+ ctx->hashalg = crypto_alloc_shash("sha384", 0,
+ CRYPTO_ALG_ASYNC);
+ if (IS_ERR(ctx->hashalg))
+ return PTR_ERR(ctx->hashalg);
+ break;
+
+ case OTX_CPT_SHA512:
+ ctx->hashalg = crypto_alloc_shash("sha512", 0,
+ CRYPTO_ALG_ASYNC);
+ if (IS_ERR(ctx->hashalg))
+ return PTR_ERR(ctx->hashalg);
+ break;
+ }
+ }
+
+ crypto_aead_set_reqsize(tfm, sizeof(struct otx_cpt_req_ctx));
+
+ return 0;
+}
+
+static int otx_cpt_aead_cbc_aes_sha1_init(struct crypto_aead *tfm)
+{
+ return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA1);
+}
+
+static int otx_cpt_aead_cbc_aes_sha256_init(struct crypto_aead *tfm)
+{
+ return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA256);
+}
+
+static int otx_cpt_aead_cbc_aes_sha384_init(struct crypto_aead *tfm)
+{
+ return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA384);
+}
+
+static int otx_cpt_aead_cbc_aes_sha512_init(struct crypto_aead *tfm)
+{
+ return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA512);
+}
+
+static int otx_cpt_aead_ecb_null_sha1_init(struct crypto_aead *tfm)
+{
+ return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA1);
+}
+
+static int otx_cpt_aead_ecb_null_sha256_init(struct crypto_aead *tfm)
+{
+ return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA256);
+}
+
+static int otx_cpt_aead_ecb_null_sha384_init(struct crypto_aead *tfm)
+{
+ return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA384);
+}
+
+static int otx_cpt_aead_ecb_null_sha512_init(struct crypto_aead *tfm)
+{
+ return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA512);
+}
+
+static int otx_cpt_aead_gcm_aes_init(struct crypto_aead *tfm)
+{
+ return cpt_aead_init(tfm, OTX_CPT_AES_GCM, OTX_CPT_MAC_NULL);
+}
+
+static void otx_cpt_aead_exit(struct crypto_aead *tfm)
+{
+ struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm);
+
+ kfree(ctx->ipad);
+ kfree(ctx->opad);
+ if (ctx->hashalg)
+ crypto_free_shash(ctx->hashalg);
+ kfree(ctx->sdesc);
+}
+
+/*
+ * This is the Integrity Check Value validation (aka the authentication tag
+ * length)
+ */
+static int otx_cpt_aead_set_authsize(struct crypto_aead *tfm,
+ unsigned int authsize)
+{
+ struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm);
+
+ switch (ctx->mac_type) {
+ case OTX_CPT_SHA1:
+ if (authsize != SHA1_DIGEST_SIZE &&
+ authsize != SHA1_TRUNC_DIGEST_SIZE)
+ return -EINVAL;
+
+ if (authsize == SHA1_TRUNC_DIGEST_SIZE)
+ ctx->is_trunc_hmac = true;
+ break;
+
+ case OTX_CPT_SHA256:
+ if (authsize != SHA256_DIGEST_SIZE &&
+ authsize != SHA256_TRUNC_DIGEST_SIZE)
+ return -EINVAL;
+
+ if (authsize == SHA256_TRUNC_DIGEST_SIZE)
+ ctx->is_trunc_hmac = true;
+ break;
+
+ case OTX_CPT_SHA384:
+ if (authsize != SHA384_DIGEST_SIZE &&
+ authsize != SHA384_TRUNC_DIGEST_SIZE)
+ return -EINVAL;
+
+ if (authsize == SHA384_TRUNC_DIGEST_SIZE)
+ ctx->is_trunc_hmac = true;
+ break;
+
+ case OTX_CPT_SHA512:
+ if (authsize != SHA512_DIGEST_SIZE &&
+ authsize != SHA512_TRUNC_DIGEST_SIZE)
+ return -EINVAL;
+
+ if (authsize == SHA512_TRUNC_DIGEST_SIZE)
+ ctx->is_trunc_hmac = true;
+ break;
+
+ case OTX_CPT_MAC_NULL:
+ if (ctx->cipher_type == OTX_CPT_AES_GCM) {
+ if (authsize != AES_GCM_ICV_SIZE)
+ return -EINVAL;
+ } else
+ return -EINVAL;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ tfm->authsize = authsize;
+ return 0;
+}
+
+static struct otx_cpt_sdesc *alloc_sdesc(struct crypto_shash *alg)
+{
+ struct otx_cpt_sdesc *sdesc;
+ int size;
+
+ size = sizeof(struct shash_desc) + crypto_shash_descsize(alg);
+ sdesc = kmalloc(size, GFP_KERNEL);
+ if (!sdesc)
+ return NULL;
+
+ sdesc->shash.tfm = alg;
+
+ return sdesc;
+}
+
+static inline void swap_data32(void *buf, u32 len)
+{
+ u32 *store = (u32 *) buf;
+ int i = 0;
+
+ for (i = 0 ; i < len/sizeof(u32); i++, store++)
+ *store = cpu_to_be32(*store);
+}
+
+static inline void swap_data64(void *buf, u32 len)
+{
+ u64 *store = (u64 *) buf;
+ int i = 0;
+
+ for (i = 0 ; i < len/sizeof(u64); i++, store++)
+ *store = cpu_to_be64(*store);
+}
+
+static int copy_pad(u8 mac_type, u8 *out_pad, u8 *in_pad)
+{
+ struct sha512_state *sha512;
+ struct sha256_state *sha256;
+ struct sha1_state *sha1;
+
+ switch (mac_type) {
+ case OTX_CPT_SHA1:
+ sha1 = (struct sha1_state *) in_pad;
+ swap_data32(sha1->state, SHA1_DIGEST_SIZE);
+ memcpy(out_pad, &sha1->state, SHA1_DIGEST_SIZE);
+ break;
+
+ case OTX_CPT_SHA256:
+ sha256 = (struct sha256_state *) in_pad;
+ swap_data32(sha256->state, SHA256_DIGEST_SIZE);
+ memcpy(out_pad, &sha256->state, SHA256_DIGEST_SIZE);
+ break;
+
+ case OTX_CPT_SHA384:
+ case OTX_CPT_SHA512:
+ sha512 = (struct sha512_state *) in_pad;
+ swap_data64(sha512->state, SHA512_DIGEST_SIZE);
+ memcpy(out_pad, &sha512->state, SHA512_DIGEST_SIZE);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int aead_hmac_init(struct crypto_aead *cipher)
+{
+ struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher);
+ int state_size = crypto_shash_statesize(ctx->hashalg);
+ int ds = crypto_shash_digestsize(ctx->hashalg);
+ int bs = crypto_shash_blocksize(ctx->hashalg);
+ int authkeylen = ctx->auth_key_len;
+ u8 *ipad = NULL, *opad = NULL;
+ int ret = 0, icount = 0;
+
+ ctx->sdesc = alloc_sdesc(ctx->hashalg);
+ if (!ctx->sdesc)
+ return -ENOMEM;
+
+ ctx->ipad = kzalloc(bs, GFP_KERNEL);
+ if (!ctx->ipad) {
+ ret = -ENOMEM;
+ goto calc_fail;
+ }
+
+ ctx->opad = kzalloc(bs, GFP_KERNEL);
+ if (!ctx->opad) {
+ ret = -ENOMEM;
+ goto calc_fail;
+ }
+
+ ipad = kzalloc(state_size, GFP_KERNEL);
+ if (!ipad) {
+ ret = -ENOMEM;
+ goto calc_fail;
+ }
+
+ opad = kzalloc(state_size, GFP_KERNEL);
+ if (!opad) {
+ ret = -ENOMEM;
+ goto calc_fail;
+ }
+
+ if (authkeylen > bs) {
+ ret = crypto_shash_digest(&ctx->sdesc->shash, ctx->key,
+ authkeylen, ipad);
+ if (ret)
+ goto calc_fail;
+
+ authkeylen = ds;
+ } else {
+ memcpy(ipad, ctx->key, authkeylen);
+ }
+
+ memset(ipad + authkeylen, 0, bs - authkeylen);
+ memcpy(opad, ipad, bs);
+
+ for (icount = 0; icount < bs; icount++) {
+ ipad[icount] ^= 0x36;
+ opad[icount] ^= 0x5c;
+ }
+
+ /*
+ * Partial Hash calculated from the software
+ * algorithm is retrieved for IPAD & OPAD
+ */
+
+ /* IPAD Calculation */
+ crypto_shash_init(&ctx->sdesc->shash);
+ crypto_shash_update(&ctx->sdesc->shash, ipad, bs);
+ crypto_shash_export(&ctx->sdesc->shash, ipad);
+ ret = copy_pad(ctx->mac_type, ctx->ipad, ipad);
+ if (ret)
+ goto calc_fail;
+
+ /* OPAD Calculation */
+ crypto_shash_init(&ctx->sdesc->shash);
+ crypto_shash_update(&ctx->sdesc->shash, opad, bs);
+ crypto_shash_export(&ctx->sdesc->shash, opad);
+ ret = copy_pad(ctx->mac_type, ctx->opad, opad);
+ if (ret)
+ goto calc_fail;
+
+ kfree(ipad);
+ kfree(opad);
+
+ return 0;
+
+calc_fail:
+ kfree(ctx->ipad);
+ ctx->ipad = NULL;
+ kfree(ctx->opad);
+ ctx->opad = NULL;
+ kfree(ipad);
+ kfree(opad);
+ kfree(ctx->sdesc);
+ ctx->sdesc = NULL;
+
+ return ret;
+}
+
+static int otx_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher,
+ const unsigned char *key,
+ unsigned int keylen)
+{
+ struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher);
+ struct crypto_authenc_key_param *param;
+ int enckeylen = 0, authkeylen = 0;
+ struct rtattr *rta = (void *)key;
+ int status = -EINVAL;
+
+ if (!RTA_OK(rta, keylen))
+ goto badkey;
+
+ if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM)
+ goto badkey;
+
+ if (RTA_PAYLOAD(rta) < sizeof(*param))
+ goto badkey;
+
+ param = RTA_DATA(rta);
+ enckeylen = be32_to_cpu(param->enckeylen);
+ key += RTA_ALIGN(rta->rta_len);
+ keylen -= RTA_ALIGN(rta->rta_len);
+ if (keylen < enckeylen)
+ goto badkey;
+
+ if (keylen > OTX_CPT_MAX_KEY_SIZE)
+ goto badkey;
+
+ authkeylen = keylen - enckeylen;
+ memcpy(ctx->key, key, keylen);
+
+ switch (enckeylen) {
+ case AES_KEYSIZE_128:
+ ctx->key_type = OTX_CPT_AES_128_BIT;
+ break;
+ case AES_KEYSIZE_192:
+ ctx->key_type = OTX_CPT_AES_192_BIT;
+ break;
+ case AES_KEYSIZE_256:
+ ctx->key_type = OTX_CPT_AES_256_BIT;
+ break;
+ default:
+ /* Invalid key length */
+ goto badkey;
+ }
+
+ ctx->enc_key_len = enckeylen;
+ ctx->auth_key_len = authkeylen;
+
+ status = aead_hmac_init(cipher);
+ if (status)
+ goto badkey;
+
+ return 0;
+badkey:
+ return status;
+}
+
+static int otx_cpt_aead_ecb_null_sha_setkey(struct crypto_aead *cipher,
+ const unsigned char *key,
+ unsigned int keylen)
+{
+ struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher);
+ struct crypto_authenc_key_param *param;
+ struct rtattr *rta = (void *)key;
+ int enckeylen = 0;
+
+ if (!RTA_OK(rta, keylen))
+ goto badkey;
+
+ if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM)
+ goto badkey;
+
+ if (RTA_PAYLOAD(rta) < sizeof(*param))
+ goto badkey;
+
+ param = RTA_DATA(rta);
+ enckeylen = be32_to_cpu(param->enckeylen);
+ key += RTA_ALIGN(rta->rta_len);
+ keylen -= RTA_ALIGN(rta->rta_len);
+ if (enckeylen != 0)
+ goto badkey;
+
+ if (keylen > OTX_CPT_MAX_KEY_SIZE)
+ goto badkey;
+
+ memcpy(ctx->key, key, keylen);
+ ctx->enc_key_len = enckeylen;
+ ctx->auth_key_len = keylen;
+ return 0;
+badkey:
+ return -EINVAL;
+}
+
+static int otx_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher,
+ const unsigned char *key,
+ unsigned int keylen)
+{
+ struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher);
+
+ /*
+ * For aes gcm we expect to get encryption key (16, 24, 32 bytes)
+ * and salt (4 bytes)
+ */
+ switch (keylen) {
+ case AES_KEYSIZE_128 + AES_GCM_SALT_SIZE:
+ ctx->key_type = OTX_CPT_AES_128_BIT;
+ ctx->enc_key_len = AES_KEYSIZE_128;
+ break;
+ case AES_KEYSIZE_192 + AES_GCM_SALT_SIZE:
+ ctx->key_type = OTX_CPT_AES_192_BIT;
+ ctx->enc_key_len = AES_KEYSIZE_192;
+ break;
+ case AES_KEYSIZE_256 + AES_GCM_SALT_SIZE:
+ ctx->key_type = OTX_CPT_AES_256_BIT;
+ ctx->enc_key_len = AES_KEYSIZE_256;
+ break;
+ default:
+ /* Invalid key and salt length */
+ return -EINVAL;
+ }
+
+ /* Store encryption key and salt */
+ memcpy(ctx->key, key, keylen);
+
+ return 0;
+}
+
+static inline u32 create_aead_ctx_hdr(struct aead_request *req, u32 enc,
+ u32 *argcnt)
+{
+ struct otx_cpt_req_ctx *rctx = aead_request_ctx(req);
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm);
+ struct otx_cpt_req_info *req_info = &rctx->cpt_req;
+ struct otx_cpt_fc_ctx *fctx = &rctx->fctx;
+ int mac_len = crypto_aead_authsize(tfm);
+ int ds;
+
+ rctx->ctrl_word.e.enc_data_offset = req->assoclen;
+
+ switch (ctx->cipher_type) {
+ case OTX_CPT_AES_CBC:
+ fctx->enc.enc_ctrl.e.iv_source = OTX_CPT_FROM_CPTR;
+ /* Copy encryption key to context */
+ memcpy(fctx->enc.encr_key, ctx->key + ctx->auth_key_len,
+ ctx->enc_key_len);
+ /* Copy IV to context */
+ memcpy(fctx->enc.encr_iv, req->iv, crypto_aead_ivsize(tfm));
+
+ ds = crypto_shash_digestsize(ctx->hashalg);
+ if (ctx->mac_type == OTX_CPT_SHA384)
+ ds = SHA512_DIGEST_SIZE;
+ if (ctx->ipad)
+ memcpy(fctx->hmac.e.ipad, ctx->ipad, ds);
+ if (ctx->opad)
+ memcpy(fctx->hmac.e.opad, ctx->opad, ds);
+ break;
+
+ case OTX_CPT_AES_GCM:
+ fctx->enc.enc_ctrl.e.iv_source = OTX_CPT_FROM_DPTR;
+ /* Copy encryption key to context */
+ memcpy(fctx->enc.encr_key, ctx->key, ctx->enc_key_len);
+ /* Copy salt to context */
+ memcpy(fctx->enc.encr_iv, ctx->key + ctx->enc_key_len,
+ AES_GCM_SALT_SIZE);
+
+ rctx->ctrl_word.e.iv_offset = req->assoclen - AES_GCM_IV_OFFSET;
+ break;
+
+ default:
+ /* Unknown cipher type */
+ return -EINVAL;
+ }
+ rctx->ctrl_word.flags = cpu_to_be64(rctx->ctrl_word.flags);
+
+ req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER;
+ req_info->ctrl.s.se_req = OTX_CPT_SE_CORE_REQ;
+ req_info->req.opcode.s.major = OTX_CPT_MAJOR_OP_FC |
+ DMA_MODE_FLAG(OTX_CPT_DMA_GATHER_SCATTER);
+ if (enc) {
+ req_info->req.opcode.s.minor = 2;
+ req_info->req.param1 = req->cryptlen;
+ req_info->req.param2 = req->cryptlen + req->assoclen;
+ } else {
+ req_info->req.opcode.s.minor = 3;
+ req_info->req.param1 = req->cryptlen - mac_len;
+ req_info->req.param2 = req->cryptlen + req->assoclen - mac_len;
+ }
+
+ fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type;
+ fctx->enc.enc_ctrl.e.aes_key = ctx->key_type;
+ fctx->enc.enc_ctrl.e.mac_type = ctx->mac_type;
+ fctx->enc.enc_ctrl.e.mac_len = mac_len;
+ fctx->enc.enc_ctrl.flags = cpu_to_be64(fctx->enc.enc_ctrl.flags);
+
+ /*
+ * Storing Packet Data Information in offset
+ * Control Word First 8 bytes
+ */
+ req_info->in[*argcnt].vptr = (u8 *)&rctx->ctrl_word;
+ req_info->in[*argcnt].size = CONTROL_WORD_LEN;
+ req_info->req.dlen += CONTROL_WORD_LEN;
+ ++(*argcnt);
+
+ req_info->in[*argcnt].vptr = (u8 *)fctx;
+ req_info->in[*argcnt].size = sizeof(struct otx_cpt_fc_ctx);
+ req_info->req.dlen += sizeof(struct otx_cpt_fc_ctx);
+ ++(*argcnt);
+
+ return 0;
+}
+
+static inline u32 create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt,
+ u32 enc)
+{
+ struct otx_cpt_req_ctx *rctx = aead_request_ctx(req);
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm);
+ struct otx_cpt_req_info *req_info = &rctx->cpt_req;
+
+ req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER;
+ req_info->ctrl.s.se_req = OTX_CPT_SE_CORE_REQ;
+ req_info->req.opcode.s.major = OTX_CPT_MAJOR_OP_HMAC |
+ DMA_MODE_FLAG(OTX_CPT_DMA_GATHER_SCATTER);
+ req_info->is_trunc_hmac = ctx->is_trunc_hmac;
+
+ req_info->req.opcode.s.minor = 0;
+ req_info->req.param1 = ctx->auth_key_len;
+ req_info->req.param2 = ctx->mac_type << 8;
+
+ /* Add authentication key */
+ req_info->in[*argcnt].vptr = ctx->key;
+ req_info->in[*argcnt].size = round_up(ctx->auth_key_len, 8);
+ req_info->req.dlen += round_up(ctx->auth_key_len, 8);
+ ++(*argcnt);
+
+ return 0;
+}
+
+static inline u32 create_aead_input_list(struct aead_request *req, u32 enc)
+{
+ struct otx_cpt_req_ctx *rctx = aead_request_ctx(req);
+ struct otx_cpt_req_info *req_info = &rctx->cpt_req;
+ u32 inputlen = req->cryptlen + req->assoclen;
+ u32 status, argcnt = 0;
+
+ status = create_aead_ctx_hdr(req, enc, &argcnt);
+ if (status)
+ return status;
+ update_input_data(req_info, req->src, inputlen, &argcnt);
+ req_info->incnt = argcnt;
+
+ return 0;
+}
+
+static inline u32 create_aead_output_list(struct aead_request *req, u32 enc,
+ u32 mac_len)
+{
+ struct otx_cpt_req_ctx *rctx = aead_request_ctx(req);
+ struct otx_cpt_req_info *req_info = &rctx->cpt_req;
+ u32 argcnt = 0, outputlen = 0;
+
+ if (enc)
+ outputlen = req->cryptlen + req->assoclen + mac_len;
+ else
+ outputlen = req->cryptlen + req->assoclen - mac_len;
+
+ update_output_data(req_info, req->dst, 0, outputlen, &argcnt);
+ req_info->outcnt = argcnt;
+
+ return 0;
+}
+
+static inline u32 create_aead_null_input_list(struct aead_request *req,
+ u32 enc, u32 mac_len)
+{
+ struct otx_cpt_req_ctx *rctx = aead_request_ctx(req);
+ struct otx_cpt_req_info *req_info = &rctx->cpt_req;
+ u32 inputlen, argcnt = 0;
+
+ if (enc)
+ inputlen = req->cryptlen + req->assoclen;
+ else
+ inputlen = req->cryptlen + req->assoclen - mac_len;
+
+ create_hmac_ctx_hdr(req, &argcnt, enc);
+ update_input_data(req_info, req->src, inputlen, &argcnt);
+ req_info->incnt = argcnt;
+
+ return 0;
+}
+
+static inline u32 create_aead_null_output_list(struct aead_request *req,
+ u32 enc, u32 mac_len)
+{
+ struct otx_cpt_req_ctx *rctx = aead_request_ctx(req);
+ struct otx_cpt_req_info *req_info = &rctx->cpt_req;
+ struct scatterlist *dst;
+ u8 *ptr = NULL;
+ int argcnt = 0, status, offset;
+ u32 inputlen;
+
+ if (enc)
+ inputlen = req->cryptlen + req->assoclen;
+ else
+ inputlen = req->cryptlen + req->assoclen - mac_len;
+
+ /*
+ * If source and destination are different
+ * then copy payload to destination
+ */
+ if (req->src != req->dst) {
+
+ ptr = kmalloc(inputlen, (req_info->areq->flags &
+ CRYPTO_TFM_REQ_MAY_SLEEP) ?
+ GFP_KERNEL : GFP_ATOMIC);
+ if (!ptr) {
+ status = -ENOMEM;
+ goto error;
+ }
+
+ status = sg_copy_to_buffer(req->src, sg_nents(req->src), ptr,
+ inputlen);
+ if (status != inputlen) {
+ status = -EINVAL;
+ goto error;
+ }
+ status = sg_copy_from_buffer(req->dst, sg_nents(req->dst), ptr,
+ inputlen);
+ if (status != inputlen) {
+ status = -EINVAL;
+ goto error;
+ }
+ kfree(ptr);
+ }
+
+ if (enc) {
+ /*
+ * In an encryption scenario hmac needs
+ * to be appended after payload
+ */
+ dst = req->dst;
+ offset = inputlen;
+ while (offset >= dst->length) {
+ offset -= dst->length;
+ dst = sg_next(dst);
+ if (!dst) {
+ status = -ENOENT;
+ goto error;
+ }
+ }
+
+ update_output_data(req_info, dst, offset, mac_len, &argcnt);
+ } else {
+ /*
+ * In a decryption scenario calculated hmac for received
+ * payload needs to be compare with hmac received
+ */
+ status = sg_copy_buffer(req->src, sg_nents(req->src),
+ rctx->fctx.hmac.s.hmac_recv, mac_len,
+ inputlen, true);
+ if (status != mac_len) {
+ status = -EINVAL;
+ goto error;
+ }
+
+ req_info->out[argcnt].vptr = rctx->fctx.hmac.s.hmac_calc;
+ req_info->out[argcnt].size = mac_len;
+ argcnt++;
+ }
+
+ req_info->outcnt = argcnt;
+ return 0;
+error:
+ kfree(ptr);
+ return status;
+}
+
+static u32 cpt_aead_enc_dec(struct aead_request *req, u8 reg_type, u8 enc)
+{
+ struct otx_cpt_req_ctx *rctx = aead_request_ctx(req);
+ struct otx_cpt_req_info *req_info = &rctx->cpt_req;
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct pci_dev *pdev;
+ u32 status, cpu_num;
+
+ /* Clear control words */
+ rctx->ctrl_word.flags = 0;
+ rctx->fctx.enc.enc_ctrl.flags = 0;
+
+ req_info->callback = otx_cpt_aead_callback;
+ req_info->areq = &req->base;
+ req_info->req_type = reg_type;
+ req_info->is_enc = enc;
+ req_info->is_trunc_hmac = false;
+
+ switch (reg_type) {
+ case OTX_CPT_AEAD_ENC_DEC_REQ:
+ status = create_aead_input_list(req, enc);
+ if (status)
+ return status;
+ status = create_aead_output_list(req, enc,
+ crypto_aead_authsize(tfm));
+ if (status)
+ return status;
+ break;
+
+ case OTX_CPT_AEAD_ENC_DEC_NULL_REQ:
+ status = create_aead_null_input_list(req, enc,
+ crypto_aead_authsize(tfm));
+ if (status)
+ return status;
+ status = create_aead_null_output_list(req, enc,
+ crypto_aead_authsize(tfm));
+ if (status)
+ return status;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ /* Validate that request doesn't exceed maximum CPT supported size */
+ if (req_info->req.param1 > OTX_CPT_MAX_REQ_SIZE ||
+ req_info->req.param2 > OTX_CPT_MAX_REQ_SIZE)
+ return -E2BIG;
+
+ status = get_se_device(&pdev, &cpu_num);
+ if (status)
+ return status;
+
+ req_info->ctrl.s.grp = 0;
+
+ status = otx_cpt_do_request(pdev, req_info, cpu_num);
+ /*
+ * We perform an asynchronous send and once
+ * the request is completed the driver would
+ * intimate through registered call back functions
+ */
+ return status;
+}
+
+static int otx_cpt_aead_encrypt(struct aead_request *req)
+{
+ return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_REQ, true);
+}
+
+static int otx_cpt_aead_decrypt(struct aead_request *req)
+{
+ return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_REQ, false);
+}
+
+static int otx_cpt_aead_null_encrypt(struct aead_request *req)
+{
+ return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_NULL_REQ, true);
+}
+
+static int otx_cpt_aead_null_decrypt(struct aead_request *req)
+{
+ return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_NULL_REQ, false);
+}
+
+static struct skcipher_alg otx_cpt_skciphers[] = { {
+ .base.cra_name = "xts(aes)",
+ .base.cra_driver_name = "cpt_xts_aes",
+ .base.cra_flags = CRYPTO_ALG_ASYNC,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_priority = 4001,
+ .base.cra_module = THIS_MODULE,
+
+ .init = otx_cpt_enc_dec_init,
+ .ivsize = AES_BLOCK_SIZE,
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .setkey = otx_cpt_skcipher_xts_setkey,
+ .encrypt = otx_cpt_skcipher_encrypt,
+ .decrypt = otx_cpt_skcipher_decrypt,
+}, {
+ .base.cra_name = "cbc(aes)",
+ .base.cra_driver_name = "cpt_cbc_aes",
+ .base.cra_flags = CRYPTO_ALG_ASYNC,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_priority = 4001,
+ .base.cra_module = THIS_MODULE,
+
+ .init = otx_cpt_enc_dec_init,
+ .ivsize = AES_BLOCK_SIZE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = otx_cpt_skcipher_cbc_aes_setkey,
+ .encrypt = otx_cpt_skcipher_encrypt,
+ .decrypt = otx_cpt_skcipher_decrypt,
+}, {
+ .base.cra_name = "ecb(aes)",
+ .base.cra_driver_name = "cpt_ecb_aes",
+ .base.cra_flags = CRYPTO_ALG_ASYNC,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_priority = 4001,
+ .base.cra_module = THIS_MODULE,
+
+ .init = otx_cpt_enc_dec_init,
+ .ivsize = 0,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = otx_cpt_skcipher_ecb_aes_setkey,
+ .encrypt = otx_cpt_skcipher_encrypt,
+ .decrypt = otx_cpt_skcipher_decrypt,
+}, {
+ .base.cra_name = "cfb(aes)",
+ .base.cra_driver_name = "cpt_cfb_aes",
+ .base.cra_flags = CRYPTO_ALG_ASYNC,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_priority = 4001,
+ .base.cra_module = THIS_MODULE,
+
+ .init = otx_cpt_enc_dec_init,
+ .ivsize = AES_BLOCK_SIZE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = otx_cpt_skcipher_cfb_aes_setkey,
+ .encrypt = otx_cpt_skcipher_encrypt,
+ .decrypt = otx_cpt_skcipher_decrypt,
+}, {
+ .base.cra_name = "cbc(des3_ede)",
+ .base.cra_driver_name = "cpt_cbc_des3_ede",
+ .base.cra_flags = CRYPTO_ALG_ASYNC,
+ .base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct otx_cpt_des3_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_priority = 4001,
+ .base.cra_module = THIS_MODULE,
+
+ .init = otx_cpt_enc_dec_init,
+ .min_keysize = DES3_EDE_KEY_SIZE,
+ .max_keysize = DES3_EDE_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = otx_cpt_skcipher_cbc_des3_setkey,
+ .encrypt = otx_cpt_skcipher_encrypt,
+ .decrypt = otx_cpt_skcipher_decrypt,
+}, {
+ .base.cra_name = "ecb(des3_ede)",
+ .base.cra_driver_name = "cpt_ecb_des3_ede",
+ .base.cra_flags = CRYPTO_ALG_ASYNC,
+ .base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct otx_cpt_des3_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_priority = 4001,
+ .base.cra_module = THIS_MODULE,
+
+ .init = otx_cpt_enc_dec_init,
+ .min_keysize = DES3_EDE_KEY_SIZE,
+ .max_keysize = DES3_EDE_KEY_SIZE,
+ .ivsize = 0,
+ .setkey = otx_cpt_skcipher_ecb_des3_setkey,
+ .encrypt = otx_cpt_skcipher_encrypt,
+ .decrypt = otx_cpt_skcipher_decrypt,
+} };
+
+static struct aead_alg otx_cpt_aeads[] = { {
+ .base = {
+ .cra_name = "authenc(hmac(sha1),cbc(aes))",
+ .cra_driver_name = "cpt_hmac_sha1_cbc_aes",
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
+ .cra_priority = 4001,
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ },
+ .init = otx_cpt_aead_cbc_aes_sha1_init,
+ .exit = otx_cpt_aead_exit,
+ .setkey = otx_cpt_aead_cbc_aes_sha_setkey,
+ .setauthsize = otx_cpt_aead_set_authsize,
+ .encrypt = otx_cpt_aead_encrypt,
+ .decrypt = otx_cpt_aead_decrypt,
+ .ivsize = AES_BLOCK_SIZE,
+ .maxauthsize = SHA1_DIGEST_SIZE,
+}, {
+ .base = {
+ .cra_name = "authenc(hmac(sha256),cbc(aes))",
+ .cra_driver_name = "cpt_hmac_sha256_cbc_aes",
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
+ .cra_priority = 4001,
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ },
+ .init = otx_cpt_aead_cbc_aes_sha256_init,
+ .exit = otx_cpt_aead_exit,
+ .setkey = otx_cpt_aead_cbc_aes_sha_setkey,
+ .setauthsize = otx_cpt_aead_set_authsize,
+ .encrypt = otx_cpt_aead_encrypt,
+ .decrypt = otx_cpt_aead_decrypt,
+ .ivsize = AES_BLOCK_SIZE,
+ .maxauthsize = SHA256_DIGEST_SIZE,
+}, {
+ .base = {
+ .cra_name = "authenc(hmac(sha384),cbc(aes))",
+ .cra_driver_name = "cpt_hmac_sha384_cbc_aes",
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
+ .cra_priority = 4001,
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ },
+ .init = otx_cpt_aead_cbc_aes_sha384_init,
+ .exit = otx_cpt_aead_exit,
+ .setkey = otx_cpt_aead_cbc_aes_sha_setkey,
+ .setauthsize = otx_cpt_aead_set_authsize,
+ .encrypt = otx_cpt_aead_encrypt,
+ .decrypt = otx_cpt_aead_decrypt,
+ .ivsize = AES_BLOCK_SIZE,
+ .maxauthsize = SHA384_DIGEST_SIZE,
+}, {
+ .base = {
+ .cra_name = "authenc(hmac(sha512),cbc(aes))",
+ .cra_driver_name = "cpt_hmac_sha512_cbc_aes",
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
+ .cra_priority = 4001,
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ },
+ .init = otx_cpt_aead_cbc_aes_sha512_init,
+ .exit = otx_cpt_aead_exit,
+ .setkey = otx_cpt_aead_cbc_aes_sha_setkey,
+ .setauthsize = otx_cpt_aead_set_authsize,
+ .encrypt = otx_cpt_aead_encrypt,
+ .decrypt = otx_cpt_aead_decrypt,
+ .ivsize = AES_BLOCK_SIZE,
+ .maxauthsize = SHA512_DIGEST_SIZE,
+}, {
+ .base = {
+ .cra_name = "authenc(hmac(sha1),ecb(cipher_null))",
+ .cra_driver_name = "cpt_hmac_sha1_ecb_null",
+ .cra_blocksize = 1,
+ .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
+ .cra_priority = 4001,
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ },
+ .init = otx_cpt_aead_ecb_null_sha1_init,
+ .exit = otx_cpt_aead_exit,
+ .setkey = otx_cpt_aead_ecb_null_sha_setkey,
+ .setauthsize = otx_cpt_aead_set_authsize,
+ .encrypt = otx_cpt_aead_null_encrypt,
+ .decrypt = otx_cpt_aead_null_decrypt,
+ .ivsize = 0,
+ .maxauthsize = SHA1_DIGEST_SIZE,
+}, {
+ .base = {
+ .cra_name = "authenc(hmac(sha256),ecb(cipher_null))",
+ .cra_driver_name = "cpt_hmac_sha256_ecb_null",
+ .cra_blocksize = 1,
+ .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
+ .cra_priority = 4001,
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ },
+ .init = otx_cpt_aead_ecb_null_sha256_init,
+ .exit = otx_cpt_aead_exit,
+ .setkey = otx_cpt_aead_ecb_null_sha_setkey,
+ .setauthsize = otx_cpt_aead_set_authsize,
+ .encrypt = otx_cpt_aead_null_encrypt,
+ .decrypt = otx_cpt_aead_null_decrypt,
+ .ivsize = 0,
+ .maxauthsize = SHA256_DIGEST_SIZE,
+}, {
+ .base = {
+ .cra_name = "authenc(hmac(sha384),ecb(cipher_null))",
+ .cra_driver_name = "cpt_hmac_sha384_ecb_null",
+ .cra_blocksize = 1,
+ .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
+ .cra_priority = 4001,
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ },
+ .init = otx_cpt_aead_ecb_null_sha384_init,
+ .exit = otx_cpt_aead_exit,
+ .setkey = otx_cpt_aead_ecb_null_sha_setkey,
+ .setauthsize = otx_cpt_aead_set_authsize,
+ .encrypt = otx_cpt_aead_null_encrypt,
+ .decrypt = otx_cpt_aead_null_decrypt,
+ .ivsize = 0,
+ .maxauthsize = SHA384_DIGEST_SIZE,
+}, {
+ .base = {
+ .cra_name = "authenc(hmac(sha512),ecb(cipher_null))",
+ .cra_driver_name = "cpt_hmac_sha512_ecb_null",
+ .cra_blocksize = 1,
+ .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
+ .cra_priority = 4001,
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ },
+ .init = otx_cpt_aead_ecb_null_sha512_init,
+ .exit = otx_cpt_aead_exit,
+ .setkey = otx_cpt_aead_ecb_null_sha_setkey,
+ .setauthsize = otx_cpt_aead_set_authsize,
+ .encrypt = otx_cpt_aead_null_encrypt,
+ .decrypt = otx_cpt_aead_null_decrypt,
+ .ivsize = 0,
+ .maxauthsize = SHA512_DIGEST_SIZE,
+}, {
+ .base = {
+ .cra_name = "rfc4106(gcm(aes))",
+ .cra_driver_name = "cpt_rfc4106_gcm_aes",
+ .cra_blocksize = 1,
+ .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
+ .cra_priority = 4001,
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ },
+ .init = otx_cpt_aead_gcm_aes_init,
+ .exit = otx_cpt_aead_exit,
+ .setkey = otx_cpt_aead_gcm_aes_setkey,
+ .setauthsize = otx_cpt_aead_set_authsize,
+ .encrypt = otx_cpt_aead_encrypt,
+ .decrypt = otx_cpt_aead_decrypt,
+ .ivsize = AES_GCM_IV_SIZE,
+ .maxauthsize = AES_GCM_ICV_SIZE,
+} };
+
+static inline int is_any_alg_used(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(otx_cpt_skciphers); i++)
+ if (refcount_read(&otx_cpt_skciphers[i].base.cra_refcnt) != 1)
+ return true;
+ for (i = 0; i < ARRAY_SIZE(otx_cpt_aeads); i++)
+ if (refcount_read(&otx_cpt_aeads[i].base.cra_refcnt) != 1)
+ return true;
+ return false;
+}
+
+static inline int cpt_register_algs(void)
+{
+ int i, err = 0;
+
+ if (!IS_ENABLED(CONFIG_DM_CRYPT)) {
+ for (i = 0; i < ARRAY_SIZE(otx_cpt_skciphers); i++)
+ otx_cpt_skciphers[i].base.cra_flags &= ~CRYPTO_ALG_DEAD;
+
+ err = crypto_register_skciphers(otx_cpt_skciphers,
+ ARRAY_SIZE(otx_cpt_skciphers));
+ if (err)
+ return err;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(otx_cpt_aeads); i++)
+ otx_cpt_aeads[i].base.cra_flags &= ~CRYPTO_ALG_DEAD;
+
+ err = crypto_register_aeads(otx_cpt_aeads, ARRAY_SIZE(otx_cpt_aeads));
+ if (err) {
+ crypto_unregister_skciphers(otx_cpt_skciphers,
+ ARRAY_SIZE(otx_cpt_skciphers));
+ return err;
+ }
+
+ return 0;
+}
+
+static inline void cpt_unregister_algs(void)
+{
+ crypto_unregister_skciphers(otx_cpt_skciphers,
+ ARRAY_SIZE(otx_cpt_skciphers));
+ crypto_unregister_aeads(otx_cpt_aeads, ARRAY_SIZE(otx_cpt_aeads));
+}
+
+static int compare_func(const void *lptr, const void *rptr)
+{
+ struct cpt_device_desc *ldesc = (struct cpt_device_desc *) lptr;
+ struct cpt_device_desc *rdesc = (struct cpt_device_desc *) rptr;
+
+ if (ldesc->dev->devfn < rdesc->dev->devfn)
+ return -1;
+ if (ldesc->dev->devfn > rdesc->dev->devfn)
+ return 1;
+ return 0;
+}
+
+static void swap_func(void *lptr, void *rptr, int size)
+{
+ struct cpt_device_desc *ldesc = (struct cpt_device_desc *) lptr;
+ struct cpt_device_desc *rdesc = (struct cpt_device_desc *) rptr;
+ struct cpt_device_desc desc;
+
+ desc = *ldesc;
+ *ldesc = *rdesc;
+ *rdesc = desc;
+}
+
+int otx_cpt_crypto_init(struct pci_dev *pdev, struct module *mod,
+ enum otx_cptpf_type pf_type,
+ enum otx_cptvf_type engine_type,
+ int num_queues, int num_devices)
+{
+ int ret = 0;
+ int count;
+
+ mutex_lock(&mutex);
+ switch (engine_type) {
+ case OTX_CPT_SE_TYPES:
+ count = atomic_read(&se_devices.count);
+ if (count >= CPT_MAX_VF_NUM) {
+ dev_err(&pdev->dev, "No space to add a new device");
+ ret = -ENOSPC;
+ goto err;
+ }
+ se_devices.desc[count].pf_type = pf_type;
+ se_devices.desc[count].num_queues = num_queues;
+ se_devices.desc[count++].dev = pdev;
+ atomic_inc(&se_devices.count);
+
+ if (atomic_read(&se_devices.count) == num_devices &&
+ is_crypto_registered == false) {
+ if (cpt_register_algs()) {
+ dev_err(&pdev->dev,
+ "Error in registering crypto algorithms\n");
+ ret = -EINVAL;
+ goto err;
+ }
+ try_module_get(mod);
+ is_crypto_registered = true;
+ }
+ sort(se_devices.desc, count, sizeof(struct cpt_device_desc),
+ compare_func, swap_func);
+ break;
+
+ case OTX_CPT_AE_TYPES:
+ count = atomic_read(&ae_devices.count);
+ if (count >= CPT_MAX_VF_NUM) {
+ dev_err(&pdev->dev, "No space to a add new device");
+ ret = -ENOSPC;
+ goto err;
+ }
+ ae_devices.desc[count].pf_type = pf_type;
+ ae_devices.desc[count].num_queues = num_queues;
+ ae_devices.desc[count++].dev = pdev;
+ atomic_inc(&ae_devices.count);
+ sort(ae_devices.desc, count, sizeof(struct cpt_device_desc),
+ compare_func, swap_func);
+ break;
+
+ default:
+ dev_err(&pdev->dev, "Unknown VF type %d\n", engine_type);
+ ret = BAD_OTX_CPTVF_TYPE;
+ }
+err:
+ mutex_unlock(&mutex);
+ return ret;
+}
+
+void otx_cpt_crypto_exit(struct pci_dev *pdev, struct module *mod,
+ enum otx_cptvf_type engine_type)
+{
+ struct cpt_device_table *dev_tbl;
+ bool dev_found = false;
+ int i, j, count;
+
+ mutex_lock(&mutex);
+
+ dev_tbl = (engine_type == OTX_CPT_AE_TYPES) ? &ae_devices : &se_devices;
+ count = atomic_read(&dev_tbl->count);
+ for (i = 0; i < count; i++)
+ if (pdev == dev_tbl->desc[i].dev) {
+ for (j = i; j < count-1; j++)
+ dev_tbl->desc[j] = dev_tbl->desc[j+1];
+ dev_found = true;
+ break;
+ }
+
+ if (!dev_found) {
+ dev_err(&pdev->dev, "%s device not found", __func__);
+ goto exit;
+ }
+
+ if (engine_type != OTX_CPT_AE_TYPES) {
+ if (atomic_dec_and_test(&se_devices.count) &&
+ !is_any_alg_used()) {
+ cpt_unregister_algs();
+ module_put(mod);
+ is_crypto_registered = false;
+ }
+ } else
+ atomic_dec(&ae_devices.count);
+exit:
+ mutex_unlock(&mutex);
+}
diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.h b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.h
new file mode 100644
index 000000000000..67cc0025f5d5
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.h
@@ -0,0 +1,188 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Marvell OcteonTX CPT driver
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __OTX_CPT_ALGS_H
+#define __OTX_CPT_ALGS_H
+
+#include <crypto/hash.h>
+#include "otx_cpt_common.h"
+
+#define OTX_CPT_MAX_ENC_KEY_SIZE 32
+#define OTX_CPT_MAX_HASH_KEY_SIZE 64
+#define OTX_CPT_MAX_KEY_SIZE (OTX_CPT_MAX_ENC_KEY_SIZE + \
+ OTX_CPT_MAX_HASH_KEY_SIZE)
+enum otx_cpt_request_type {
+ OTX_CPT_ENC_DEC_REQ = 0x1,
+ OTX_CPT_AEAD_ENC_DEC_REQ = 0x2,
+ OTX_CPT_AEAD_ENC_DEC_NULL_REQ = 0x3,
+ OTX_CPT_PASSTHROUGH_REQ = 0x4
+};
+
+enum otx_cpt_major_opcodes {
+ OTX_CPT_MAJOR_OP_MISC = 0x01,
+ OTX_CPT_MAJOR_OP_FC = 0x33,
+ OTX_CPT_MAJOR_OP_HMAC = 0x35,
+};
+
+enum otx_cpt_req_type {
+ OTX_CPT_AE_CORE_REQ,
+ OTX_CPT_SE_CORE_REQ
+};
+
+enum otx_cpt_cipher_type {
+ OTX_CPT_CIPHER_NULL = 0x0,
+ OTX_CPT_DES3_CBC = 0x1,
+ OTX_CPT_DES3_ECB = 0x2,
+ OTX_CPT_AES_CBC = 0x3,
+ OTX_CPT_AES_ECB = 0x4,
+ OTX_CPT_AES_CFB = 0x5,
+ OTX_CPT_AES_CTR = 0x6,
+ OTX_CPT_AES_GCM = 0x7,
+ OTX_CPT_AES_XTS = 0x8
+};
+
+enum otx_cpt_mac_type {
+ OTX_CPT_MAC_NULL = 0x0,
+ OTX_CPT_MD5 = 0x1,
+ OTX_CPT_SHA1 = 0x2,
+ OTX_CPT_SHA224 = 0x3,
+ OTX_CPT_SHA256 = 0x4,
+ OTX_CPT_SHA384 = 0x5,
+ OTX_CPT_SHA512 = 0x6,
+ OTX_CPT_GMAC = 0x7
+};
+
+enum otx_cpt_aes_key_len {
+ OTX_CPT_AES_128_BIT = 0x1,
+ OTX_CPT_AES_192_BIT = 0x2,
+ OTX_CPT_AES_256_BIT = 0x3
+};
+
+union otx_cpt_encr_ctrl {
+ u64 flags;
+ struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 enc_cipher:4;
+ u64 reserved1:1;
+ u64 aes_key:2;
+ u64 iv_source:1;
+ u64 mac_type:4;
+ u64 reserved2:3;
+ u64 auth_input_type:1;
+ u64 mac_len:8;
+ u64 reserved3:8;
+ u64 encr_offset:16;
+ u64 iv_offset:8;
+ u64 auth_offset:8;
+#else
+ u64 auth_offset:8;
+ u64 iv_offset:8;
+ u64 encr_offset:16;
+ u64 reserved3:8;
+ u64 mac_len:8;
+ u64 auth_input_type:1;
+ u64 reserved2:3;
+ u64 mac_type:4;
+ u64 iv_source:1;
+ u64 aes_key:2;
+ u64 reserved1:1;
+ u64 enc_cipher:4;
+#endif
+ } e;
+};
+
+struct otx_cpt_cipher {
+ const char *name;
+ u8 value;
+};
+
+struct otx_cpt_enc_context {
+ union otx_cpt_encr_ctrl enc_ctrl;
+ u8 encr_key[32];
+ u8 encr_iv[16];
+};
+
+union otx_cpt_fchmac_ctx {
+ struct {
+ u8 ipad[64];
+ u8 opad[64];
+ } e;
+ struct {
+ u8 hmac_calc[64]; /* HMAC calculated */
+ u8 hmac_recv[64]; /* HMAC received */
+ } s;
+};
+
+struct otx_cpt_fc_ctx {
+ struct otx_cpt_enc_context enc;
+ union otx_cpt_fchmac_ctx hmac;
+};
+
+struct otx_cpt_enc_ctx {
+ u32 key_len;
+ u8 enc_key[OTX_CPT_MAX_KEY_SIZE];
+ u8 cipher_type;
+ u8 key_type;
+};
+
+struct otx_cpt_des3_ctx {
+ u32 key_len;
+ u8 des3_key[OTX_CPT_MAX_KEY_SIZE];
+};
+
+union otx_cpt_offset_ctrl_word {
+ u64 flags;
+ struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 reserved:32;
+ u64 enc_data_offset:16;
+ u64 iv_offset:8;
+ u64 auth_offset:8;
+#else
+ u64 auth_offset:8;
+ u64 iv_offset:8;
+ u64 enc_data_offset:16;
+ u64 reserved:32;
+#endif
+ } e;
+};
+
+struct otx_cpt_req_ctx {
+ struct otx_cpt_req_info cpt_req;
+ union otx_cpt_offset_ctrl_word ctrl_word;
+ struct otx_cpt_fc_ctx fctx;
+};
+
+struct otx_cpt_sdesc {
+ struct shash_desc shash;
+};
+
+struct otx_cpt_aead_ctx {
+ u8 key[OTX_CPT_MAX_KEY_SIZE];
+ struct crypto_shash *hashalg;
+ struct otx_cpt_sdesc *sdesc;
+ u8 *ipad;
+ u8 *opad;
+ u32 enc_key_len;
+ u32 auth_key_len;
+ u8 cipher_type;
+ u8 mac_type;
+ u8 key_type;
+ u8 is_trunc_hmac;
+};
+int otx_cpt_crypto_init(struct pci_dev *pdev, struct module *mod,
+ enum otx_cptpf_type pf_type,
+ enum otx_cptvf_type engine_type,
+ int num_queues, int num_devices);
+void otx_cpt_crypto_exit(struct pci_dev *pdev, struct module *mod,
+ enum otx_cptvf_type engine_type);
+void otx_cpt_callback(int status, void *arg, void *req);
+
+#endif /* __OTX_CPT_ALGS_H */
diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c
new file mode 100644
index 000000000000..a91860b5dc77
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c
@@ -0,0 +1,985 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTX CPT driver
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include "otx_cptvf.h"
+#include "otx_cptvf_algs.h"
+#include "otx_cptvf_reqmgr.h"
+
+#define DRV_NAME "octeontx-cptvf"
+#define DRV_VERSION "1.0"
+
+static void vq_work_handler(unsigned long data)
+{
+ struct otx_cptvf_wqe_info *cwqe_info =
+ (struct otx_cptvf_wqe_info *) data;
+
+ otx_cpt_post_process(&cwqe_info->vq_wqe[0]);
+}
+
+static int init_worker_threads(struct otx_cptvf *cptvf)
+{
+ struct pci_dev *pdev = cptvf->pdev;
+ struct otx_cptvf_wqe_info *cwqe_info;
+ int i;
+
+ cwqe_info = kzalloc(sizeof(*cwqe_info), GFP_KERNEL);
+ if (!cwqe_info)
+ return -ENOMEM;
+
+ if (cptvf->num_queues) {
+ dev_dbg(&pdev->dev, "Creating VQ worker threads (%d)\n",
+ cptvf->num_queues);
+ }
+
+ for (i = 0; i < cptvf->num_queues; i++) {
+ tasklet_init(&cwqe_info->vq_wqe[i].twork, vq_work_handler,
+ (u64)cwqe_info);
+ cwqe_info->vq_wqe[i].cptvf = cptvf;
+ }
+ cptvf->wqe_info = cwqe_info;
+
+ return 0;
+}
+
+static void cleanup_worker_threads(struct otx_cptvf *cptvf)
+{
+ struct pci_dev *pdev = cptvf->pdev;
+ struct otx_cptvf_wqe_info *cwqe_info;
+ int i;
+
+ cwqe_info = (struct otx_cptvf_wqe_info *)cptvf->wqe_info;
+ if (!cwqe_info)
+ return;
+
+ if (cptvf->num_queues) {
+ dev_dbg(&pdev->dev, "Cleaning VQ worker threads (%u)\n",
+ cptvf->num_queues);
+ }
+
+ for (i = 0; i < cptvf->num_queues; i++)
+ tasklet_kill(&cwqe_info->vq_wqe[i].twork);
+
+ kzfree(cwqe_info);
+ cptvf->wqe_info = NULL;
+}
+
+static void free_pending_queues(struct otx_cpt_pending_qinfo *pqinfo)
+{
+ struct otx_cpt_pending_queue *queue;
+ int i;
+
+ for_each_pending_queue(pqinfo, queue, i) {
+ if (!queue->head)
+ continue;
+
+ /* free single queue */
+ kzfree((queue->head));
+ queue->front = 0;
+ queue->rear = 0;
+ queue->qlen = 0;
+ }
+ pqinfo->num_queues = 0;
+}
+
+static int alloc_pending_queues(struct otx_cpt_pending_qinfo *pqinfo, u32 qlen,
+ u32 num_queues)
+{
+ struct otx_cpt_pending_queue *queue = NULL;
+ size_t size;
+ int ret;
+ u32 i;
+
+ pqinfo->num_queues = num_queues;
+ size = (qlen * sizeof(struct otx_cpt_pending_entry));
+
+ for_each_pending_queue(pqinfo, queue, i) {
+ queue->head = kzalloc((size), GFP_KERNEL);
+ if (!queue->head) {
+ ret = -ENOMEM;
+ goto pending_qfail;
+ }
+
+ queue->pending_count = 0;
+ queue->front = 0;
+ queue->rear = 0;
+ queue->qlen = qlen;
+
+ /* init queue spin lock */
+ spin_lock_init(&queue->lock);
+ }
+ return 0;
+
+pending_qfail:
+ free_pending_queues(pqinfo);
+
+ return ret;
+}
+
+static int init_pending_queues(struct otx_cptvf *cptvf, u32 qlen,
+ u32 num_queues)
+{
+ struct pci_dev *pdev = cptvf->pdev;
+ int ret;
+
+ if (!num_queues)
+ return 0;
+
+ ret = alloc_pending_queues(&cptvf->pqinfo, qlen, num_queues);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to setup pending queues (%u)\n",
+ num_queues);
+ return ret;
+ }
+ return 0;
+}
+
+static void cleanup_pending_queues(struct otx_cptvf *cptvf)
+{
+ struct pci_dev *pdev = cptvf->pdev;
+
+ if (!cptvf->num_queues)
+ return;
+
+ dev_dbg(&pdev->dev, "Cleaning VQ pending queue (%u)\n",
+ cptvf->num_queues);
+ free_pending_queues(&cptvf->pqinfo);
+}
+
+static void free_command_queues(struct otx_cptvf *cptvf,
+ struct otx_cpt_cmd_qinfo *cqinfo)
+{
+ struct otx_cpt_cmd_queue *queue = NULL;
+ struct otx_cpt_cmd_chunk *chunk = NULL;
+ struct pci_dev *pdev = cptvf->pdev;
+ int i;
+
+ /* clean up for each queue */
+ for (i = 0; i < cptvf->num_queues; i++) {
+ queue = &cqinfo->queue[i];
+
+ while (!list_empty(&cqinfo->queue[i].chead)) {
+ chunk = list_first_entry(&cqinfo->queue[i].chead,
+ struct otx_cpt_cmd_chunk, nextchunk);
+
+ dma_free_coherent(&pdev->dev, chunk->size,
+ chunk->head,
+ chunk->dma_addr);
+ chunk->head = NULL;
+ chunk->dma_addr = 0;
+ list_del(&chunk->nextchunk);
+ kzfree(chunk);
+ }
+ queue->num_chunks = 0;
+ queue->idx = 0;
+
+ }
+}
+
+static int alloc_command_queues(struct otx_cptvf *cptvf,
+ struct otx_cpt_cmd_qinfo *cqinfo,
+ u32 qlen)
+{
+ struct otx_cpt_cmd_chunk *curr, *first, *last;
+ struct otx_cpt_cmd_queue *queue = NULL;
+ struct pci_dev *pdev = cptvf->pdev;
+ size_t q_size, c_size, rem_q_size;
+ u32 qcsize_bytes;
+ int i;
+
+
+ /* Qsize in dwords, needed for SADDR config, 1-next chunk pointer */
+ cptvf->qsize = min(qlen, cqinfo->qchunksize) *
+ OTX_CPT_NEXT_CHUNK_PTR_SIZE + 1;
+ /* Qsize in bytes to create space for alignment */
+ q_size = qlen * OTX_CPT_INST_SIZE;
+
+ qcsize_bytes = cqinfo->qchunksize * OTX_CPT_INST_SIZE;
+
+ /* per queue initialization */
+ for (i = 0; i < cptvf->num_queues; i++) {
+ c_size = 0;
+ rem_q_size = q_size;
+ first = NULL;
+ last = NULL;
+
+ queue = &cqinfo->queue[i];
+ INIT_LIST_HEAD(&queue->chead);
+ do {
+ curr = kzalloc(sizeof(*curr), GFP_KERNEL);
+ if (!curr)
+ goto cmd_qfail;
+
+ c_size = (rem_q_size > qcsize_bytes) ? qcsize_bytes :
+ rem_q_size;
+ curr->head = dma_alloc_coherent(&pdev->dev,
+ c_size + OTX_CPT_NEXT_CHUNK_PTR_SIZE,
+ &curr->dma_addr, GFP_KERNEL);
+ if (!curr->head) {
+ dev_err(&pdev->dev,
+ "Command Q (%d) chunk (%d) allocation failed\n",
+ i, queue->num_chunks);
+ goto free_curr;
+ }
+ curr->size = c_size;
+
+ if (queue->num_chunks == 0) {
+ first = curr;
+ queue->base = first;
+ }
+ list_add_tail(&curr->nextchunk,
+ &cqinfo->queue[i].chead);
+
+ queue->num_chunks++;
+ rem_q_size -= c_size;
+ if (last)
+ *((u64 *)(&last->head[last->size])) =
+ (u64)curr->dma_addr;
+
+ last = curr;
+ } while (rem_q_size);
+
+ /*
+ * Make the queue circular, tie back last chunk entry to head
+ */
+ curr = first;
+ *((u64 *)(&last->head[last->size])) = (u64)curr->dma_addr;
+ queue->qhead = curr;
+ }
+ return 0;
+free_curr:
+ kfree(curr);
+cmd_qfail:
+ free_command_queues(cptvf, cqinfo);
+ return -ENOMEM;
+}
+
+static int init_command_queues(struct otx_cptvf *cptvf, u32 qlen)
+{
+ struct pci_dev *pdev = cptvf->pdev;
+ int ret;
+
+ /* setup command queues */
+ ret = alloc_command_queues(cptvf, &cptvf->cqinfo, qlen);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to allocate command queues (%u)\n",
+ cptvf->num_queues);
+ return ret;
+ }
+ return ret;
+}
+
+static void cleanup_command_queues(struct otx_cptvf *cptvf)
+{
+ struct pci_dev *pdev = cptvf->pdev;
+
+ if (!cptvf->num_queues)
+ return;
+
+ dev_dbg(&pdev->dev, "Cleaning VQ command queue (%u)\n",
+ cptvf->num_queues);
+ free_command_queues(cptvf, &cptvf->cqinfo);
+}
+
+static void cptvf_sw_cleanup(struct otx_cptvf *cptvf)
+{
+ cleanup_worker_threads(cptvf);
+ cleanup_pending_queues(cptvf);
+ cleanup_command_queues(cptvf);
+}
+
+static int cptvf_sw_init(struct otx_cptvf *cptvf, u32 qlen, u32 num_queues)
+{
+ struct pci_dev *pdev = cptvf->pdev;
+ u32 max_dev_queues = 0;
+ int ret;
+
+ max_dev_queues = OTX_CPT_NUM_QS_PER_VF;
+ /* possible cpus */
+ num_queues = min_t(u32, num_queues, max_dev_queues);
+ cptvf->num_queues = num_queues;
+
+ ret = init_command_queues(cptvf, qlen);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to setup command queues (%u)\n",
+ num_queues);
+ return ret;
+ }
+
+ ret = init_pending_queues(cptvf, qlen, num_queues);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to setup pending queues (%u)\n",
+ num_queues);
+ goto setup_pqfail;
+ }
+
+ /* Create worker threads for BH processing */
+ ret = init_worker_threads(cptvf);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to setup worker threads\n");
+ goto init_work_fail;
+ }
+ return 0;
+
+init_work_fail:
+ cleanup_worker_threads(cptvf);
+ cleanup_pending_queues(cptvf);
+
+setup_pqfail:
+ cleanup_command_queues(cptvf);
+
+ return ret;
+}
+
+static void cptvf_free_irq_affinity(struct otx_cptvf *cptvf, int vec)
+{
+ irq_set_affinity_hint(pci_irq_vector(cptvf->pdev, vec), NULL);
+ free_cpumask_var(cptvf->affinity_mask[vec]);
+}
+
+static void cptvf_write_vq_ctl(struct otx_cptvf *cptvf, bool val)
+{
+ union otx_cptx_vqx_ctl vqx_ctl;
+
+ vqx_ctl.u = readq(cptvf->reg_base + OTX_CPT_VQX_CTL(0));
+ vqx_ctl.s.ena = val;
+ writeq(vqx_ctl.u, cptvf->reg_base + OTX_CPT_VQX_CTL(0));
+}
+
+void otx_cptvf_write_vq_doorbell(struct otx_cptvf *cptvf, u32 val)
+{
+ union otx_cptx_vqx_doorbell vqx_dbell;
+
+ vqx_dbell.u = readq(cptvf->reg_base + OTX_CPT_VQX_DOORBELL(0));
+ vqx_dbell.s.dbell_cnt = val * 8; /* Num of Instructions * 8 words */
+ writeq(vqx_dbell.u, cptvf->reg_base + OTX_CPT_VQX_DOORBELL(0));
+}
+
+static void cptvf_write_vq_inprog(struct otx_cptvf *cptvf, u8 val)
+{
+ union otx_cptx_vqx_inprog vqx_inprg;
+
+ vqx_inprg.u = readq(cptvf->reg_base + OTX_CPT_VQX_INPROG(0));
+ vqx_inprg.s.inflight = val;
+ writeq(vqx_inprg.u, cptvf->reg_base + OTX_CPT_VQX_INPROG(0));
+}
+
+static void cptvf_write_vq_done_numwait(struct otx_cptvf *cptvf, u32 val)
+{
+ union otx_cptx_vqx_done_wait vqx_dwait;
+
+ vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0));
+ vqx_dwait.s.num_wait = val;
+ writeq(vqx_dwait.u, cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0));
+}
+
+static u32 cptvf_read_vq_done_numwait(struct otx_cptvf *cptvf)
+{
+ union otx_cptx_vqx_done_wait vqx_dwait;
+
+ vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0));
+ return vqx_dwait.s.num_wait;
+}
+
+static void cptvf_write_vq_done_timewait(struct otx_cptvf *cptvf, u16 time)
+{
+ union otx_cptx_vqx_done_wait vqx_dwait;
+
+ vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0));
+ vqx_dwait.s.time_wait = time;
+ writeq(vqx_dwait.u, cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0));
+}
+
+
+static u16 cptvf_read_vq_done_timewait(struct otx_cptvf *cptvf)
+{
+ union otx_cptx_vqx_done_wait vqx_dwait;
+
+ vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0));
+ return vqx_dwait.s.time_wait;
+}
+
+static void cptvf_enable_swerr_interrupts(struct otx_cptvf *cptvf)
+{
+ union otx_cptx_vqx_misc_ena_w1s vqx_misc_ena;
+
+ vqx_misc_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0));
+ /* Enable SWERR interrupts for the requested VF */
+ vqx_misc_ena.s.swerr = 1;
+ writeq(vqx_misc_ena.u, cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0));
+}
+
+static void cptvf_enable_mbox_interrupts(struct otx_cptvf *cptvf)
+{
+ union otx_cptx_vqx_misc_ena_w1s vqx_misc_ena;
+
+ vqx_misc_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0));
+ /* Enable MBOX interrupt for the requested VF */
+ vqx_misc_ena.s.mbox = 1;
+ writeq(vqx_misc_ena.u, cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0));
+}
+
+static void cptvf_enable_done_interrupts(struct otx_cptvf *cptvf)
+{
+ union otx_cptx_vqx_done_ena_w1s vqx_done_ena;
+
+ vqx_done_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_ENA_W1S(0));
+ /* Enable DONE interrupt for the requested VF */
+ vqx_done_ena.s.done = 1;
+ writeq(vqx_done_ena.u, cptvf->reg_base + OTX_CPT_VQX_DONE_ENA_W1S(0));
+}
+
+static void cptvf_clear_dovf_intr(struct otx_cptvf *cptvf)
+{
+ union otx_cptx_vqx_misc_int vqx_misc_int;
+
+ vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0));
+ /* W1C for the VF */
+ vqx_misc_int.s.dovf = 1;
+ writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0));
+}
+
+static void cptvf_clear_irde_intr(struct otx_cptvf *cptvf)
+{
+ union otx_cptx_vqx_misc_int vqx_misc_int;
+
+ vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0));
+ /* W1C for the VF */
+ vqx_misc_int.s.irde = 1;
+ writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0));
+}
+
+static void cptvf_clear_nwrp_intr(struct otx_cptvf *cptvf)
+{
+ union otx_cptx_vqx_misc_int vqx_misc_int;
+
+ vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0));
+ /* W1C for the VF */
+ vqx_misc_int.s.nwrp = 1;
+ writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0));
+}
+
+static void cptvf_clear_mbox_intr(struct otx_cptvf *cptvf)
+{
+ union otx_cptx_vqx_misc_int vqx_misc_int;
+
+ vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0));
+ /* W1C for the VF */
+ vqx_misc_int.s.mbox = 1;
+ writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0));
+}
+
+static void cptvf_clear_swerr_intr(struct otx_cptvf *cptvf)
+{
+ union otx_cptx_vqx_misc_int vqx_misc_int;
+
+ vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0));
+ /* W1C for the VF */
+ vqx_misc_int.s.swerr = 1;
+ writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0));
+}
+
+static u64 cptvf_read_vf_misc_intr_status(struct otx_cptvf *cptvf)
+{
+ return readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0));
+}
+
+static irqreturn_t cptvf_misc_intr_handler(int __always_unused irq,
+ void *arg)
+{
+ struct otx_cptvf *cptvf = arg;
+ struct pci_dev *pdev = cptvf->pdev;
+ u64 intr;
+
+ intr = cptvf_read_vf_misc_intr_status(cptvf);
+ /* Check for MISC interrupt types */
+ if (likely(intr & OTX_CPT_VF_INTR_MBOX_MASK)) {
+ dev_dbg(&pdev->dev, "Mailbox interrupt 0x%llx on CPT VF %d\n",
+ intr, cptvf->vfid);
+ otx_cptvf_handle_mbox_intr(cptvf);
+ cptvf_clear_mbox_intr(cptvf);
+ } else if (unlikely(intr & OTX_CPT_VF_INTR_DOVF_MASK)) {
+ cptvf_clear_dovf_intr(cptvf);
+ /* Clear doorbell count */
+ otx_cptvf_write_vq_doorbell(cptvf, 0);
+ dev_err(&pdev->dev,
+ "Doorbell overflow error interrupt 0x%llx on CPT VF %d\n",
+ intr, cptvf->vfid);
+ } else if (unlikely(intr & OTX_CPT_VF_INTR_IRDE_MASK)) {
+ cptvf_clear_irde_intr(cptvf);
+ dev_err(&pdev->dev,
+ "Instruction NCB read error interrupt 0x%llx on CPT VF %d\n",
+ intr, cptvf->vfid);
+ } else if (unlikely(intr & OTX_CPT_VF_INTR_NWRP_MASK)) {
+ cptvf_clear_nwrp_intr(cptvf);
+ dev_err(&pdev->dev,
+ "NCB response write error interrupt 0x%llx on CPT VF %d\n",
+ intr, cptvf->vfid);
+ } else if (unlikely(intr & OTX_CPT_VF_INTR_SERR_MASK)) {
+ cptvf_clear_swerr_intr(cptvf);
+ dev_err(&pdev->dev,
+ "Software error interrupt 0x%llx on CPT VF %d\n",
+ intr, cptvf->vfid);
+ } else {
+ dev_err(&pdev->dev, "Unhandled interrupt in OTX_CPT VF %d\n",
+ cptvf->vfid);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static inline struct otx_cptvf_wqe *get_cptvf_vq_wqe(struct otx_cptvf *cptvf,
+ int qno)
+{
+ struct otx_cptvf_wqe_info *nwqe_info;
+
+ if (unlikely(qno >= cptvf->num_queues))
+ return NULL;
+ nwqe_info = (struct otx_cptvf_wqe_info *)cptvf->wqe_info;
+
+ return &nwqe_info->vq_wqe[qno];
+}
+
+static inline u32 cptvf_read_vq_done_count(struct otx_cptvf *cptvf)
+{
+ union otx_cptx_vqx_done vqx_done;
+
+ vqx_done.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE(0));
+ return vqx_done.s.done;
+}
+
+static inline void cptvf_write_vq_done_ack(struct otx_cptvf *cptvf,
+ u32 ackcnt)
+{
+ union otx_cptx_vqx_done_ack vqx_dack_cnt;
+
+ vqx_dack_cnt.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_ACK(0));
+ vqx_dack_cnt.s.done_ack = ackcnt;
+ writeq(vqx_dack_cnt.u, cptvf->reg_base + OTX_CPT_VQX_DONE_ACK(0));
+}
+
+static irqreturn_t cptvf_done_intr_handler(int __always_unused irq,
+ void *cptvf_dev)
+{
+ struct otx_cptvf *cptvf = (struct otx_cptvf *)cptvf_dev;
+ struct pci_dev *pdev = cptvf->pdev;
+ /* Read the number of completions */
+ u32 intr = cptvf_read_vq_done_count(cptvf);
+
+ if (intr) {
+ struct otx_cptvf_wqe *wqe;
+
+ /*
+ * Acknowledge the number of scheduled completions for
+ * processing
+ */
+ cptvf_write_vq_done_ack(cptvf, intr);
+ wqe = get_cptvf_vq_wqe(cptvf, 0);
+ if (unlikely(!wqe)) {
+ dev_err(&pdev->dev, "No work to schedule for VF (%d)",
+ cptvf->vfid);
+ return IRQ_NONE;
+ }
+ tasklet_hi_schedule(&wqe->twork);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void cptvf_set_irq_affinity(struct otx_cptvf *cptvf, int vec)
+{
+ struct pci_dev *pdev = cptvf->pdev;
+ int cpu;
+
+ if (!zalloc_cpumask_var(&cptvf->affinity_mask[vec],
+ GFP_KERNEL)) {
+ dev_err(&pdev->dev,
+ "Allocation failed for affinity_mask for VF %d",
+ cptvf->vfid);
+ return;
+ }
+
+ cpu = cptvf->vfid % num_online_cpus();
+ cpumask_set_cpu(cpumask_local_spread(cpu, cptvf->node),
+ cptvf->affinity_mask[vec]);
+ irq_set_affinity_hint(pci_irq_vector(pdev, vec),
+ cptvf->affinity_mask[vec]);
+}
+
+static void cptvf_write_vq_saddr(struct otx_cptvf *cptvf, u64 val)
+{
+ union otx_cptx_vqx_saddr vqx_saddr;
+
+ vqx_saddr.u = val;
+ writeq(vqx_saddr.u, cptvf->reg_base + OTX_CPT_VQX_SADDR(0));
+}
+
+static void cptvf_device_init(struct otx_cptvf *cptvf)
+{
+ u64 base_addr = 0;
+
+ /* Disable the VQ */
+ cptvf_write_vq_ctl(cptvf, 0);
+ /* Reset the doorbell */
+ otx_cptvf_write_vq_doorbell(cptvf, 0);
+ /* Clear inflight */
+ cptvf_write_vq_inprog(cptvf, 0);
+ /* Write VQ SADDR */
+ base_addr = (u64)(cptvf->cqinfo.queue[0].qhead->dma_addr);
+ cptvf_write_vq_saddr(cptvf, base_addr);
+ /* Configure timerhold / coalescence */
+ cptvf_write_vq_done_timewait(cptvf, OTX_CPT_TIMER_HOLD);
+ cptvf_write_vq_done_numwait(cptvf, OTX_CPT_COUNT_HOLD);
+ /* Enable the VQ */
+ cptvf_write_vq_ctl(cptvf, 1);
+ /* Flag the VF ready */
+ cptvf->flags |= OTX_CPT_FLAG_DEVICE_READY;
+}
+
+static ssize_t vf_type_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct otx_cptvf *cptvf = dev_get_drvdata(dev);
+ char *msg;
+
+ switch (cptvf->vftype) {
+ case OTX_CPT_AE_TYPES:
+ msg = "AE";
+ break;
+
+ case OTX_CPT_SE_TYPES:
+ msg = "SE";
+ break;
+
+ default:
+ msg = "Invalid";
+ }
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", msg);
+}
+
+static ssize_t vf_engine_group_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct otx_cptvf *cptvf = dev_get_drvdata(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n", cptvf->vfgrp);
+}
+
+static ssize_t vf_engine_group_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct otx_cptvf *cptvf = dev_get_drvdata(dev);
+ int val, ret;
+
+ ret = kstrtoint(buf, 10, &val);
+ if (ret)
+ return ret;
+
+ if (val < 0)
+ return -EINVAL;
+
+ if (val >= OTX_CPT_MAX_ENGINE_GROUPS) {
+ dev_err(dev, "Engine group >= than max available groups %d",
+ OTX_CPT_MAX_ENGINE_GROUPS);
+ return -EINVAL;
+ }
+
+ ret = otx_cptvf_send_vf_to_grp_msg(cptvf, val);
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+static ssize_t vf_coalesc_time_wait_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct otx_cptvf *cptvf = dev_get_drvdata(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n",
+ cptvf_read_vq_done_timewait(cptvf));
+}
+
+static ssize_t vf_coalesc_num_wait_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct otx_cptvf *cptvf = dev_get_drvdata(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n",
+ cptvf_read_vq_done_numwait(cptvf));
+}
+
+static ssize_t vf_coalesc_time_wait_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct otx_cptvf *cptvf = dev_get_drvdata(dev);
+ long val;
+ int ret;
+
+ ret = kstrtol(buf, 10, &val);
+ if (ret != 0)
+ return ret;
+
+ if (val < OTX_CPT_COALESC_MIN_TIME_WAIT ||
+ val > OTX_CPT_COALESC_MAX_TIME_WAIT)
+ return -EINVAL;
+
+ cptvf_write_vq_done_timewait(cptvf, val);
+ return count;
+}
+
+static ssize_t vf_coalesc_num_wait_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct otx_cptvf *cptvf = dev_get_drvdata(dev);
+ long val;
+ int ret;
+
+ ret = kstrtol(buf, 10, &val);
+ if (ret != 0)
+ return ret;
+
+ if (val < OTX_CPT_COALESC_MIN_NUM_WAIT ||
+ val > OTX_CPT_COALESC_MAX_NUM_WAIT)
+ return -EINVAL;
+
+ cptvf_write_vq_done_numwait(cptvf, val);
+ return count;
+}
+
+static DEVICE_ATTR_RO(vf_type);
+static DEVICE_ATTR_RW(vf_engine_group);
+static DEVICE_ATTR_RW(vf_coalesc_time_wait);
+static DEVICE_ATTR_RW(vf_coalesc_num_wait);
+
+static struct attribute *otx_cptvf_attrs[] = {
+ &dev_attr_vf_type.attr,
+ &dev_attr_vf_engine_group.attr,
+ &dev_attr_vf_coalesc_time_wait.attr,
+ &dev_attr_vf_coalesc_num_wait.attr,
+ NULL
+};
+
+static const struct attribute_group otx_cptvf_sysfs_group = {
+ .attrs = otx_cptvf_attrs,
+};
+
+static int otx_cptvf_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct device *dev = &pdev->dev;
+ struct otx_cptvf *cptvf;
+ int err;
+
+ cptvf = devm_kzalloc(dev, sizeof(*cptvf), GFP_KERNEL);
+ if (!cptvf)
+ return -ENOMEM;
+
+ pci_set_drvdata(pdev, cptvf);
+ cptvf->pdev = pdev;
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(dev, "Failed to enable PCI device\n");
+ goto clear_drvdata;
+ }
+ err = pci_request_regions(pdev, DRV_NAME);
+ if (err) {
+ dev_err(dev, "PCI request regions failed 0x%x\n", err);
+ goto disable_device;
+ }
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+ if (err) {
+ dev_err(dev, "Unable to get usable DMA configuration\n");
+ goto release_regions;
+ }
+
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
+ if (err) {
+ dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n");
+ goto release_regions;
+ }
+
+ /* MAP PF's configuration registers */
+ cptvf->reg_base = pci_iomap(pdev, OTX_CPT_VF_PCI_CFG_BAR, 0);
+ if (!cptvf->reg_base) {
+ dev_err(dev, "Cannot map config register space, aborting\n");
+ err = -ENOMEM;
+ goto release_regions;
+ }
+
+ cptvf->node = dev_to_node(&pdev->dev);
+ err = pci_alloc_irq_vectors(pdev, OTX_CPT_VF_MSIX_VECTORS,
+ OTX_CPT_VF_MSIX_VECTORS, PCI_IRQ_MSIX);
+ if (err < 0) {
+ dev_err(dev, "Request for #%d msix vectors failed\n",
+ OTX_CPT_VF_MSIX_VECTORS);
+ goto unmap_region;
+ }
+
+ err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC),
+ cptvf_misc_intr_handler, 0, "CPT VF misc intr",
+ cptvf);
+ if (err) {
+ dev_err(dev, "Failed to request misc irq");
+ goto free_vectors;
+ }
+
+ /* Enable mailbox interrupt */
+ cptvf_enable_mbox_interrupts(cptvf);
+ cptvf_enable_swerr_interrupts(cptvf);
+
+ /* Check cpt pf status, gets chip ID / device Id from PF if ready */
+ err = otx_cptvf_check_pf_ready(cptvf);
+ if (err)
+ goto free_misc_irq;
+
+ /* CPT VF software resources initialization */
+ cptvf->cqinfo.qchunksize = OTX_CPT_CMD_QCHUNK_SIZE;
+ err = cptvf_sw_init(cptvf, OTX_CPT_CMD_QLEN, OTX_CPT_NUM_QS_PER_VF);
+ if (err) {
+ dev_err(dev, "cptvf_sw_init() failed");
+ goto free_misc_irq;
+ }
+ /* Convey VQ LEN to PF */
+ err = otx_cptvf_send_vq_size_msg(cptvf);
+ if (err)
+ goto sw_cleanup;
+
+ /* CPT VF device initialization */
+ cptvf_device_init(cptvf);
+ /* Send msg to PF to assign currnet Q to required group */
+ err = otx_cptvf_send_vf_to_grp_msg(cptvf, cptvf->vfgrp);
+ if (err)
+ goto sw_cleanup;
+
+ cptvf->priority = 1;
+ err = otx_cptvf_send_vf_priority_msg(cptvf);
+ if (err)
+ goto sw_cleanup;
+
+ err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE),
+ cptvf_done_intr_handler, 0, "CPT VF done intr",
+ cptvf);
+ if (err) {
+ dev_err(dev, "Failed to request done irq\n");
+ goto free_done_irq;
+ }
+
+ /* Enable done interrupt */
+ cptvf_enable_done_interrupts(cptvf);
+
+ /* Set irq affinity masks */
+ cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC);
+ cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE);
+
+ err = otx_cptvf_send_vf_up(cptvf);
+ if (err)
+ goto free_irq_affinity;
+
+ /* Initialize algorithms and set ops */
+ err = otx_cpt_crypto_init(pdev, THIS_MODULE,
+ cptvf->vftype == OTX_CPT_SE_TYPES ? OTX_CPT_SE : OTX_CPT_AE,
+ cptvf->vftype, 1, cptvf->num_vfs);
+ if (err) {
+ dev_err(dev, "Failed to register crypto algs\n");
+ goto free_irq_affinity;
+ }
+
+ err = sysfs_create_group(&dev->kobj, &otx_cptvf_sysfs_group);
+ if (err) {
+ dev_err(dev, "Creating sysfs entries failed\n");
+ goto crypto_exit;
+ }
+
+ return 0;
+
+crypto_exit:
+ otx_cpt_crypto_exit(pdev, THIS_MODULE, cptvf->vftype);
+free_irq_affinity:
+ cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE);
+ cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC);
+free_done_irq:
+ free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), cptvf);
+sw_cleanup:
+ cptvf_sw_cleanup(cptvf);
+free_misc_irq:
+ free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf);
+free_vectors:
+ pci_free_irq_vectors(cptvf->pdev);
+unmap_region:
+ pci_iounmap(pdev, cptvf->reg_base);
+release_regions:
+ pci_release_regions(pdev);
+disable_device:
+ pci_disable_device(pdev);
+clear_drvdata:
+ pci_set_drvdata(pdev, NULL);
+
+ return err;
+}
+
+static void otx_cptvf_remove(struct pci_dev *pdev)
+{
+ struct otx_cptvf *cptvf = pci_get_drvdata(pdev);
+
+ if (!cptvf) {
+ dev_err(&pdev->dev, "Invalid CPT-VF device\n");
+ return;
+ }
+
+ /* Convey DOWN to PF */
+ if (otx_cptvf_send_vf_down(cptvf)) {
+ dev_err(&pdev->dev, "PF not responding to DOWN msg");
+ } else {
+ sysfs_remove_group(&pdev->dev.kobj, &otx_cptvf_sysfs_group);
+ otx_cpt_crypto_exit(pdev, THIS_MODULE, cptvf->vftype);
+ cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE);
+ cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC);
+ free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), cptvf);
+ free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf);
+ cptvf_sw_cleanup(cptvf);
+ pci_free_irq_vectors(cptvf->pdev);
+ pci_iounmap(pdev, cptvf->reg_base);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+ }
+}
+
+/* Supported devices */
+static const struct pci_device_id otx_cptvf_id_table[] = {
+ {PCI_VDEVICE(CAVIUM, OTX_CPT_PCI_VF_DEVICE_ID), 0},
+ { 0, } /* end of table */
+};
+
+static struct pci_driver otx_cptvf_pci_driver = {
+ .name = DRV_NAME,
+ .id_table = otx_cptvf_id_table,
+ .probe = otx_cptvf_probe,
+ .remove = otx_cptvf_remove,
+};
+
+module_pci_driver(otx_cptvf_pci_driver);
+
+MODULE_AUTHOR("Marvell International Ltd.");
+MODULE_DESCRIPTION("Marvell OcteonTX CPT Virtual Function Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRV_VERSION);
+MODULE_DEVICE_TABLE(pci, otx_cptvf_id_table);
diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c b/drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c
new file mode 100644
index 000000000000..5663787c7a62
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTX CPT driver
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include "otx_cptvf.h"
+
+#define CPT_MBOX_MSG_TIMEOUT 2000
+
+static char *get_mbox_opcode_str(int msg_opcode)
+{
+ char *str = "Unknown";
+
+ switch (msg_opcode) {
+ case OTX_CPT_MSG_VF_UP:
+ str = "UP";
+ break;
+
+ case OTX_CPT_MSG_VF_DOWN:
+ str = "DOWN";
+ break;
+
+ case OTX_CPT_MSG_READY:
+ str = "READY";
+ break;
+
+ case OTX_CPT_MSG_QLEN:
+ str = "QLEN";
+ break;
+
+ case OTX_CPT_MSG_QBIND_GRP:
+ str = "QBIND_GRP";
+ break;
+
+ case OTX_CPT_MSG_VQ_PRIORITY:
+ str = "VQ_PRIORITY";
+ break;
+
+ case OTX_CPT_MSG_PF_TYPE:
+ str = "PF_TYPE";
+ break;
+
+ case OTX_CPT_MSG_ACK:
+ str = "ACK";
+ break;
+
+ case OTX_CPT_MSG_NACK:
+ str = "NACK";
+ break;
+ }
+ return str;
+}
+
+static void dump_mbox_msg(struct otx_cpt_mbox *mbox_msg, int vf_id)
+{
+ char raw_data_str[OTX_CPT_MAX_MBOX_DATA_STR_SIZE];
+
+ hex_dump_to_buffer(mbox_msg, sizeof(struct otx_cpt_mbox), 16, 8,
+ raw_data_str, OTX_CPT_MAX_MBOX_DATA_STR_SIZE, false);
+ if (vf_id >= 0)
+ pr_debug("MBOX msg %s received from VF%d raw_data %s",
+ get_mbox_opcode_str(mbox_msg->msg), vf_id,
+ raw_data_str);
+ else
+ pr_debug("MBOX msg %s received from PF raw_data %s",
+ get_mbox_opcode_str(mbox_msg->msg), raw_data_str);
+}
+
+static void cptvf_send_msg_to_pf(struct otx_cptvf *cptvf,
+ struct otx_cpt_mbox *mbx)
+{
+ /* Writing mbox(1) causes interrupt */
+ writeq(mbx->msg, cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 0));
+ writeq(mbx->data, cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 1));
+}
+
+/* Interrupt handler to handle mailbox messages from VFs */
+void otx_cptvf_handle_mbox_intr(struct otx_cptvf *cptvf)
+{
+ struct otx_cpt_mbox mbx = {};
+
+ /*
+ * MBOX[0] contains msg
+ * MBOX[1] contains data
+ */
+ mbx.msg = readq(cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 0));
+ mbx.data = readq(cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 1));
+
+ dump_mbox_msg(&mbx, -1);
+
+ switch (mbx.msg) {
+ case OTX_CPT_MSG_VF_UP:
+ cptvf->pf_acked = true;
+ cptvf->num_vfs = mbx.data;
+ break;
+ case OTX_CPT_MSG_READY:
+ cptvf->pf_acked = true;
+ cptvf->vfid = mbx.data;
+ dev_dbg(&cptvf->pdev->dev, "Received VFID %d\n", cptvf->vfid);
+ break;
+ case OTX_CPT_MSG_QBIND_GRP:
+ cptvf->pf_acked = true;
+ cptvf->vftype = mbx.data;
+ dev_dbg(&cptvf->pdev->dev, "VF %d type %s group %d\n",
+ cptvf->vfid,
+ ((mbx.data == OTX_CPT_SE_TYPES) ? "SE" : "AE"),
+ cptvf->vfgrp);
+ break;
+ case OTX_CPT_MSG_ACK:
+ cptvf->pf_acked = true;
+ break;
+ case OTX_CPT_MSG_NACK:
+ cptvf->pf_nacked = true;
+ break;
+ default:
+ dev_err(&cptvf->pdev->dev, "Invalid msg from PF, msg 0x%llx\n",
+ mbx.msg);
+ break;
+ }
+}
+
+static int cptvf_send_msg_to_pf_timeout(struct otx_cptvf *cptvf,
+ struct otx_cpt_mbox *mbx)
+{
+ int timeout = CPT_MBOX_MSG_TIMEOUT;
+ int sleep = 10;
+
+ cptvf->pf_acked = false;
+ cptvf->pf_nacked = false;
+ cptvf_send_msg_to_pf(cptvf, mbx);
+ /* Wait for previous message to be acked, timeout 2sec */
+ while (!cptvf->pf_acked) {
+ if (cptvf->pf_nacked)
+ return -EINVAL;
+ msleep(sleep);
+ if (cptvf->pf_acked)
+ break;
+ timeout -= sleep;
+ if (!timeout) {
+ dev_err(&cptvf->pdev->dev,
+ "PF didn't ack to mbox msg %llx from VF%u\n",
+ mbx->msg, cptvf->vfid);
+ return -EBUSY;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Checks if VF is able to comminicate with PF
+ * and also gets the CPT number this VF is associated to.
+ */
+int otx_cptvf_check_pf_ready(struct otx_cptvf *cptvf)
+{
+ struct otx_cpt_mbox mbx = {};
+ int ret;
+
+ mbx.msg = OTX_CPT_MSG_READY;
+ ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx);
+
+ return ret;
+}
+
+/*
+ * Communicate VQs size to PF to program CPT(0)_PF_Q(0-15)_CTL of the VF.
+ * Must be ACKed.
+ */
+int otx_cptvf_send_vq_size_msg(struct otx_cptvf *cptvf)
+{
+ struct otx_cpt_mbox mbx = {};
+ int ret;
+
+ mbx.msg = OTX_CPT_MSG_QLEN;
+ mbx.data = cptvf->qsize;
+ ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx);
+
+ return ret;
+}
+
+/*
+ * Communicate VF group required to PF and get the VQ binded to that group
+ */
+int otx_cptvf_send_vf_to_grp_msg(struct otx_cptvf *cptvf, int group)
+{
+ struct otx_cpt_mbox mbx = {};
+ int ret;
+
+ mbx.msg = OTX_CPT_MSG_QBIND_GRP;
+ /* Convey group of the VF */
+ mbx.data = group;
+ ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx);
+ if (ret)
+ return ret;
+ cptvf->vfgrp = group;
+
+ return 0;
+}
+
+/*
+ * Communicate VF group required to PF and get the VQ binded to that group
+ */
+int otx_cptvf_send_vf_priority_msg(struct otx_cptvf *cptvf)
+{
+ struct otx_cpt_mbox mbx = {};
+ int ret;
+
+ mbx.msg = OTX_CPT_MSG_VQ_PRIORITY;
+ /* Convey group of the VF */
+ mbx.data = cptvf->priority;
+ ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx);
+
+ return ret;
+}
+
+/*
+ * Communicate to PF that VF is UP and running
+ */
+int otx_cptvf_send_vf_up(struct otx_cptvf *cptvf)
+{
+ struct otx_cpt_mbox mbx = {};
+ int ret;
+
+ mbx.msg = OTX_CPT_MSG_VF_UP;
+ ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx);
+
+ return ret;
+}
+
+/*
+ * Communicate to PF that VF is DOWN and running
+ */
+int otx_cptvf_send_vf_down(struct otx_cptvf *cptvf)
+{
+ struct otx_cpt_mbox mbx = {};
+ int ret;
+
+ mbx.msg = OTX_CPT_MSG_VF_DOWN;
+ ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx);
+
+ return ret;
+}
diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c
new file mode 100644
index 000000000000..df839b880354
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c
@@ -0,0 +1,612 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTX CPT driver
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "otx_cptvf.h"
+#include "otx_cptvf_algs.h"
+
+/* Completion code size and initial value */
+#define COMPLETION_CODE_SIZE 8
+#define COMPLETION_CODE_INIT 0
+
+/* SG list header size in bytes */
+#define SG_LIST_HDR_SIZE 8
+
+/* Default timeout when waiting for free pending entry in us */
+#define CPT_PENTRY_TIMEOUT 1000
+#define CPT_PENTRY_STEP 50
+
+/* Default threshold for stopping and resuming sender requests */
+#define CPT_IQ_STOP_MARGIN 128
+#define CPT_IQ_RESUME_MARGIN 512
+
+#define CPT_DMA_ALIGN 128
+
+void otx_cpt_dump_sg_list(struct pci_dev *pdev, struct otx_cpt_req_info *req)
+{
+ int i;
+
+ pr_debug("Gather list size %d\n", req->incnt);
+ for (i = 0; i < req->incnt; i++) {
+ pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i,
+ req->in[i].size, req->in[i].vptr,
+ (void *) req->in[i].dma_addr);
+ pr_debug("Buffer hexdump (%d bytes)\n",
+ req->in[i].size);
+ print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1,
+ req->in[i].vptr, req->in[i].size, false);
+ }
+
+ pr_debug("Scatter list size %d\n", req->outcnt);
+ for (i = 0; i < req->outcnt; i++) {
+ pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i,
+ req->out[i].size, req->out[i].vptr,
+ (void *) req->out[i].dma_addr);
+ pr_debug("Buffer hexdump (%d bytes)\n", req->out[i].size);
+ print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1,
+ req->out[i].vptr, req->out[i].size, false);
+ }
+}
+
+static inline struct otx_cpt_pending_entry *get_free_pending_entry(
+ struct otx_cpt_pending_queue *q,
+ int qlen)
+{
+ struct otx_cpt_pending_entry *ent = NULL;
+
+ ent = &q->head[q->rear];
+ if (unlikely(ent->busy))
+ return NULL;
+
+ q->rear++;
+ if (unlikely(q->rear == qlen))
+ q->rear = 0;
+
+ return ent;
+}
+
+static inline u32 modulo_inc(u32 index, u32 length, u32 inc)
+{
+ if (WARN_ON(inc > length))
+ inc = length;
+
+ index += inc;
+ if (unlikely(index >= length))
+ index -= length;
+
+ return index;
+}
+
+static inline void free_pentry(struct otx_cpt_pending_entry *pentry)
+{
+ pentry->completion_addr = NULL;
+ pentry->info = NULL;
+ pentry->callback = NULL;
+ pentry->areq = NULL;
+ pentry->resume_sender = false;
+ pentry->busy = false;
+}
+
+static inline int setup_sgio_components(struct pci_dev *pdev,
+ struct otx_cpt_buf_ptr *list,
+ int buf_count, u8 *buffer)
+{
+ struct otx_cpt_sglist_component *sg_ptr = NULL;
+ int ret = 0, i, j;
+ int components;
+
+ if (unlikely(!list)) {
+ dev_err(&pdev->dev, "Input list pointer is NULL\n");
+ return -EFAULT;
+ }
+
+ for (i = 0; i < buf_count; i++) {
+ if (likely(list[i].vptr)) {
+ list[i].dma_addr = dma_map_single(&pdev->dev,
+ list[i].vptr,
+ list[i].size,
+ DMA_BIDIRECTIONAL);
+ if (unlikely(dma_mapping_error(&pdev->dev,
+ list[i].dma_addr))) {
+ dev_err(&pdev->dev, "Dma mapping failed\n");
+ ret = -EIO;
+ goto sg_cleanup;
+ }
+ }
+ }
+
+ components = buf_count / 4;
+ sg_ptr = (struct otx_cpt_sglist_component *)buffer;
+ for (i = 0; i < components; i++) {
+ sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size);
+ sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size);
+ sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size);
+ sg_ptr->u.s.len3 = cpu_to_be16(list[i * 4 + 3].size);
+ sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr);
+ sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr);
+ sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr);
+ sg_ptr->ptr3 = cpu_to_be64(list[i * 4 + 3].dma_addr);
+ sg_ptr++;
+ }
+ components = buf_count % 4;
+
+ switch (components) {
+ case 3:
+ sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size);
+ sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr);
+ /* Fall through */
+ case 2:
+ sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size);
+ sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr);
+ /* Fall through */
+ case 1:
+ sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size);
+ sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr);
+ break;
+ default:
+ break;
+ }
+ return ret;
+
+sg_cleanup:
+ for (j = 0; j < i; j++) {
+ if (list[j].dma_addr) {
+ dma_unmap_single(&pdev->dev, list[i].dma_addr,
+ list[i].size, DMA_BIDIRECTIONAL);
+ }
+
+ list[j].dma_addr = 0;
+ }
+ return ret;
+}
+
+static inline int setup_sgio_list(struct pci_dev *pdev,
+ struct otx_cpt_info_buffer **pinfo,
+ struct otx_cpt_req_info *req, gfp_t gfp)
+{
+ u32 dlen, align_dlen, info_len, rlen;
+ struct otx_cpt_info_buffer *info;
+ u16 g_sz_bytes, s_sz_bytes;
+ int align = CPT_DMA_ALIGN;
+ u32 total_mem_len;
+
+ if (unlikely(req->incnt > OTX_CPT_MAX_SG_IN_CNT ||
+ req->outcnt > OTX_CPT_MAX_SG_OUT_CNT)) {
+ dev_err(&pdev->dev, "Error too many sg components\n");
+ return -EINVAL;
+ }
+
+ g_sz_bytes = ((req->incnt + 3) / 4) *
+ sizeof(struct otx_cpt_sglist_component);
+ s_sz_bytes = ((req->outcnt + 3) / 4) *
+ sizeof(struct otx_cpt_sglist_component);
+
+ dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
+ align_dlen = ALIGN(dlen, align);
+ info_len = ALIGN(sizeof(*info), align);
+ rlen = ALIGN(sizeof(union otx_cpt_res_s), align);
+ total_mem_len = align_dlen + info_len + rlen + COMPLETION_CODE_SIZE;
+
+ info = kzalloc(total_mem_len, gfp);
+ if (unlikely(!info)) {
+ dev_err(&pdev->dev, "Memory allocation failed\n");
+ return -ENOMEM;
+ }
+ *pinfo = info;
+ info->dlen = dlen;
+ info->in_buffer = (u8 *)info + info_len;
+
+ ((u16 *)info->in_buffer)[0] = req->outcnt;
+ ((u16 *)info->in_buffer)[1] = req->incnt;
+ ((u16 *)info->in_buffer)[2] = 0;
+ ((u16 *)info->in_buffer)[3] = 0;
+ *(u64 *)info->in_buffer = cpu_to_be64p((u64 *)info->in_buffer);
+
+ /* Setup gather (input) components */
+ if (setup_sgio_components(pdev, req->in, req->incnt,
+ &info->in_buffer[8])) {
+ dev_err(&pdev->dev, "Failed to setup gather list\n");
+ return -EFAULT;
+ }
+
+ if (setup_sgio_components(pdev, req->out, req->outcnt,
+ &info->in_buffer[8 + g_sz_bytes])) {
+ dev_err(&pdev->dev, "Failed to setup scatter list\n");
+ return -EFAULT;
+ }
+
+ info->dma_len = total_mem_len - info_len;
+ info->dptr_baddr = dma_map_single(&pdev->dev, (void *)info->in_buffer,
+ info->dma_len, DMA_BIDIRECTIONAL);
+ if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) {
+ dev_err(&pdev->dev, "DMA Mapping failed for cpt req\n");
+ return -EIO;
+ }
+ /*
+ * Get buffer for union otx_cpt_res_s response
+ * structure and its physical address
+ */
+ info->completion_addr = (u64 *)(info->in_buffer + align_dlen);
+ info->comp_baddr = info->dptr_baddr + align_dlen;
+
+ /* Create and initialize RPTR */
+ info->out_buffer = (u8 *)info->completion_addr + rlen;
+ info->rptr_baddr = info->comp_baddr + rlen;
+
+ *((u64 *) info->out_buffer) = ~((u64) COMPLETION_CODE_INIT);
+
+ return 0;
+}
+
+
+static void cpt_fill_inst(union otx_cpt_inst_s *inst,
+ struct otx_cpt_info_buffer *info,
+ struct otx_cpt_iq_cmd *cmd)
+{
+ inst->u[0] = 0x0;
+ inst->s.doneint = true;
+ inst->s.res_addr = (u64)info->comp_baddr;
+ inst->u[2] = 0x0;
+ inst->s.wq_ptr = 0;
+ inst->s.ei0 = cmd->cmd.u64;
+ inst->s.ei1 = cmd->dptr;
+ inst->s.ei2 = cmd->rptr;
+ inst->s.ei3 = cmd->cptr.u64;
+}
+
+/*
+ * On OcteonTX platform the parameter db_count is used as a count for ringing
+ * door bell. The valid values for db_count are:
+ * 0 - 1 CPT instruction will be enqueued however CPT will not be informed
+ * 1 - 1 CPT instruction will be enqueued and CPT will be informed
+ */
+static void cpt_send_cmd(union otx_cpt_inst_s *cptinst, struct otx_cptvf *cptvf)
+{
+ struct otx_cpt_cmd_qinfo *qinfo = &cptvf->cqinfo;
+ struct otx_cpt_cmd_queue *queue;
+ struct otx_cpt_cmd_chunk *curr;
+ u8 *ent;
+
+ queue = &qinfo->queue[0];
+ /*
+ * cpt_send_cmd is currently called only from critical section
+ * therefore no locking is required for accessing instruction queue
+ */
+ ent = &queue->qhead->head[queue->idx * OTX_CPT_INST_SIZE];
+ memcpy(ent, (void *) cptinst, OTX_CPT_INST_SIZE);
+
+ if (++queue->idx >= queue->qhead->size / 64) {
+ curr = queue->qhead;
+
+ if (list_is_last(&curr->nextchunk, &queue->chead))
+ queue->qhead = queue->base;
+ else
+ queue->qhead = list_next_entry(queue->qhead, nextchunk);
+ queue->idx = 0;
+ }
+ /* make sure all memory stores are done before ringing doorbell */
+ smp_wmb();
+ otx_cptvf_write_vq_doorbell(cptvf, 1);
+}
+
+static int process_request(struct pci_dev *pdev, struct otx_cpt_req_info *req,
+ struct otx_cpt_pending_queue *pqueue,
+ struct otx_cptvf *cptvf)
+{
+ struct otx_cptvf_request *cpt_req = &req->req;
+ struct otx_cpt_pending_entry *pentry = NULL;
+ union otx_cpt_ctrl_info *ctrl = &req->ctrl;
+ struct otx_cpt_info_buffer *info = NULL;
+ union otx_cpt_res_s *result = NULL;
+ struct otx_cpt_iq_cmd iq_cmd;
+ union otx_cpt_inst_s cptinst;
+ int retry, ret = 0;
+ u8 resume_sender;
+ gfp_t gfp;
+
+ gfp = (req->areq->flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL :
+ GFP_ATOMIC;
+ ret = setup_sgio_list(pdev, &info, req, gfp);
+ if (unlikely(ret)) {
+ dev_err(&pdev->dev, "Setting up SG list failed");
+ goto request_cleanup;
+ }
+ cpt_req->dlen = info->dlen;
+
+ result = (union otx_cpt_res_s *) info->completion_addr;
+ result->s.compcode = COMPLETION_CODE_INIT;
+
+ spin_lock_bh(&pqueue->lock);
+ pentry = get_free_pending_entry(pqueue, pqueue->qlen);
+ retry = CPT_PENTRY_TIMEOUT / CPT_PENTRY_STEP;
+ while (unlikely(!pentry) && retry--) {
+ spin_unlock_bh(&pqueue->lock);
+ udelay(CPT_PENTRY_STEP);
+ spin_lock_bh(&pqueue->lock);
+ pentry = get_free_pending_entry(pqueue, pqueue->qlen);
+ }
+
+ if (unlikely(!pentry)) {
+ ret = -ENOSPC;
+ spin_unlock_bh(&pqueue->lock);
+ goto request_cleanup;
+ }
+
+ /*
+ * Check if we are close to filling in entire pending queue,
+ * if so then tell the sender to stop/sleep by returning -EBUSY
+ * We do it only for context which can sleep (GFP_KERNEL)
+ */
+ if (gfp == GFP_KERNEL &&
+ pqueue->pending_count > (pqueue->qlen - CPT_IQ_STOP_MARGIN)) {
+ pentry->resume_sender = true;
+ } else
+ pentry->resume_sender = false;
+ resume_sender = pentry->resume_sender;
+ pqueue->pending_count++;
+
+ pentry->completion_addr = info->completion_addr;
+ pentry->info = info;
+ pentry->callback = req->callback;
+ pentry->areq = req->areq;
+ pentry->busy = true;
+ info->pentry = pentry;
+ info->time_in = jiffies;
+ info->req = req;
+
+ /* Fill in the command */
+ iq_cmd.cmd.u64 = 0;
+ iq_cmd.cmd.s.opcode = cpu_to_be16(cpt_req->opcode.flags);
+ iq_cmd.cmd.s.param1 = cpu_to_be16(cpt_req->param1);
+ iq_cmd.cmd.s.param2 = cpu_to_be16(cpt_req->param2);
+ iq_cmd.cmd.s.dlen = cpu_to_be16(cpt_req->dlen);
+
+ /* 64-bit swap for microcode data reads, not needed for addresses*/
+ iq_cmd.cmd.u64 = cpu_to_be64(iq_cmd.cmd.u64);
+ iq_cmd.dptr = info->dptr_baddr;
+ iq_cmd.rptr = info->rptr_baddr;
+ iq_cmd.cptr.u64 = 0;
+ iq_cmd.cptr.s.grp = ctrl->s.grp;
+
+ /* Fill in the CPT_INST_S type command for HW interpretation */
+ cpt_fill_inst(&cptinst, info, &iq_cmd);
+
+ /* Print debug info if enabled */
+ otx_cpt_dump_sg_list(pdev, req);
+ pr_debug("Cpt_inst_s hexdump (%d bytes)\n", OTX_CPT_INST_SIZE);
+ print_hex_dump_debug("", 0, 16, 1, &cptinst, OTX_CPT_INST_SIZE, false);
+ pr_debug("Dptr hexdump (%d bytes)\n", cpt_req->dlen);
+ print_hex_dump_debug("", 0, 16, 1, info->in_buffer,
+ cpt_req->dlen, false);
+
+ /* Send CPT command */
+ cpt_send_cmd(&cptinst, cptvf);
+
+ /*
+ * We allocate and prepare pending queue entry in critical section
+ * together with submitting CPT instruction to CPT instruction queue
+ * to make sure that order of CPT requests is the same in both
+ * pending and instruction queues
+ */
+ spin_unlock_bh(&pqueue->lock);
+
+ ret = resume_sender ? -EBUSY : -EINPROGRESS;
+ return ret;
+
+request_cleanup:
+ do_request_cleanup(pdev, info);
+ return ret;
+}
+
+int otx_cpt_do_request(struct pci_dev *pdev, struct otx_cpt_req_info *req,
+ int cpu_num)
+{
+ struct otx_cptvf *cptvf = pci_get_drvdata(pdev);
+
+ if (!otx_cpt_device_ready(cptvf)) {
+ dev_err(&pdev->dev, "CPT Device is not ready");
+ return -ENODEV;
+ }
+
+ if ((cptvf->vftype == OTX_CPT_SE_TYPES) && (!req->ctrl.s.se_req)) {
+ dev_err(&pdev->dev, "CPTVF-%d of SE TYPE got AE request",
+ cptvf->vfid);
+ return -EINVAL;
+ } else if ((cptvf->vftype == OTX_CPT_AE_TYPES) &&
+ (req->ctrl.s.se_req)) {
+ dev_err(&pdev->dev, "CPTVF-%d of AE TYPE got SE request",
+ cptvf->vfid);
+ return -EINVAL;
+ }
+
+ return process_request(pdev, req, &cptvf->pqinfo.queue[0], cptvf);
+}
+
+static int cpt_process_ccode(struct pci_dev *pdev,
+ union otx_cpt_res_s *cpt_status,
+ struct otx_cpt_info_buffer *cpt_info,
+ struct otx_cpt_req_info *req, u32 *res_code)
+{
+ u8 ccode = cpt_status->s.compcode;
+ union otx_cpt_error_code ecode;
+
+ ecode.u = be64_to_cpu(*((u64 *) cpt_info->out_buffer));
+ switch (ccode) {
+ case CPT_COMP_E_FAULT:
+ dev_err(&pdev->dev,
+ "Request failed with DMA fault\n");
+ otx_cpt_dump_sg_list(pdev, req);
+ break;
+
+ case CPT_COMP_E_SWERR:
+ dev_err(&pdev->dev,
+ "Request failed with software error code %d\n",
+ ecode.s.ccode);
+ otx_cpt_dump_sg_list(pdev, req);
+ break;
+
+ case CPT_COMP_E_HWERR:
+ dev_err(&pdev->dev,
+ "Request failed with hardware error\n");
+ otx_cpt_dump_sg_list(pdev, req);
+ break;
+
+ case COMPLETION_CODE_INIT:
+ /* check for timeout */
+ if (time_after_eq(jiffies, cpt_info->time_in +
+ OTX_CPT_COMMAND_TIMEOUT * HZ))
+ dev_warn(&pdev->dev, "Request timed out 0x%p", req);
+ else if (cpt_info->extra_time < OTX_CPT_TIME_IN_RESET_COUNT) {
+ cpt_info->time_in = jiffies;
+ cpt_info->extra_time++;
+ }
+ return 1;
+
+ case CPT_COMP_E_GOOD:
+ /* Check microcode completion code */
+ if (ecode.s.ccode) {
+ /*
+ * If requested hmac is truncated and ucode returns
+ * s/g write length error then we report success
+ * because ucode writes as many bytes of calculated
+ * hmac as available in gather buffer and reports
+ * s/g write length error if number of bytes in gather
+ * buffer is less than full hmac size.
+ */
+ if (req->is_trunc_hmac &&
+ ecode.s.ccode == ERR_SCATTER_GATHER_WRITE_LENGTH) {
+ *res_code = 0;
+ break;
+ }
+
+ dev_err(&pdev->dev,
+ "Request failed with software error code 0x%x\n",
+ ecode.s.ccode);
+ otx_cpt_dump_sg_list(pdev, req);
+ break;
+ }
+
+ /* Request has been processed with success */
+ *res_code = 0;
+ break;
+
+ default:
+ dev_err(&pdev->dev, "Request returned invalid status\n");
+ break;
+ }
+
+ return 0;
+}
+
+static inline void process_pending_queue(struct pci_dev *pdev,
+ struct otx_cpt_pending_queue *pqueue)
+{
+ void (*callback)(int status, void *arg1, void *arg2);
+ struct otx_cpt_pending_entry *resume_pentry = NULL;
+ struct otx_cpt_pending_entry *pentry = NULL;
+ struct otx_cpt_info_buffer *cpt_info = NULL;
+ union otx_cpt_res_s *cpt_status = NULL;
+ struct otx_cpt_req_info *req = NULL;
+ struct crypto_async_request *areq;
+ u32 res_code, resume_index;
+
+ while (1) {
+ spin_lock_bh(&pqueue->lock);
+ pentry = &pqueue->head[pqueue->front];
+
+ if (WARN_ON(!pentry)) {
+ spin_unlock_bh(&pqueue->lock);
+ break;
+ }
+
+ res_code = -EINVAL;
+ if (unlikely(!pentry->busy)) {
+ spin_unlock_bh(&pqueue->lock);
+ break;
+ }
+
+ if (unlikely(!pentry->callback)) {
+ dev_err(&pdev->dev, "Callback NULL\n");
+ goto process_pentry;
+ }
+
+ cpt_info = pentry->info;
+ if (unlikely(!cpt_info)) {
+ dev_err(&pdev->dev, "Pending entry post arg NULL\n");
+ goto process_pentry;
+ }
+
+ req = cpt_info->req;
+ if (unlikely(!req)) {
+ dev_err(&pdev->dev, "Request NULL\n");
+ goto process_pentry;
+ }
+
+ cpt_status = (union otx_cpt_res_s *) pentry->completion_addr;
+ if (unlikely(!cpt_status)) {
+ dev_err(&pdev->dev, "Completion address NULL\n");
+ goto process_pentry;
+ }
+
+ if (cpt_process_ccode(pdev, cpt_status, cpt_info, req,
+ &res_code)) {
+ spin_unlock_bh(&pqueue->lock);
+ return;
+ }
+ cpt_info->pdev = pdev;
+
+process_pentry:
+ /*
+ * Check if we should inform sending side to resume
+ * We do it CPT_IQ_RESUME_MARGIN elements in advance before
+ * pending queue becomes empty
+ */
+ resume_index = modulo_inc(pqueue->front, pqueue->qlen,
+ CPT_IQ_RESUME_MARGIN);
+ resume_pentry = &pqueue->head[resume_index];
+ if (resume_pentry &&
+ resume_pentry->resume_sender) {
+ resume_pentry->resume_sender = false;
+ callback = resume_pentry->callback;
+ areq = resume_pentry->areq;
+
+ if (callback) {
+ spin_unlock_bh(&pqueue->lock);
+
+ /*
+ * EINPROGRESS is an indication for sending
+ * side that it can resume sending requests
+ */
+ callback(-EINPROGRESS, areq, cpt_info);
+ spin_lock_bh(&pqueue->lock);
+ }
+ }
+
+ callback = pentry->callback;
+ areq = pentry->areq;
+ free_pentry(pentry);
+
+ pqueue->pending_count--;
+ pqueue->front = modulo_inc(pqueue->front, pqueue->qlen, 1);
+ spin_unlock_bh(&pqueue->lock);
+
+ /*
+ * Call callback after current pending entry has been
+ * processed, we don't do it if the callback pointer is
+ * invalid.
+ */
+ if (callback)
+ callback(res_code, areq, cpt_info);
+ }
+}
+
+void otx_cpt_post_process(struct otx_cptvf_wqe *wqe)
+{
+ process_pending_queue(wqe->cptvf->pdev, &wqe->cptvf->pqinfo.queue[0]);
+}
diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h
new file mode 100644
index 000000000000..a4c9ff730b13
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Marvell OcteonTX CPT driver
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __OTX_CPTVF_REQUEST_MANAGER_H
+#define __OTX_CPTVF_REQUEST_MANAGER_H
+
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/pci.h>
+#include "otx_cpt_hw_types.h"
+
+/*
+ * Maximum total number of SG buffers is 100, we divide it equally
+ * between input and output
+ */
+#define OTX_CPT_MAX_SG_IN_CNT 50
+#define OTX_CPT_MAX_SG_OUT_CNT 50
+
+/* DMA mode direct or SG */
+#define OTX_CPT_DMA_DIRECT_DIRECT 0
+#define OTX_CPT_DMA_GATHER_SCATTER 1
+
+/* Context source CPTR or DPTR */
+#define OTX_CPT_FROM_CPTR 0
+#define OTX_CPT_FROM_DPTR 1
+
+/* CPT instruction queue alignment */
+#define OTX_CPT_INST_Q_ALIGNMENT 128
+#define OTX_CPT_MAX_REQ_SIZE 65535
+
+/* Default command timeout in seconds */
+#define OTX_CPT_COMMAND_TIMEOUT 4
+#define OTX_CPT_TIMER_HOLD 0x03F
+#define OTX_CPT_COUNT_HOLD 32
+#define OTX_CPT_TIME_IN_RESET_COUNT 5
+
+/* Minimum and maximum values for interrupt coalescing */
+#define OTX_CPT_COALESC_MIN_TIME_WAIT 0x0
+#define OTX_CPT_COALESC_MAX_TIME_WAIT ((1<<16)-1)
+#define OTX_CPT_COALESC_MIN_NUM_WAIT 0x0
+#define OTX_CPT_COALESC_MAX_NUM_WAIT ((1<<20)-1)
+
+union otx_cpt_opcode_info {
+ u16 flags;
+ struct {
+ u8 major;
+ u8 minor;
+ } s;
+};
+
+struct otx_cptvf_request {
+ u32 param1;
+ u32 param2;
+ u16 dlen;
+ union otx_cpt_opcode_info opcode;
+};
+
+struct otx_cpt_buf_ptr {
+ u8 *vptr;
+ dma_addr_t dma_addr;
+ u16 size;
+};
+
+union otx_cpt_ctrl_info {
+ u32 flags;
+ struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u32 reserved0:26;
+ u32 grp:3; /* Group bits */
+ u32 dma_mode:2; /* DMA mode */
+ u32 se_req:1; /* To SE core */
+#else
+ u32 se_req:1; /* To SE core */
+ u32 dma_mode:2; /* DMA mode */
+ u32 grp:3; /* Group bits */
+ u32 reserved0:26;
+#endif
+ } s;
+};
+
+/*
+ * CPT_INST_S software command definitions
+ * Words EI (0-3)
+ */
+union otx_cpt_iq_cmd_word0 {
+ u64 u64;
+ struct {
+ u16 opcode;
+ u16 param1;
+ u16 param2;
+ u16 dlen;
+ } s;
+};
+
+union otx_cpt_iq_cmd_word3 {
+ u64 u64;
+ struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 grp:3;
+ u64 cptr:61;
+#else
+ u64 cptr:61;
+ u64 grp:3;
+#endif
+ } s;
+};
+
+struct otx_cpt_iq_cmd {
+ union otx_cpt_iq_cmd_word0 cmd;
+ u64 dptr;
+ u64 rptr;
+ union otx_cpt_iq_cmd_word3 cptr;
+};
+
+struct otx_cpt_sglist_component {
+ union {
+ u64 len;
+ struct {
+ u16 len0;
+ u16 len1;
+ u16 len2;
+ u16 len3;
+ } s;
+ } u;
+ u64 ptr0;
+ u64 ptr1;
+ u64 ptr2;
+ u64 ptr3;
+};
+
+struct otx_cpt_pending_entry {
+ u64 *completion_addr; /* Completion address */
+ struct otx_cpt_info_buffer *info;
+ /* Kernel async request callback */
+ void (*callback)(int status, void *arg1, void *arg2);
+ struct crypto_async_request *areq; /* Async request callback arg */
+ u8 resume_sender; /* Notify sender to resume sending requests */
+ u8 busy; /* Entry status (free/busy) */
+};
+
+struct otx_cpt_pending_queue {
+ struct otx_cpt_pending_entry *head; /* Head of the queue */
+ u32 front; /* Process work from here */
+ u32 rear; /* Append new work here */
+ u32 pending_count; /* Pending requests count */
+ u32 qlen; /* Queue length */
+ spinlock_t lock; /* Queue lock */
+};
+
+struct otx_cpt_req_info {
+ /* Kernel async request callback */
+ void (*callback)(int status, void *arg1, void *arg2);
+ struct crypto_async_request *areq; /* Async request callback arg */
+ struct otx_cptvf_request req;/* Request information (core specific) */
+ union otx_cpt_ctrl_info ctrl;/* User control information */
+ struct otx_cpt_buf_ptr in[OTX_CPT_MAX_SG_IN_CNT];
+ struct otx_cpt_buf_ptr out[OTX_CPT_MAX_SG_OUT_CNT];
+ u8 *iv_out; /* IV to send back */
+ u16 rlen; /* Output length */
+ u8 incnt; /* Number of input buffers */
+ u8 outcnt; /* Number of output buffers */
+ u8 req_type; /* Type of request */
+ u8 is_enc; /* Is a request an encryption request */
+ u8 is_trunc_hmac;/* Is truncated hmac used */
+};
+
+struct otx_cpt_info_buffer {
+ struct otx_cpt_pending_entry *pentry;
+ struct otx_cpt_req_info *req;
+ struct pci_dev *pdev;
+ u64 *completion_addr;
+ u8 *out_buffer;
+ u8 *in_buffer;
+ dma_addr_t dptr_baddr;
+ dma_addr_t rptr_baddr;
+ dma_addr_t comp_baddr;
+ unsigned long time_in;
+ u32 dlen;
+ u32 dma_len;
+ u8 extra_time;
+};
+
+static inline void do_request_cleanup(struct pci_dev *pdev,
+ struct otx_cpt_info_buffer *info)
+{
+ struct otx_cpt_req_info *req;
+ int i;
+
+ if (info->dptr_baddr)
+ dma_unmap_single(&pdev->dev, info->dptr_baddr,
+ info->dma_len, DMA_BIDIRECTIONAL);
+
+ if (info->req) {
+ req = info->req;
+ for (i = 0; i < req->outcnt; i++) {
+ if (req->out[i].dma_addr)
+ dma_unmap_single(&pdev->dev,
+ req->out[i].dma_addr,
+ req->out[i].size,
+ DMA_BIDIRECTIONAL);
+ }
+
+ for (i = 0; i < req->incnt; i++) {
+ if (req->in[i].dma_addr)
+ dma_unmap_single(&pdev->dev,
+ req->in[i].dma_addr,
+ req->in[i].size,
+ DMA_BIDIRECTIONAL);
+ }
+ }
+ kzfree(info);
+}
+
+struct otx_cptvf_wqe;
+void otx_cpt_dump_sg_list(struct pci_dev *pdev, struct otx_cpt_req_info *req);
+void otx_cpt_post_process(struct otx_cptvf_wqe *wqe);
+int otx_cpt_do_request(struct pci_dev *pdev, struct otx_cpt_req_info *req,
+ int cpu_num);
+
+#endif /* __OTX_CPTVF_REQUEST_MANAGER_H */
diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c
index 9e9f48bb7f85..bd6309e57ab8 100644
--- a/drivers/crypto/mediatek/mtk-sha.c
+++ b/drivers/crypto/mediatek/mtk-sha.c
@@ -107,7 +107,7 @@ struct mtk_sha_ctx {
u8 id;
u8 buf[SHA_BUF_SIZE] __aligned(sizeof(u32));
- struct mtk_sha_hmac_ctx base[0];
+ struct mtk_sha_hmac_ctx base[];
};
struct mtk_sha_drv {
diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
index 435ac1c83df9..d84530293036 100644
--- a/drivers/crypto/mxs-dcp.c
+++ b/drivers/crypto/mxs-dcp.c
@@ -20,6 +20,7 @@
#include <crypto/sha.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
#define DCP_MAX_CHANS 4
#define DCP_BUF_SZ PAGE_SIZE
@@ -611,49 +612,46 @@ static int dcp_sha_req_to_buf(struct crypto_async_request *arq)
struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm);
struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req);
struct hash_alg_common *halg = crypto_hash_alg_common(tfm);
- const int nents = sg_nents(req->src);
uint8_t *in_buf = sdcp->coh->sha_in_buf;
uint8_t *out_buf = sdcp->coh->sha_out_buf;
- uint8_t *src_buf;
-
struct scatterlist *src;
- unsigned int i, len, clen;
+ unsigned int i, len, clen, oft = 0;
int ret;
int fin = rctx->fini;
if (fin)
rctx->fini = 0;
- for_each_sg(req->src, src, nents, i) {
- src_buf = sg_virt(src);
- len = sg_dma_len(src);
-
- do {
- if (actx->fill + len > DCP_BUF_SZ)
- clen = DCP_BUF_SZ - actx->fill;
- else
- clen = len;
-
- memcpy(in_buf + actx->fill, src_buf, clen);
- len -= clen;
- src_buf += clen;
- actx->fill += clen;
+ src = req->src;
+ len = req->nbytes;
- /*
- * If we filled the buffer and still have some
- * more data, submit the buffer.
- */
- if (len && actx->fill == DCP_BUF_SZ) {
- ret = mxs_dcp_run_sha(req);
- if (ret)
- return ret;
- actx->fill = 0;
- rctx->init = 0;
- }
- } while (len);
+ while (len) {
+ if (actx->fill + len > DCP_BUF_SZ)
+ clen = DCP_BUF_SZ - actx->fill;
+ else
+ clen = len;
+
+ scatterwalk_map_and_copy(in_buf + actx->fill, src, oft, clen,
+ 0);
+
+ len -= clen;
+ oft += clen;
+ actx->fill += clen;
+
+ /*
+ * If we filled the buffer and still have some
+ * more data, submit the buffer.
+ */
+ if (len && actx->fill == DCP_BUF_SZ) {
+ ret = mxs_dcp_run_sha(req);
+ if (ret)
+ return ret;
+ actx->fill = 0;
+ rctx->init = 0;
+ }
}
if (fin) {
diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h
index 91c54289124a..c6233173c612 100644
--- a/drivers/crypto/nx/nx.h
+++ b/drivers/crypto/nx/nx.h
@@ -37,7 +37,7 @@ struct max_sync_cop {
u32 fc;
u32 mode;
u32 triplets;
- struct msc_triplet trip[0];
+ struct msc_triplet trip[];
} __packed;
struct alg_props {
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index 4f915a4ef5b0..e4072cd38585 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -159,7 +159,7 @@ struct omap_sham_reqctx {
int sg_len;
unsigned int total; /* total request */
- u8 buffer[0] OMAP_ALIGNED;
+ u8 buffer[] OMAP_ALIGNED;
};
struct omap_sham_hmac_ctx {
@@ -176,7 +176,7 @@ struct omap_sham_ctx {
/* fallback stuff */
struct crypto_shash *fallback;
- struct omap_sham_hmac_ctx base[0];
+ struct omap_sham_hmac_ctx base[];
};
#define OMAP_SHAM_QUEUE_LENGTH 10
diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index 833bb1d3a11b..e14d3dd291f0 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -97,7 +97,7 @@ struct qat_alg_cd {
struct icp_qat_hw_cipher_algo_blk cipher;
struct icp_qat_hw_auth_algo_blk hash;
} qat_enc_cd;
- struct qat_dec { /* Decrytp content desc */
+ struct qat_dec { /* Decrypt content desc */
struct icp_qat_hw_auth_algo_blk hash;
struct icp_qat_hw_cipher_algo_blk cipher;
} qat_dec_cd;
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c
index 3852d31ce0a4..fb504cee0305 100644
--- a/drivers/crypto/qat/qat_common/qat_crypto.c
+++ b/drivers/crypto/qat/qat_common/qat_crypto.c
@@ -250,8 +250,7 @@ static int qat_crypto_create_instances(struct adf_accel_dev *accel_dev)
char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES];
INIT_LIST_HEAD(&accel_dev->crypto_list);
- strlcpy(key, ADF_NUM_CY, sizeof(key));
- if (adf_cfg_get_param_value(accel_dev, SEC, key, val))
+ if (adf_cfg_get_param_value(accel_dev, SEC, ADF_NUM_CY, val))
return -EFAULT;
if (kstrtoul(val, 0, &num_inst))
diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c
index 629e7f34dc09..5006e74c40cd 100644
--- a/drivers/crypto/qce/common.c
+++ b/drivers/crypto/qce/common.c
@@ -15,8 +15,6 @@
#include "regs-v5.h"
#include "sha.h"
-#define QCE_SECTOR_SIZE 512
-
static inline u32 qce_read(struct qce_device *qce, u32 offset)
{
return readl(qce->base + offset);
diff --git a/drivers/crypto/qce/common.h b/drivers/crypto/qce/common.h
index 282d4317470d..9f989cba0f1b 100644
--- a/drivers/crypto/qce/common.h
+++ b/drivers/crypto/qce/common.h
@@ -12,6 +12,9 @@
#include <crypto/hash.h>
#include <crypto/internal/skcipher.h>
+/* xts du size */
+#define QCE_SECTOR_SIZE 512
+
/* key size in bytes */
#define QCE_SHA_HMAC_KEY_SIZE 64
#define QCE_MAX_CIPHER_KEY_SIZE AES_KEYSIZE_256
diff --git a/drivers/crypto/qce/dma.c b/drivers/crypto/qce/dma.c
index 7da893dc00e7..46db5bf366b4 100644
--- a/drivers/crypto/qce/dma.c
+++ b/drivers/crypto/qce/dma.c
@@ -48,9 +48,10 @@ void qce_dma_release(struct qce_dma_data *dma)
struct scatterlist *
qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl,
- int max_ents)
+ unsigned int max_len)
{
struct scatterlist *sg = sgt->sgl, *sg_last = NULL;
+ unsigned int new_len;
while (sg) {
if (!sg_page(sg))
@@ -61,13 +62,13 @@ qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl,
if (!sg)
return ERR_PTR(-EINVAL);
- while (new_sgl && sg && max_ents) {
- sg_set_page(sg, sg_page(new_sgl), new_sgl->length,
- new_sgl->offset);
+ while (new_sgl && sg && max_len) {
+ new_len = new_sgl->length > max_len ? max_len : new_sgl->length;
+ sg_set_page(sg, sg_page(new_sgl), new_len, new_sgl->offset);
sg_last = sg;
sg = sg_next(sg);
new_sgl = sg_next(new_sgl);
- max_ents--;
+ max_len -= new_len;
}
return sg_last;
diff --git a/drivers/crypto/qce/dma.h b/drivers/crypto/qce/dma.h
index ed25a0d9829e..786402169360 100644
--- a/drivers/crypto/qce/dma.h
+++ b/drivers/crypto/qce/dma.h
@@ -43,6 +43,6 @@ void qce_dma_issue_pending(struct qce_dma_data *dma);
int qce_dma_terminate_all(struct qce_dma_data *dma);
struct scatterlist *
qce_sgtable_add(struct sg_table *sgt, struct scatterlist *sg_add,
- int max_ents);
+ unsigned int max_len);
#endif /* _DMA_H_ */
diff --git a/drivers/crypto/qce/skcipher.c b/drivers/crypto/qce/skcipher.c
index 4217b745f124..9412433f3b21 100644
--- a/drivers/crypto/qce/skcipher.c
+++ b/drivers/crypto/qce/skcipher.c
@@ -5,6 +5,7 @@
#include <linux/device.h>
#include <linux/interrupt.h>
+#include <linux/moduleparam.h>
#include <linux/types.h>
#include <crypto/aes.h>
#include <crypto/internal/des.h>
@@ -12,6 +13,13 @@
#include "cipher.h"
+static unsigned int aes_sw_max_len = CONFIG_CRYPTO_DEV_QCE_SW_MAX_LEN;
+module_param(aes_sw_max_len, uint, 0644);
+MODULE_PARM_DESC(aes_sw_max_len,
+ "Only use hardware for AES requests larger than this "
+ "[0=always use hardware; anything <16 breaks AES-GCM; default="
+ __stringify(CONFIG_CRYPTO_DEV_QCE_SW_MAX_LEN)"]");
+
static LIST_HEAD(skcipher_algs);
static void qce_skcipher_done(void *data)
@@ -97,13 +105,14 @@ qce_skcipher_async_req_handle(struct crypto_async_request *async_req)
sg_init_one(&rctx->result_sg, qce->dma.result_buf, QCE_RESULT_BUF_SZ);
- sg = qce_sgtable_add(&rctx->dst_tbl, req->dst, rctx->dst_nents - 1);
+ sg = qce_sgtable_add(&rctx->dst_tbl, req->dst, req->cryptlen);
if (IS_ERR(sg)) {
ret = PTR_ERR(sg);
goto error_free;
}
- sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg, 1);
+ sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg,
+ QCE_RESULT_BUF_SZ);
if (IS_ERR(sg)) {
ret = PTR_ERR(sg);
goto error_free;
@@ -165,15 +174,10 @@ static int qce_skcipher_setkey(struct crypto_skcipher *ablk, const u8 *key,
switch (IS_XTS(flags) ? keylen >> 1 : keylen) {
case AES_KEYSIZE_128:
case AES_KEYSIZE_256:
+ memcpy(ctx->enc_key, key, keylen);
break;
- default:
- goto fallback;
}
- ctx->enc_keylen = keylen;
- memcpy(ctx->enc_key, key, keylen);
- return 0;
-fallback:
ret = crypto_sync_skcipher_setkey(ctx->fallback, key, keylen);
if (!ret)
ctx->enc_keylen = keylen;
@@ -223,8 +227,14 @@ static int qce_skcipher_crypt(struct skcipher_request *req, int encrypt)
rctx->flags |= encrypt ? QCE_ENCRYPT : QCE_DECRYPT;
keylen = IS_XTS(rctx->flags) ? ctx->enc_keylen >> 1 : ctx->enc_keylen;
- if (IS_AES(rctx->flags) && keylen != AES_KEYSIZE_128 &&
- keylen != AES_KEYSIZE_256) {
+ /* qce is hanging when AES-XTS request len > QCE_SECTOR_SIZE and
+ * is not a multiple of it; pass such requests to the fallback
+ */
+ if (IS_AES(rctx->flags) &&
+ (((keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256) ||
+ req->cryptlen <= aes_sw_max_len) ||
+ (IS_XTS(rctx->flags) && req->cryptlen > QCE_SECTOR_SIZE &&
+ req->cryptlen % QCE_SECTOR_SIZE))) {
SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
skcipher_request_set_sync_tfm(subreq, ctx->fallback);
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index d66e20a2f54c..2a16800d2579 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -369,7 +369,7 @@ struct s5p_hash_reqctx {
bool error;
u32 bufcnt;
- u8 buffer[0];
+ u8 buffer[];
};
/**
diff --git a/drivers/crypto/xilinx/Makefile b/drivers/crypto/xilinx/Makefile
new file mode 100644
index 000000000000..534e32daf76a
--- /dev/null
+++ b/drivers/crypto/xilinx/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_AES) += zynqmp-aes-gcm.o
diff --git a/drivers/crypto/xilinx/zynqmp-aes-gcm.c b/drivers/crypto/xilinx/zynqmp-aes-gcm.c
new file mode 100644
index 000000000000..09f7f468eef8
--- /dev/null
+++ b/drivers/crypto/xilinx/zynqmp-aes-gcm.c
@@ -0,0 +1,457 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Xilinx ZynqMP AES Driver.
+ * Copyright (c) 2020 Xilinx Inc.
+ */
+
+#include <crypto/aes.h>
+#include <crypto/engine.h>
+#include <crypto/gcm.h>
+#include <crypto/internal/aead.h>
+#include <crypto/scatterwalk.h>
+
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include <linux/firmware/xlnx-zynqmp.h>
+
+#define ZYNQMP_DMA_BIT_MASK 32U
+
+#define ZYNQMP_AES_KEY_SIZE AES_KEYSIZE_256
+#define ZYNQMP_AES_AUTH_SIZE 16U
+#define ZYNQMP_KEY_SRC_SEL_KEY_LEN 1U
+#define ZYNQMP_AES_BLK_SIZE 1U
+#define ZYNQMP_AES_MIN_INPUT_BLK_SIZE 4U
+#define ZYNQMP_AES_WORD_LEN 4U
+
+#define ZYNQMP_AES_GCM_TAG_MISMATCH_ERR 0x01
+#define ZYNQMP_AES_WRONG_KEY_SRC_ERR 0x13
+#define ZYNQMP_AES_PUF_NOT_PROGRAMMED 0xE300
+
+enum zynqmp_aead_op {
+ ZYNQMP_AES_DECRYPT = 0,
+ ZYNQMP_AES_ENCRYPT
+};
+
+enum zynqmp_aead_keysrc {
+ ZYNQMP_AES_KUP_KEY = 0,
+ ZYNQMP_AES_DEV_KEY,
+ ZYNQMP_AES_PUF_KEY
+};
+
+struct zynqmp_aead_drv_ctx {
+ union {
+ struct aead_alg aead;
+ } alg;
+ struct device *dev;
+ struct crypto_engine *engine;
+ const struct zynqmp_eemi_ops *eemi_ops;
+};
+
+struct zynqmp_aead_hw_req {
+ u64 src;
+ u64 iv;
+ u64 key;
+ u64 dst;
+ u64 size;
+ u64 op;
+ u64 keysrc;
+};
+
+struct zynqmp_aead_tfm_ctx {
+ struct crypto_engine_ctx engine_ctx;
+ struct device *dev;
+ u8 key[ZYNQMP_AES_KEY_SIZE];
+ u8 *iv;
+ u32 keylen;
+ u32 authsize;
+ enum zynqmp_aead_keysrc keysrc;
+ struct crypto_aead *fbk_cipher;
+};
+
+struct zynqmp_aead_req_ctx {
+ enum zynqmp_aead_op op;
+};
+
+static int zynqmp_aes_aead_cipher(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct zynqmp_aead_tfm_ctx *tfm_ctx = crypto_aead_ctx(aead);
+ struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req);
+ struct device *dev = tfm_ctx->dev;
+ struct aead_alg *alg = crypto_aead_alg(aead);
+ struct zynqmp_aead_drv_ctx *drv_ctx;
+ struct zynqmp_aead_hw_req *hwreq;
+ dma_addr_t dma_addr_data, dma_addr_hw_req;
+ unsigned int data_size;
+ unsigned int status;
+ size_t dma_size;
+ char *kbuf;
+ int err;
+
+ drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead);
+
+ if (!drv_ctx->eemi_ops->aes)
+ return -ENOTSUPP;
+
+ if (tfm_ctx->keysrc == ZYNQMP_AES_KUP_KEY)
+ dma_size = req->cryptlen + ZYNQMP_AES_KEY_SIZE
+ + GCM_AES_IV_SIZE;
+ else
+ dma_size = req->cryptlen + GCM_AES_IV_SIZE;
+
+ kbuf = dma_alloc_coherent(dev, dma_size, &dma_addr_data, GFP_KERNEL);
+ if (!kbuf)
+ return -ENOMEM;
+
+ hwreq = dma_alloc_coherent(dev, sizeof(struct zynqmp_aead_hw_req),
+ &dma_addr_hw_req, GFP_KERNEL);
+ if (!hwreq) {
+ dma_free_coherent(dev, dma_size, kbuf, dma_addr_data);
+ return -ENOMEM;
+ }
+
+ data_size = req->cryptlen;
+ scatterwalk_map_and_copy(kbuf, req->src, 0, req->cryptlen, 0);
+ memcpy(kbuf + data_size, req->iv, GCM_AES_IV_SIZE);
+
+ hwreq->src = dma_addr_data;
+ hwreq->dst = dma_addr_data;
+ hwreq->iv = hwreq->src + data_size;
+ hwreq->keysrc = tfm_ctx->keysrc;
+ hwreq->op = rq_ctx->op;
+
+ if (hwreq->op == ZYNQMP_AES_ENCRYPT)
+ hwreq->size = data_size;
+ else
+ hwreq->size = data_size - ZYNQMP_AES_AUTH_SIZE;
+
+ if (hwreq->keysrc == ZYNQMP_AES_KUP_KEY) {
+ memcpy(kbuf + data_size + GCM_AES_IV_SIZE,
+ tfm_ctx->key, ZYNQMP_AES_KEY_SIZE);
+
+ hwreq->key = hwreq->src + data_size + GCM_AES_IV_SIZE;
+ } else {
+ hwreq->key = 0;
+ }
+
+ drv_ctx->eemi_ops->aes(dma_addr_hw_req, &status);
+
+ if (status) {
+ switch (status) {
+ case ZYNQMP_AES_GCM_TAG_MISMATCH_ERR:
+ dev_err(dev, "ERROR: Gcm Tag mismatch\n");
+ break;
+ case ZYNQMP_AES_WRONG_KEY_SRC_ERR:
+ dev_err(dev, "ERROR: Wrong KeySrc, enable secure mode\n");
+ break;
+ case ZYNQMP_AES_PUF_NOT_PROGRAMMED:
+ dev_err(dev, "ERROR: PUF is not registered\n");
+ break;
+ default:
+ dev_err(dev, "ERROR: Unknown error\n");
+ break;
+ }
+ err = -status;
+ } else {
+ if (hwreq->op == ZYNQMP_AES_ENCRYPT)
+ data_size = data_size + ZYNQMP_AES_AUTH_SIZE;
+ else
+ data_size = data_size - ZYNQMP_AES_AUTH_SIZE;
+
+ sg_copy_from_buffer(req->dst, sg_nents(req->dst),
+ kbuf, data_size);
+ err = 0;
+ }
+
+ if (kbuf) {
+ memzero_explicit(kbuf, dma_size);
+ dma_free_coherent(dev, dma_size, kbuf, dma_addr_data);
+ }
+ if (hwreq) {
+ memzero_explicit(hwreq, sizeof(struct zynqmp_aead_hw_req));
+ dma_free_coherent(dev, sizeof(struct zynqmp_aead_hw_req),
+ hwreq, dma_addr_hw_req);
+ }
+ return err;
+}
+
+static int zynqmp_fallback_check(struct zynqmp_aead_tfm_ctx *tfm_ctx,
+ struct aead_request *req)
+{
+ int need_fallback = 0;
+ struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req);
+
+ if (tfm_ctx->authsize != ZYNQMP_AES_AUTH_SIZE)
+ need_fallback = 1;
+
+ if (tfm_ctx->keysrc == ZYNQMP_AES_KUP_KEY &&
+ tfm_ctx->keylen != ZYNQMP_AES_KEY_SIZE) {
+ need_fallback = 1;
+ }
+ if (req->assoclen != 0 ||
+ req->cryptlen < ZYNQMP_AES_MIN_INPUT_BLK_SIZE) {
+ need_fallback = 1;
+ }
+ if ((req->cryptlen % ZYNQMP_AES_WORD_LEN) != 0)
+ need_fallback = 1;
+
+ if (rq_ctx->op == ZYNQMP_AES_DECRYPT &&
+ req->cryptlen <= ZYNQMP_AES_AUTH_SIZE) {
+ need_fallback = 1;
+ }
+ return need_fallback;
+}
+
+static int zynqmp_handle_aes_req(struct crypto_engine *engine,
+ void *req)
+{
+ struct aead_request *areq =
+ container_of(req, struct aead_request, base);
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct zynqmp_aead_tfm_ctx *tfm_ctx = crypto_aead_ctx(aead);
+ struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(areq);
+ struct aead_request *subreq = aead_request_ctx(req);
+ int need_fallback;
+ int err;
+
+ need_fallback = zynqmp_fallback_check(tfm_ctx, areq);
+
+ if (need_fallback) {
+ aead_request_set_tfm(subreq, tfm_ctx->fbk_cipher);
+
+ aead_request_set_callback(subreq, areq->base.flags,
+ NULL, NULL);
+ aead_request_set_crypt(subreq, areq->src, areq->dst,
+ areq->cryptlen, areq->iv);
+ aead_request_set_ad(subreq, areq->assoclen);
+ if (rq_ctx->op == ZYNQMP_AES_ENCRYPT)
+ err = crypto_aead_encrypt(subreq);
+ else
+ err = crypto_aead_decrypt(subreq);
+ } else {
+ err = zynqmp_aes_aead_cipher(areq);
+ }
+
+ crypto_finalize_aead_request(engine, areq, err);
+ return 0;
+}
+
+static int zynqmp_aes_aead_setkey(struct crypto_aead *aead, const u8 *key,
+ unsigned int keylen)
+{
+ struct crypto_tfm *tfm = crypto_aead_tfm(aead);
+ struct zynqmp_aead_tfm_ctx *tfm_ctx =
+ (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm);
+ unsigned char keysrc;
+
+ if (keylen == ZYNQMP_KEY_SRC_SEL_KEY_LEN) {
+ keysrc = *key;
+ if (keysrc == ZYNQMP_AES_KUP_KEY ||
+ keysrc == ZYNQMP_AES_DEV_KEY ||
+ keysrc == ZYNQMP_AES_PUF_KEY) {
+ tfm_ctx->keysrc = (enum zynqmp_aead_keysrc)keysrc;
+ } else {
+ tfm_ctx->keylen = keylen;
+ }
+ } else {
+ tfm_ctx->keylen = keylen;
+ if (keylen == ZYNQMP_AES_KEY_SIZE) {
+ tfm_ctx->keysrc = ZYNQMP_AES_KUP_KEY;
+ memcpy(tfm_ctx->key, key, keylen);
+ }
+ }
+
+ tfm_ctx->fbk_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
+ tfm_ctx->fbk_cipher->base.crt_flags |= (aead->base.crt_flags &
+ CRYPTO_TFM_REQ_MASK);
+
+ return crypto_aead_setkey(tfm_ctx->fbk_cipher, key, keylen);
+}
+
+static int zynqmp_aes_aead_setauthsize(struct crypto_aead *aead,
+ unsigned int authsize)
+{
+ struct crypto_tfm *tfm = crypto_aead_tfm(aead);
+ struct zynqmp_aead_tfm_ctx *tfm_ctx =
+ (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm);
+
+ tfm_ctx->authsize = authsize;
+ return crypto_aead_setauthsize(tfm_ctx->fbk_cipher, authsize);
+}
+
+static int zynqmp_aes_aead_encrypt(struct aead_request *req)
+{
+ struct zynqmp_aead_drv_ctx *drv_ctx;
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct aead_alg *alg = crypto_aead_alg(aead);
+ struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req);
+
+ rq_ctx->op = ZYNQMP_AES_ENCRYPT;
+ drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead);
+
+ return crypto_transfer_aead_request_to_engine(drv_ctx->engine, req);
+}
+
+static int zynqmp_aes_aead_decrypt(struct aead_request *req)
+{
+ struct zynqmp_aead_drv_ctx *drv_ctx;
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct aead_alg *alg = crypto_aead_alg(aead);
+ struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req);
+
+ rq_ctx->op = ZYNQMP_AES_DECRYPT;
+ drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead);
+
+ return crypto_transfer_aead_request_to_engine(drv_ctx->engine, req);
+}
+
+static int zynqmp_aes_aead_init(struct crypto_aead *aead)
+{
+ struct crypto_tfm *tfm = crypto_aead_tfm(aead);
+ struct zynqmp_aead_tfm_ctx *tfm_ctx =
+ (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm);
+ struct zynqmp_aead_drv_ctx *drv_ctx;
+ struct aead_alg *alg = crypto_aead_alg(aead);
+
+ drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead);
+ tfm_ctx->dev = drv_ctx->dev;
+
+ tfm_ctx->engine_ctx.op.do_one_request = zynqmp_handle_aes_req;
+ tfm_ctx->engine_ctx.op.prepare_request = NULL;
+ tfm_ctx->engine_ctx.op.unprepare_request = NULL;
+
+ tfm_ctx->fbk_cipher = crypto_alloc_aead(drv_ctx->alg.aead.base.cra_name,
+ 0,
+ CRYPTO_ALG_NEED_FALLBACK);
+
+ if (IS_ERR(tfm_ctx->fbk_cipher)) {
+ pr_err("%s() Error: failed to allocate fallback for %s\n",
+ __func__, drv_ctx->alg.aead.base.cra_name);
+ return PTR_ERR(tfm_ctx->fbk_cipher);
+ }
+
+ crypto_aead_set_reqsize(aead,
+ max(sizeof(struct zynqmp_aead_req_ctx),
+ sizeof(struct aead_request) +
+ crypto_aead_reqsize(tfm_ctx->fbk_cipher)));
+ return 0;
+}
+
+static void zynqmp_aes_aead_exit(struct crypto_aead *aead)
+{
+ struct crypto_tfm *tfm = crypto_aead_tfm(aead);
+ struct zynqmp_aead_tfm_ctx *tfm_ctx =
+ (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm);
+
+ if (tfm_ctx->fbk_cipher) {
+ crypto_free_aead(tfm_ctx->fbk_cipher);
+ tfm_ctx->fbk_cipher = NULL;
+ }
+ memzero_explicit(tfm_ctx, sizeof(struct zynqmp_aead_tfm_ctx));
+}
+
+static struct zynqmp_aead_drv_ctx aes_drv_ctx = {
+ .alg.aead = {
+ .setkey = zynqmp_aes_aead_setkey,
+ .setauthsize = zynqmp_aes_aead_setauthsize,
+ .encrypt = zynqmp_aes_aead_encrypt,
+ .decrypt = zynqmp_aes_aead_decrypt,
+ .init = zynqmp_aes_aead_init,
+ .exit = zynqmp_aes_aead_exit,
+ .ivsize = GCM_AES_IV_SIZE,
+ .maxauthsize = ZYNQMP_AES_AUTH_SIZE,
+ .base = {
+ .cra_name = "gcm(aes)",
+ .cra_driver_name = "xilinx-zynqmp-aes-gcm",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_AEAD |
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_KERN_DRIVER_ONLY |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = ZYNQMP_AES_BLK_SIZE,
+ .cra_ctxsize = sizeof(struct zynqmp_aead_tfm_ctx),
+ .cra_module = THIS_MODULE,
+ }
+ }
+};
+
+static int zynqmp_aes_aead_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ int err;
+
+ /* ZynqMP AES driver supports only one instance */
+ if (!aes_drv_ctx.dev)
+ aes_drv_ctx.dev = dev;
+ else
+ return -ENODEV;
+
+ aes_drv_ctx.eemi_ops = zynqmp_pm_get_eemi_ops();
+ if (IS_ERR(aes_drv_ctx.eemi_ops)) {
+ dev_err(dev, "Failed to get ZynqMP EEMI interface\n");
+ return PTR_ERR(aes_drv_ctx.eemi_ops);
+ }
+
+ err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(ZYNQMP_DMA_BIT_MASK));
+ if (err < 0) {
+ dev_err(dev, "No usable DMA configuration\n");
+ return err;
+ }
+
+ aes_drv_ctx.engine = crypto_engine_alloc_init(dev, 1);
+ if (!aes_drv_ctx.engine) {
+ dev_err(dev, "Cannot alloc AES engine\n");
+ err = -ENOMEM;
+ goto err_engine;
+ }
+
+ err = crypto_engine_start(aes_drv_ctx.engine);
+ if (err) {
+ dev_err(dev, "Cannot start AES engine\n");
+ goto err_engine;
+ }
+
+ err = crypto_register_aead(&aes_drv_ctx.alg.aead);
+ if (err < 0) {
+ dev_err(dev, "Failed to register AEAD alg.\n");
+ goto err_aead;
+ }
+ return 0;
+
+err_aead:
+ crypto_unregister_aead(&aes_drv_ctx.alg.aead);
+
+err_engine:
+ if (aes_drv_ctx.engine)
+ crypto_engine_exit(aes_drv_ctx.engine);
+
+ return err;
+}
+
+static int zynqmp_aes_aead_remove(struct platform_device *pdev)
+{
+ crypto_engine_exit(aes_drv_ctx.engine);
+ crypto_unregister_aead(&aes_drv_ctx.alg.aead);
+
+ return 0;
+}
+
+static const struct of_device_id zynqmp_aes_dt_ids[] = {
+ { .compatible = "xlnx,zynqmp-aes" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, zynqmp_aes_dt_ids);
+
+static struct platform_driver zynqmp_aes_driver = {
+ .probe = zynqmp_aes_aead_probe,
+ .remove = zynqmp_aes_aead_remove,
+ .driver = {
+ .name = "zynqmp-aes",
+ .of_match_table = zynqmp_aes_dt_ids,
+ },
+};
+
+module_platform_driver(zynqmp_aes_driver);
+MODULE_LICENSE("GPL");
diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index f8b03dcfd2f7..41b65164a367 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -709,6 +709,30 @@ static int zynqmp_pm_set_requirement(const u32 node, const u32 capabilities,
qos, ack, NULL);
}
+/**
+ * zynqmp_pm_aes - Access AES hardware to encrypt/decrypt the data using
+ * AES-GCM core.
+ * @address: Address of the AesParams structure.
+ * @out: Returned output value
+ *
+ * Return: Returns status, either success or error code.
+ */
+static int zynqmp_pm_aes_engine(const u64 address, u32 *out)
+{
+ u32 ret_payload[PAYLOAD_ARG_CNT];
+ int ret;
+
+ if (!out)
+ return -EINVAL;
+
+ ret = zynqmp_pm_invoke_fn(PM_SECURE_AES, upper_32_bits(address),
+ lower_32_bits(address),
+ 0, 0, ret_payload);
+ *out = ret_payload[1];
+
+ return ret;
+}
+
static const struct zynqmp_eemi_ops eemi_ops = {
.get_api_version = zynqmp_pm_get_api_version,
.get_chipid = zynqmp_pm_get_chipid,
@@ -732,6 +756,7 @@ static const struct zynqmp_eemi_ops eemi_ops = {
.set_requirement = zynqmp_pm_set_requirement,
.fpga_load = zynqmp_pm_fpga_load,
.fpga_get_status = zynqmp_pm_fpga_get_status,
+ .aes = zynqmp_pm_aes_engine,
};
/**
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 7f0d48f406e3..99e151475d8f 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -480,4 +480,5 @@ source "drivers/misc/cxl/Kconfig"
source "drivers/misc/ocxl/Kconfig"
source "drivers/misc/cardreader/Kconfig"
source "drivers/misc/habanalabs/Kconfig"
+source "drivers/misc/uacce/Kconfig"
endmenu
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index c1860d35dc7e..9abf2923d831 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -56,4 +56,5 @@ obj-$(CONFIG_OCXL) += ocxl/
obj-y += cardreader/
obj-$(CONFIG_PVPANIC) += pvpanic.o
obj-$(CONFIG_HABANA_AI) += habanalabs/
+obj-$(CONFIG_UACCE) += uacce/
obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o
diff --git a/drivers/misc/uacce/Kconfig b/drivers/misc/uacce/Kconfig
new file mode 100644
index 000000000000..5e39b6083b0f
--- /dev/null
+++ b/drivers/misc/uacce/Kconfig
@@ -0,0 +1,13 @@
+config UACCE
+ tristate "Accelerator Framework for User Land"
+ depends on IOMMU_API
+ help
+ UACCE provides interface for the user process to access the hardware
+ without interaction with the kernel space in data path.
+
+ The user-space interface is described in
+ include/uapi/misc/uacce/uacce.h
+
+ See Documentation/misc-devices/uacce.rst for more details.
+
+ If you don't know what to do here, say N.
diff --git a/drivers/misc/uacce/Makefile b/drivers/misc/uacce/Makefile
new file mode 100644
index 000000000000..5b4374e8b5f2
--- /dev/null
+++ b/drivers/misc/uacce/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+obj-$(CONFIG_UACCE) += uacce.o
diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
new file mode 100644
index 000000000000..d39307f060bd
--- /dev/null
+++ b/drivers/misc/uacce/uacce.c
@@ -0,0 +1,633 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/compat.h>
+#include <linux/dma-mapping.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/uacce.h>
+
+static struct class *uacce_class;
+static dev_t uacce_devt;
+static DEFINE_MUTEX(uacce_mutex);
+static DEFINE_XARRAY_ALLOC(uacce_xa);
+
+static int uacce_start_queue(struct uacce_queue *q)
+{
+ int ret = 0;
+
+ mutex_lock(&uacce_mutex);
+
+ if (q->state != UACCE_Q_INIT) {
+ ret = -EINVAL;
+ goto out_with_lock;
+ }
+
+ if (q->uacce->ops->start_queue) {
+ ret = q->uacce->ops->start_queue(q);
+ if (ret < 0)
+ goto out_with_lock;
+ }
+
+ q->state = UACCE_Q_STARTED;
+
+out_with_lock:
+ mutex_unlock(&uacce_mutex);
+
+ return ret;
+}
+
+static int uacce_put_queue(struct uacce_queue *q)
+{
+ struct uacce_device *uacce = q->uacce;
+
+ mutex_lock(&uacce_mutex);
+
+ if (q->state == UACCE_Q_ZOMBIE)
+ goto out;
+
+ if ((q->state == UACCE_Q_STARTED) && uacce->ops->stop_queue)
+ uacce->ops->stop_queue(q);
+
+ if ((q->state == UACCE_Q_INIT || q->state == UACCE_Q_STARTED) &&
+ uacce->ops->put_queue)
+ uacce->ops->put_queue(q);
+
+ q->state = UACCE_Q_ZOMBIE;
+out:
+ mutex_unlock(&uacce_mutex);
+
+ return 0;
+}
+
+static long uacce_fops_unl_ioctl(struct file *filep,
+ unsigned int cmd, unsigned long arg)
+{
+ struct uacce_queue *q = filep->private_data;
+ struct uacce_device *uacce = q->uacce;
+
+ switch (cmd) {
+ case UACCE_CMD_START_Q:
+ return uacce_start_queue(q);
+
+ case UACCE_CMD_PUT_Q:
+ return uacce_put_queue(q);
+
+ default:
+ if (!uacce->ops->ioctl)
+ return -EINVAL;
+
+ return uacce->ops->ioctl(q, cmd, arg);
+ }
+}
+
+#ifdef CONFIG_COMPAT
+static long uacce_fops_compat_ioctl(struct file *filep,
+ unsigned int cmd, unsigned long arg)
+{
+ arg = (unsigned long)compat_ptr(arg);
+
+ return uacce_fops_unl_ioctl(filep, cmd, arg);
+}
+#endif
+
+static int uacce_sva_exit(struct device *dev, struct iommu_sva *handle,
+ void *data)
+{
+ struct uacce_mm *uacce_mm = data;
+ struct uacce_queue *q;
+
+ /*
+ * No new queue can be added concurrently because no caller can have a
+ * reference to this mm. But there may be concurrent calls to
+ * uacce_mm_put(), so we need the lock.
+ */
+ mutex_lock(&uacce_mm->lock);
+ list_for_each_entry(q, &uacce_mm->queues, list)
+ uacce_put_queue(q);
+ uacce_mm->mm = NULL;
+ mutex_unlock(&uacce_mm->lock);
+
+ return 0;
+}
+
+static struct iommu_sva_ops uacce_sva_ops = {
+ .mm_exit = uacce_sva_exit,
+};
+
+static struct uacce_mm *uacce_mm_get(struct uacce_device *uacce,
+ struct uacce_queue *q,
+ struct mm_struct *mm)
+{
+ struct uacce_mm *uacce_mm = NULL;
+ struct iommu_sva *handle = NULL;
+ int ret;
+
+ lockdep_assert_held(&uacce->mm_lock);
+
+ list_for_each_entry(uacce_mm, &uacce->mm_list, list) {
+ if (uacce_mm->mm == mm) {
+ mutex_lock(&uacce_mm->lock);
+ list_add(&q->list, &uacce_mm->queues);
+ mutex_unlock(&uacce_mm->lock);
+ return uacce_mm;
+ }
+ }
+
+ uacce_mm = kzalloc(sizeof(*uacce_mm), GFP_KERNEL);
+ if (!uacce_mm)
+ return NULL;
+
+ if (uacce->flags & UACCE_DEV_SVA) {
+ /*
+ * Safe to pass an incomplete uacce_mm, since mm_exit cannot
+ * fire while we hold a reference to the mm.
+ */
+ handle = iommu_sva_bind_device(uacce->parent, mm, uacce_mm);
+ if (IS_ERR(handle))
+ goto err_free;
+
+ ret = iommu_sva_set_ops(handle, &uacce_sva_ops);
+ if (ret)
+ goto err_unbind;
+
+ uacce_mm->pasid = iommu_sva_get_pasid(handle);
+ if (uacce_mm->pasid == IOMMU_PASID_INVALID)
+ goto err_unbind;
+ }
+
+ uacce_mm->mm = mm;
+ uacce_mm->handle = handle;
+ INIT_LIST_HEAD(&uacce_mm->queues);
+ mutex_init(&uacce_mm->lock);
+ list_add(&q->list, &uacce_mm->queues);
+ list_add(&uacce_mm->list, &uacce->mm_list);
+
+ return uacce_mm;
+
+err_unbind:
+ if (handle)
+ iommu_sva_unbind_device(handle);
+err_free:
+ kfree(uacce_mm);
+ return NULL;
+}
+
+static void uacce_mm_put(struct uacce_queue *q)
+{
+ struct uacce_mm *uacce_mm = q->uacce_mm;
+
+ lockdep_assert_held(&q->uacce->mm_lock);
+
+ mutex_lock(&uacce_mm->lock);
+ list_del(&q->list);
+ mutex_unlock(&uacce_mm->lock);
+
+ if (list_empty(&uacce_mm->queues)) {
+ if (uacce_mm->handle)
+ iommu_sva_unbind_device(uacce_mm->handle);
+ list_del(&uacce_mm->list);
+ kfree(uacce_mm);
+ }
+}
+
+static int uacce_fops_open(struct inode *inode, struct file *filep)
+{
+ struct uacce_mm *uacce_mm = NULL;
+ struct uacce_device *uacce;
+ struct uacce_queue *q;
+ int ret = 0;
+
+ uacce = xa_load(&uacce_xa, iminor(inode));
+ if (!uacce)
+ return -ENODEV;
+
+ q = kzalloc(sizeof(struct uacce_queue), GFP_KERNEL);
+ if (!q)
+ return -ENOMEM;
+
+ mutex_lock(&uacce->mm_lock);
+ uacce_mm = uacce_mm_get(uacce, q, current->mm);
+ mutex_unlock(&uacce->mm_lock);
+ if (!uacce_mm) {
+ ret = -ENOMEM;
+ goto out_with_mem;
+ }
+
+ q->uacce = uacce;
+ q->uacce_mm = uacce_mm;
+
+ if (uacce->ops->get_queue) {
+ ret = uacce->ops->get_queue(uacce, uacce_mm->pasid, q);
+ if (ret < 0)
+ goto out_with_mm;
+ }
+
+ init_waitqueue_head(&q->wait);
+ filep->private_data = q;
+ uacce->inode = inode;
+ q->state = UACCE_Q_INIT;
+
+ return 0;
+
+out_with_mm:
+ mutex_lock(&uacce->mm_lock);
+ uacce_mm_put(q);
+ mutex_unlock(&uacce->mm_lock);
+out_with_mem:
+ kfree(q);
+ return ret;
+}
+
+static int uacce_fops_release(struct inode *inode, struct file *filep)
+{
+ struct uacce_queue *q = filep->private_data;
+ struct uacce_device *uacce = q->uacce;
+
+ uacce_put_queue(q);
+
+ mutex_lock(&uacce->mm_lock);
+ uacce_mm_put(q);
+ mutex_unlock(&uacce->mm_lock);
+
+ kfree(q);
+
+ return 0;
+}
+
+static vm_fault_t uacce_vma_fault(struct vm_fault *vmf)
+{
+ if (vmf->flags & (FAULT_FLAG_MKWRITE | FAULT_FLAG_WRITE))
+ return VM_FAULT_SIGBUS;
+
+ return 0;
+}
+
+static void uacce_vma_close(struct vm_area_struct *vma)
+{
+ struct uacce_queue *q = vma->vm_private_data;
+ struct uacce_qfile_region *qfr = NULL;
+
+ if (vma->vm_pgoff < UACCE_MAX_REGION)
+ qfr = q->qfrs[vma->vm_pgoff];
+
+ kfree(qfr);
+}
+
+static const struct vm_operations_struct uacce_vm_ops = {
+ .fault = uacce_vma_fault,
+ .close = uacce_vma_close,
+};
+
+static int uacce_fops_mmap(struct file *filep, struct vm_area_struct *vma)
+{
+ struct uacce_queue *q = filep->private_data;
+ struct uacce_device *uacce = q->uacce;
+ struct uacce_qfile_region *qfr;
+ enum uacce_qfrt type = UACCE_MAX_REGION;
+ int ret = 0;
+
+ if (vma->vm_pgoff < UACCE_MAX_REGION)
+ type = vma->vm_pgoff;
+ else
+ return -EINVAL;
+
+ qfr = kzalloc(sizeof(*qfr), GFP_KERNEL);
+ if (!qfr)
+ return -ENOMEM;
+
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_WIPEONFORK;
+ vma->vm_ops = &uacce_vm_ops;
+ vma->vm_private_data = q;
+ qfr->type = type;
+
+ mutex_lock(&uacce_mutex);
+
+ if (q->state != UACCE_Q_INIT && q->state != UACCE_Q_STARTED) {
+ ret = -EINVAL;
+ goto out_with_lock;
+ }
+
+ if (q->qfrs[type]) {
+ ret = -EEXIST;
+ goto out_with_lock;
+ }
+
+ switch (type) {
+ case UACCE_QFRT_MMIO:
+ if (!uacce->ops->mmap) {
+ ret = -EINVAL;
+ goto out_with_lock;
+ }
+
+ ret = uacce->ops->mmap(q, vma, qfr);
+ if (ret)
+ goto out_with_lock;
+
+ break;
+
+ case UACCE_QFRT_DUS:
+ if (!uacce->ops->mmap) {
+ ret = -EINVAL;
+ goto out_with_lock;
+ }
+
+ ret = uacce->ops->mmap(q, vma, qfr);
+ if (ret)
+ goto out_with_lock;
+ break;
+
+ default:
+ ret = -EINVAL;
+ goto out_with_lock;
+ }
+
+ q->qfrs[type] = qfr;
+ mutex_unlock(&uacce_mutex);
+
+ return ret;
+
+out_with_lock:
+ mutex_unlock(&uacce_mutex);
+ kfree(qfr);
+ return ret;
+}
+
+static __poll_t uacce_fops_poll(struct file *file, poll_table *wait)
+{
+ struct uacce_queue *q = file->private_data;
+ struct uacce_device *uacce = q->uacce;
+
+ poll_wait(file, &q->wait, wait);
+ if (uacce->ops->is_q_updated && uacce->ops->is_q_updated(q))
+ return EPOLLIN | EPOLLRDNORM;
+
+ return 0;
+}
+
+static const struct file_operations uacce_fops = {
+ .owner = THIS_MODULE,
+ .open = uacce_fops_open,
+ .release = uacce_fops_release,
+ .unlocked_ioctl = uacce_fops_unl_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = uacce_fops_compat_ioctl,
+#endif
+ .mmap = uacce_fops_mmap,
+ .poll = uacce_fops_poll,
+};
+
+#define to_uacce_device(dev) container_of(dev, struct uacce_device, dev)
+
+static ssize_t api_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct uacce_device *uacce = to_uacce_device(dev);
+
+ return sprintf(buf, "%s\n", uacce->api_ver);
+}
+
+static ssize_t flags_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct uacce_device *uacce = to_uacce_device(dev);
+
+ return sprintf(buf, "%u\n", uacce->flags);
+}
+
+static ssize_t available_instances_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct uacce_device *uacce = to_uacce_device(dev);
+
+ if (!uacce->ops->get_available_instances)
+ return -ENODEV;
+
+ return sprintf(buf, "%d\n",
+ uacce->ops->get_available_instances(uacce));
+}
+
+static ssize_t algorithms_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct uacce_device *uacce = to_uacce_device(dev);
+
+ return sprintf(buf, "%s\n", uacce->algs);
+}
+
+static ssize_t region_mmio_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct uacce_device *uacce = to_uacce_device(dev);
+
+ return sprintf(buf, "%lu\n",
+ uacce->qf_pg_num[UACCE_QFRT_MMIO] << PAGE_SHIFT);
+}
+
+static ssize_t region_dus_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct uacce_device *uacce = to_uacce_device(dev);
+
+ return sprintf(buf, "%lu\n",
+ uacce->qf_pg_num[UACCE_QFRT_DUS] << PAGE_SHIFT);
+}
+
+static DEVICE_ATTR_RO(api);
+static DEVICE_ATTR_RO(flags);
+static DEVICE_ATTR_RO(available_instances);
+static DEVICE_ATTR_RO(algorithms);
+static DEVICE_ATTR_RO(region_mmio_size);
+static DEVICE_ATTR_RO(region_dus_size);
+
+static struct attribute *uacce_dev_attrs[] = {
+ &dev_attr_api.attr,
+ &dev_attr_flags.attr,
+ &dev_attr_available_instances.attr,
+ &dev_attr_algorithms.attr,
+ &dev_attr_region_mmio_size.attr,
+ &dev_attr_region_dus_size.attr,
+ NULL,
+};
+
+static umode_t uacce_dev_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct uacce_device *uacce = to_uacce_device(dev);
+
+ if (((attr == &dev_attr_region_mmio_size.attr) &&
+ (!uacce->qf_pg_num[UACCE_QFRT_MMIO])) ||
+ ((attr == &dev_attr_region_dus_size.attr) &&
+ (!uacce->qf_pg_num[UACCE_QFRT_DUS])))
+ return 0;
+
+ return attr->mode;
+}
+
+static struct attribute_group uacce_dev_group = {
+ .is_visible = uacce_dev_is_visible,
+ .attrs = uacce_dev_attrs,
+};
+
+__ATTRIBUTE_GROUPS(uacce_dev);
+
+static void uacce_release(struct device *dev)
+{
+ struct uacce_device *uacce = to_uacce_device(dev);
+
+ kfree(uacce);
+}
+
+/**
+ * uacce_alloc() - alloc an accelerator
+ * @parent: pointer of uacce parent device
+ * @interface: pointer of uacce_interface for register
+ *
+ * Returns uacce pointer if success and ERR_PTR if not
+ * Need check returned negotiated uacce->flags
+ */
+struct uacce_device *uacce_alloc(struct device *parent,
+ struct uacce_interface *interface)
+{
+ unsigned int flags = interface->flags;
+ struct uacce_device *uacce;
+ int ret;
+
+ uacce = kzalloc(sizeof(struct uacce_device), GFP_KERNEL);
+ if (!uacce)
+ return ERR_PTR(-ENOMEM);
+
+ if (flags & UACCE_DEV_SVA) {
+ ret = iommu_dev_enable_feature(parent, IOMMU_DEV_FEAT_SVA);
+ if (ret)
+ flags &= ~UACCE_DEV_SVA;
+ }
+
+ uacce->parent = parent;
+ uacce->flags = flags;
+ uacce->ops = interface->ops;
+
+ ret = xa_alloc(&uacce_xa, &uacce->dev_id, uacce, xa_limit_32b,
+ GFP_KERNEL);
+ if (ret < 0)
+ goto err_with_uacce;
+
+ INIT_LIST_HEAD(&uacce->mm_list);
+ mutex_init(&uacce->mm_lock);
+ device_initialize(&uacce->dev);
+ uacce->dev.devt = MKDEV(MAJOR(uacce_devt), uacce->dev_id);
+ uacce->dev.class = uacce_class;
+ uacce->dev.groups = uacce_dev_groups;
+ uacce->dev.parent = uacce->parent;
+ uacce->dev.release = uacce_release;
+ dev_set_name(&uacce->dev, "%s-%d", interface->name, uacce->dev_id);
+
+ return uacce;
+
+err_with_uacce:
+ if (flags & UACCE_DEV_SVA)
+ iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_SVA);
+ kfree(uacce);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(uacce_alloc);
+
+/**
+ * uacce_register() - add the accelerator to cdev and export to user space
+ * @uacce: The initialized uacce device
+ *
+ * Return 0 if register succeeded, or an error.
+ */
+int uacce_register(struct uacce_device *uacce)
+{
+ if (!uacce)
+ return -ENODEV;
+
+ uacce->cdev = cdev_alloc();
+ if (!uacce->cdev)
+ return -ENOMEM;
+
+ uacce->cdev->ops = &uacce_fops;
+ uacce->cdev->owner = THIS_MODULE;
+
+ return cdev_device_add(uacce->cdev, &uacce->dev);
+}
+EXPORT_SYMBOL_GPL(uacce_register);
+
+/**
+ * uacce_remove() - remove the accelerator
+ * @uacce: the accelerator to remove
+ */
+void uacce_remove(struct uacce_device *uacce)
+{
+ struct uacce_mm *uacce_mm;
+ struct uacce_queue *q;
+
+ if (!uacce)
+ return;
+ /*
+ * unmap remaining mapping from user space, preventing user still
+ * access the mmaped area while parent device is already removed
+ */
+ if (uacce->inode)
+ unmap_mapping_range(uacce->inode->i_mapping, 0, 0, 1);
+
+ /* ensure no open queue remains */
+ mutex_lock(&uacce->mm_lock);
+ list_for_each_entry(uacce_mm, &uacce->mm_list, list) {
+ /*
+ * We don't take the uacce_mm->lock here. Since we hold the
+ * device's mm_lock, no queue can be added to or removed from
+ * this uacce_mm. We may run concurrently with mm_exit, but
+ * uacce_put_queue() is serialized and iommu_sva_unbind_device()
+ * waits for the lock that mm_exit is holding.
+ */
+ list_for_each_entry(q, &uacce_mm->queues, list)
+ uacce_put_queue(q);
+
+ if (uacce->flags & UACCE_DEV_SVA) {
+ iommu_sva_unbind_device(uacce_mm->handle);
+ uacce_mm->handle = NULL;
+ }
+ }
+ mutex_unlock(&uacce->mm_lock);
+
+ /* disable sva now since no opened queues */
+ if (uacce->flags & UACCE_DEV_SVA)
+ iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_SVA);
+
+ if (uacce->cdev)
+ cdev_device_del(uacce->cdev, &uacce->dev);
+ xa_erase(&uacce_xa, uacce->dev_id);
+ put_device(&uacce->dev);
+}
+EXPORT_SYMBOL_GPL(uacce_remove);
+
+static int __init uacce_init(void)
+{
+ int ret;
+
+ uacce_class = class_create(THIS_MODULE, UACCE_NAME);
+ if (IS_ERR(uacce_class))
+ return PTR_ERR(uacce_class);
+
+ ret = alloc_chrdev_region(&uacce_devt, 0, MINORMASK, UACCE_NAME);
+ if (ret)
+ class_destroy(uacce_class);
+
+ return ret;
+}
+
+static __exit void uacce_exit(void)
+{
+ unregister_chrdev_region(uacce_devt, MINORMASK);
+ class_destroy(uacce_class);
+}
+
+subsys_initcall(uacce_init);
+module_exit(uacce_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Hisilicon Tech. Co., Ltd.");
+MODULE_DESCRIPTION("Accelerator interface for Userland applications");
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index f34757e8f25f..2d357680094c 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/atomic.h>
#include <linux/device.h>
+#include <linux/pm_runtime.h>
#include <linux/poll.h>
#include <linux/security.h>
@@ -1070,7 +1071,14 @@ static int debugfs_show_regset32(struct seq_file *s, void *data)
{
struct debugfs_regset32 *regset = s->private;
+ if (regset->dev)
+ pm_runtime_get_sync(regset->dev);
+
debugfs_print_regs32(s, regset->regs, regset->nregs, regset->base, "");
+
+ if (regset->dev)
+ pm_runtime_put(regset->dev);
+
return 0;
}
diff --git a/include/crypto/aead.h b/include/crypto/aead.h
index 1b3ebe8593c0..62c68550aab6 100644
--- a/include/crypto/aead.h
+++ b/include/crypto/aead.h
@@ -43,27 +43,33 @@
*
* Memory Structure:
*
- * To support the needs of the most prominent user of AEAD ciphers, namely
- * IPSEC, the AEAD ciphers have a special memory layout the caller must adhere
- * to.
- *
- * The scatter list pointing to the input data must contain:
- *
- * * for RFC4106 ciphers, the concatenation of
- * associated authentication data || IV || plaintext or ciphertext. Note, the
- * same IV (buffer) is also set with the aead_request_set_crypt call. Note,
- * the API call of aead_request_set_ad must provide the length of the AAD and
- * the IV. The API call of aead_request_set_crypt only points to the size of
- * the input plaintext or ciphertext.
- *
- * * for "normal" AEAD ciphers, the concatenation of
- * associated authentication data || plaintext or ciphertext.
- *
- * It is important to note that if multiple scatter gather list entries form
- * the input data mentioned above, the first entry must not point to a NULL
- * buffer. If there is any potential where the AAD buffer can be NULL, the
- * calling code must contain a precaution to ensure that this does not result
- * in the first scatter gather list entry pointing to a NULL buffer.
+ * The source scatterlist must contain the concatenation of
+ * associated data || plaintext or ciphertext.
+ *
+ * The destination scatterlist has the same layout, except that the plaintext
+ * (resp. ciphertext) will grow (resp. shrink) by the authentication tag size
+ * during encryption (resp. decryption).
+ *
+ * In-place encryption/decryption is enabled by using the same scatterlist
+ * pointer for both the source and destination.
+ *
+ * Even in the out-of-place case, space must be reserved in the destination for
+ * the associated data, even though it won't be written to. This makes the
+ * in-place and out-of-place cases more consistent. It is permissible for the
+ * "destination" associated data to alias the "source" associated data.
+ *
+ * As with the other scatterlist crypto APIs, zero-length scatterlist elements
+ * are not allowed in the used part of the scatterlist. Thus, if there is no
+ * associated data, the first element must point to the plaintext/ciphertext.
+ *
+ * To meet the needs of IPsec, a special quirk applies to rfc4106, rfc4309,
+ * rfc4543, and rfc7539esp ciphers. For these ciphers, the final 'ivsize' bytes
+ * of the associated data buffer must contain a second copy of the IV. This is
+ * in addition to the copy passed to aead_request_set_crypt(). These two IV
+ * copies must not differ; different implementations of the same algorithm may
+ * behave differently in that case. Note that the algorithm might not actually
+ * treat the IV as associated data; nevertheless the length passed to
+ * aead_request_set_ad() must include it.
*/
struct crypto_aead;
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
index 24cfa96f98ea..56527c85d122 100644
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -66,7 +66,7 @@ struct af_alg_sgl {
struct af_alg_tsgl {
struct list_head list;
unsigned int cur; /* Last processed SG entry */
- struct scatterlist sg[0]; /* Array of SGs forming the SGL */
+ struct scatterlist sg[]; /* Array of SGs forming the SGL */
};
#define MAX_SGL_ENTS ((4096 - sizeof(struct af_alg_tsgl)) / \
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index d672b7db0cfc..a274d95fa66e 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -35,6 +35,7 @@ struct debugfs_regset32 {
const struct debugfs_reg32 *regs;
int nregs;
void __iomem *base;
+ struct device *dev; /* Optional device for Runtime PM */
};
extern struct dentry *arch_debugfs_dir;
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index c8c42214e7fb..a7a2391d6f96 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -82,6 +82,7 @@ enum pm_api_id {
PM_CLOCK_GETRATE,
PM_CLOCK_SETPARENT,
PM_CLOCK_GETPARENT,
+ PM_SECURE_AES = 47,
PM_FEATURE_CHECK = 63,
PM_API_MAX,
};
@@ -322,6 +323,7 @@ struct zynqmp_eemi_ops {
const u32 capabilities,
const u32 qos,
const enum zynqmp_pm_request_ack ack);
+ int (*aes)(const u64 address, u32 *out);
};
int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 arg0, u32 arg1,
diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h
index 54d9436600c7..2b5f8366dbe1 100644
--- a/include/linux/fsl/mc.h
+++ b/include/linux/fsl/mc.h
@@ -381,6 +381,22 @@ int __must_check __fsl_mc_driver_register(struct fsl_mc_driver *fsl_mc_driver,
void fsl_mc_driver_unregister(struct fsl_mc_driver *driver);
+/**
+ * struct fsl_mc_version
+ * @major: Major version number: incremented on API compatibility changes
+ * @minor: Minor version number: incremented on API additions (that are
+ * backward compatible); reset when major version is incremented
+ * @revision: Internal revision number: incremented on implementation changes
+ * and/or bug fixes that have no impact on API
+ */
+struct fsl_mc_version {
+ u32 major;
+ u32 minor;
+ u32 revision;
+};
+
+struct fsl_mc_version *fsl_mc_get_version(void);
+
int __must_check fsl_mc_portal_allocate(struct fsl_mc_device *mc_dev,
u16 mc_io_flags,
struct fsl_mc_io **new_mc_io);
diff --git a/include/linux/uacce.h b/include/linux/uacce.h
new file mode 100644
index 000000000000..0e215e6d0534
--- /dev/null
+++ b/include/linux/uacce.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _LINUX_UACCE_H
+#define _LINUX_UACCE_H
+
+#include <linux/cdev.h>
+#include <uapi/misc/uacce/uacce.h>
+
+#define UACCE_NAME "uacce"
+#define UACCE_MAX_REGION 2
+#define UACCE_MAX_NAME_SIZE 64
+
+struct uacce_queue;
+struct uacce_device;
+
+/**
+ * struct uacce_qfile_region - structure of queue file region
+ * @type: type of the region
+ */
+struct uacce_qfile_region {
+ enum uacce_qfrt type;
+};
+
+/**
+ * struct uacce_ops - uacce device operations
+ * @get_available_instances: get available instances left of the device
+ * @get_queue: get a queue from the device
+ * @put_queue: free a queue to the device
+ * @start_queue: make the queue start work after get_queue
+ * @stop_queue: make the queue stop work before put_queue
+ * @is_q_updated: check whether the task is finished
+ * @mmap: mmap addresses of queue to user space
+ * @ioctl: ioctl for user space users of the queue
+ */
+struct uacce_ops {
+ int (*get_available_instances)(struct uacce_device *uacce);
+ int (*get_queue)(struct uacce_device *uacce, unsigned long arg,
+ struct uacce_queue *q);
+ void (*put_queue)(struct uacce_queue *q);
+ int (*start_queue)(struct uacce_queue *q);
+ void (*stop_queue)(struct uacce_queue *q);
+ int (*is_q_updated)(struct uacce_queue *q);
+ int (*mmap)(struct uacce_queue *q, struct vm_area_struct *vma,
+ struct uacce_qfile_region *qfr);
+ long (*ioctl)(struct uacce_queue *q, unsigned int cmd,
+ unsigned long arg);
+};
+
+/**
+ * struct uacce_interface - interface required for uacce_register()
+ * @name: the uacce device name. Will show up in sysfs
+ * @flags: uacce device attributes
+ * @ops: pointer to the struct uacce_ops
+ */
+struct uacce_interface {
+ char name[UACCE_MAX_NAME_SIZE];
+ unsigned int flags;
+ const struct uacce_ops *ops;
+};
+
+enum uacce_q_state {
+ UACCE_Q_ZOMBIE = 0,
+ UACCE_Q_INIT,
+ UACCE_Q_STARTED,
+};
+
+/**
+ * struct uacce_queue
+ * @uacce: pointer to uacce
+ * @priv: private pointer
+ * @wait: wait queue head
+ * @list: index into uacce_mm
+ * @uacce_mm: the corresponding mm
+ * @qfrs: pointer of qfr regions
+ * @state: queue state machine
+ */
+struct uacce_queue {
+ struct uacce_device *uacce;
+ void *priv;
+ wait_queue_head_t wait;
+ struct list_head list;
+ struct uacce_mm *uacce_mm;
+ struct uacce_qfile_region *qfrs[UACCE_MAX_REGION];
+ enum uacce_q_state state;
+};
+
+/**
+ * struct uacce_device
+ * @algs: supported algorithms
+ * @api_ver: api version
+ * @ops: pointer to the struct uacce_ops
+ * @qf_pg_num: page numbers of the queue file regions
+ * @parent: pointer to the parent device
+ * @is_vf: whether virtual function
+ * @flags: uacce attributes
+ * @dev_id: id of the uacce device
+ * @cdev: cdev of the uacce
+ * @dev: dev of the uacce
+ * @priv: private pointer of the uacce
+ * @mm_list: list head of uacce_mm->list
+ * @mm_lock: lock for mm_list
+ * @inode: core vfs
+ */
+struct uacce_device {
+ const char *algs;
+ const char *api_ver;
+ const struct uacce_ops *ops;
+ unsigned long qf_pg_num[UACCE_MAX_REGION];
+ struct device *parent;
+ bool is_vf;
+ u32 flags;
+ u32 dev_id;
+ struct cdev *cdev;
+ struct device dev;
+ void *priv;
+ struct list_head mm_list;
+ struct mutex mm_lock;
+ struct inode *inode;
+};
+
+/**
+ * struct uacce_mm - keep track of queues bound to a process
+ * @list: index into uacce_device
+ * @queues: list of queues
+ * @mm: the mm struct
+ * @lock: protects the list of queues
+ * @pasid: pasid of the uacce_mm
+ * @handle: iommu_sva handle return from iommu_sva_bind_device
+ */
+struct uacce_mm {
+ struct list_head list;
+ struct list_head queues;
+ struct mm_struct *mm;
+ struct mutex lock;
+ int pasid;
+ struct iommu_sva *handle;
+};
+
+#if IS_ENABLED(CONFIG_UACCE)
+
+struct uacce_device *uacce_alloc(struct device *parent,
+ struct uacce_interface *interface);
+int uacce_register(struct uacce_device *uacce);
+void uacce_remove(struct uacce_device *uacce);
+
+#else /* CONFIG_UACCE */
+
+static inline
+struct uacce_device *uacce_alloc(struct device *parent,
+ struct uacce_interface *interface)
+{
+ return ERR_PTR(-ENODEV);
+}
+
+static inline int uacce_register(struct uacce_device *uacce)
+{
+ return -EINVAL;
+}
+
+static inline void uacce_remove(struct uacce_device *uacce) {}
+
+#endif /* CONFIG_UACCE */
+
+#endif /* _LINUX_UACCE_H */
diff --git a/include/uapi/misc/uacce/hisi_qm.h b/include/uapi/misc/uacce/hisi_qm.h
new file mode 100644
index 000000000000..6435f0bcb556
--- /dev/null
+++ b/include/uapi/misc/uacce/hisi_qm.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+#ifndef _UAPI_HISI_QM_H
+#define _UAPI_HISI_QM_H
+
+#include <linux/types.h>
+
+/**
+ * struct hisi_qp_ctx - User data for hisi qp.
+ * @id: qp_index return to user space
+ * @qc_type: Accelerator algorithm type
+ */
+struct hisi_qp_ctx {
+ __u16 id;
+ __u16 qc_type;
+};
+
+#define HISI_QM_API_VER_BASE "hisi_qm_v1"
+#define HISI_QM_API_VER2_BASE "hisi_qm_v2"
+
+/* UACCE_CMD_QM_SET_QP_CTX: Set qp algorithm type */
+#define UACCE_CMD_QM_SET_QP_CTX _IOWR('H', 10, struct hisi_qp_ctx)
+
+#endif
diff --git a/include/uapi/misc/uacce/uacce.h b/include/uapi/misc/uacce/uacce.h
new file mode 100644
index 000000000000..cc7185678f47
--- /dev/null
+++ b/include/uapi/misc/uacce/uacce.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+#ifndef _UAPIUUACCE_H
+#define _UAPIUUACCE_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/*
+ * UACCE_CMD_START_Q: Start queue
+ */
+#define UACCE_CMD_START_Q _IO('W', 0)
+
+/*
+ * UACCE_CMD_PUT_Q:
+ * User actively stop queue and free queue resource immediately
+ * Optimization method since close fd may delay
+ */
+#define UACCE_CMD_PUT_Q _IO('W', 1)
+
+/*
+ * UACCE Device flags:
+ * UACCE_DEV_SVA: Shared Virtual Addresses
+ * Support PASID
+ * Support device page faults (PCI PRI or SMMU Stall)
+ */
+#define UACCE_DEV_SVA BIT(0)
+
+/**
+ * enum uacce_qfrt: queue file region type
+ * @UACCE_QFRT_MMIO: device mmio region
+ * @UACCE_QFRT_DUS: device user share region
+ */
+enum uacce_qfrt {
+ UACCE_QFRT_MMIO = 0,
+ UACCE_QFRT_DUS = 1,
+};
+
+#endif
diff --git a/kernel/padata.c b/kernel/padata.c
index 72777c10bb9c..a6afa12fb75e 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -512,7 +512,7 @@ static int padata_replace_one(struct padata_shell *ps)
static int padata_replace(struct padata_instance *pinst)
{
struct padata_shell *ps;
- int err;
+ int err = 0;
pinst->flags |= PADATA_RESET;
@@ -1038,12 +1038,13 @@ EXPORT_SYMBOL(padata_alloc_shell);
*/
void padata_free_shell(struct padata_shell *ps)
{
- struct padata_instance *pinst = ps->pinst;
+ if (!ps)
+ return;
- mutex_lock(&pinst->lock);
+ mutex_lock(&ps->pinst->lock);
list_del(&ps->list);
padata_free_pd(rcu_dereference_protected(ps->pd, 1));
- mutex_unlock(&pinst->lock);
+ mutex_unlock(&ps->pinst->lock);
kfree(ps);
}