Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "Algorithms:
   - add private key generation to ecdh

  Drivers:
   - add generic gcm(aes) to aesni-intel
   - add SafeXcel EIP197 crypto engine driver
   - add ecb(aes), cfb(aes) and ecb(des3_ede) to cavium
   - add support for CNN55XX adapters in cavium
   - add ctr mode to chcr
   - add support for gcm(aes) to omap"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (140 commits)
  crypto: testmgr - Reenable sha1/aes in FIPS mode
  crypto: ccp - Release locks before returning
  crypto: cavium/nitrox - dma_mapping_error() returns bool
  crypto: doc - fix typo in docs
  Documentation/bindings: Document the SafeXel cryptographic engine driver
  crypto: caam - fix gfp allocation flags (part II)
  crypto: caam - fix gfp allocation flags (part I)
  crypto: drbg - Fixes panic in wait_for_completion call
  crypto: caam - make of_device_ids const.
  crypto: vmx - remove unnecessary check
  crypto: n2 - make of_device_ids const
  crypto: inside-secure - use the base_end pointer in ring rollback
  crypto: inside-secure - increase the batch size
  crypto: inside-secure - only dequeue when needed
  crypto: inside-secure - get the backlog before dequeueing the request
  crypto: inside-secure - stop requeueing failed requests
  crypto: inside-secure - use one queue per hw ring
  crypto: inside-secure - update the context and request later
  crypto: inside-secure - align the cipher and hash send functions
  crypto: inside-secure - optimize DSE bufferability control
  ...
diff --git a/Documentation/crypto/api-samples.rst b/Documentation/crypto/api-samples.rst
index d021fd9..2531948 100644
--- a/Documentation/crypto/api-samples.rst
+++ b/Documentation/crypto/api-samples.rst
@@ -155,9 +155,9 @@
         char ctx[];
     };
 
-    static struct sdesc init_sdesc(struct crypto_shash *alg)
+    static struct sdesc *init_sdesc(struct crypto_shash *alg)
     {
-        struct sdesc sdesc;
+        struct sdesc *sdesc;
         int size;
 
         size = sizeof(struct shash_desc) + crypto_shash_descsize(alg);
@@ -169,15 +169,16 @@
         return sdesc;
     }
 
-    static int calc_hash(struct crypto_shashalg,
-                 const unsigned chardata, unsigned int datalen,
-                 unsigned chardigest) {
-        struct sdesc sdesc;
+    static int calc_hash(struct crypto_shash *alg,
+                 const unsigned char *data, unsigned int datalen,
+                 unsigned char *digest)
+    {
+        struct sdesc *sdesc;
         int ret;
 
         sdesc = init_sdesc(alg);
         if (IS_ERR(sdesc)) {
-            pr_info("trusted_key: can't alloc %s\n", hash_alg);
+            pr_info("can't alloc sdesc\n");
             return PTR_ERR(sdesc);
         }
 
@@ -186,6 +187,23 @@
         return ret;
     }
 
+    static int test_hash(const unsigned char *data, unsigned int datalen,
+                 unsigned char *digest)
+    {
+        struct crypto_shash *alg;
+        char *hash_alg_name = "sha1-padlock-nano";
+        int ret;
+
+        alg = crypto_alloc_shash(hash_alg_name, CRYPTO_ALG_TYPE_SHASH, 0);
+        if (IS_ERR(alg)) {
+                pr_info("can't alloc alg %s\n", hash_alg_name);
+                return PTR_ERR(alg);
+        }
+        ret = calc_hash(alg, data, datalen, digest);
+        crypto_free_shash(alg);
+        return ret;
+    }
+
 
 Code Example For Random Number Generator Usage
 ----------------------------------------------
@@ -195,8 +213,8 @@
 
     static int get_random_numbers(u8 *buf, unsigned int len)
     {
-        struct crypto_rngrng = NULL;
-        chardrbg = "drbg_nopr_sha256"; /* Hash DRBG with SHA-256, no PR */
+        struct crypto_rng *rng = NULL;
+        char *drbg = "drbg_nopr_sha256"; /* Hash DRBG with SHA-256, no PR */
         int ret;
 
         if (!buf || !len) {
@@ -207,7 +225,7 @@
         rng = crypto_alloc_rng(drbg, 0, 0);
         if (IS_ERR(rng)) {
             pr_debug("could not allocate RNG handle for %s\n", drbg);
-            return -PTR_ERR(rng);
+            return PTR_ERR(rng);
         }
 
         ret = crypto_rng_get_bytes(rng, buf, len);
diff --git a/Documentation/crypto/userspace-if.rst b/Documentation/crypto/userspace-if.rst
index de5a72e..ff86bef 100644
--- a/Documentation/crypto/userspace-if.rst
+++ b/Documentation/crypto/userspace-if.rst
@@ -327,7 +327,7 @@
 operations of the kernel which would defeat the speed gains obtained
 from the zero-copy interface.
 
-The system-interent limit for the size of one zero-copy operation is 16
+The system-inherent limit for the size of one zero-copy operation is 16
 pages. If more data is to be sent to AF_ALG, user space must slice the
 input into segments with a maximum size of 16 pages.
 
diff --git a/Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt b/Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt
new file mode 100644
index 0000000..f69773f
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt
@@ -0,0 +1,29 @@
+Inside Secure SafeXcel cryptographic engine
+
+Required properties:
+- compatible: Should be "inside-secure,safexcel-eip197".
+- reg: Base physical address of the engine and length of memory mapped region.
+- interrupts: Interrupt numbers for the rings and engine.
+- interrupt-names: Should be "ring0", "ring1", "ring2", "ring3", "eip", "mem".
+
+Optional properties:
+- clocks: Reference to the crypto engine clock.
+- dma-mask: The address mask limitation. Defaults to 64.
+
+Example:
+
+	crypto: crypto@800000 {
+		compatible = "inside-secure,safexcel-eip197";
+		reg = <0x800000 0x200000>;
+		interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "mem", "ring0", "ring1", "ring2", "ring3",
+				  "eip";
+		clocks = <&cpm_syscon0 1 26>;
+		dma-mask = <0xff 0xffffffff>;
+		status = "disabled";
+	};
diff --git a/Documentation/devicetree/bindings/crypto/mediatek-crypto.txt b/Documentation/devicetree/bindings/crypto/mediatek-crypto.txt
index c204725..450da36 100644
--- a/Documentation/devicetree/bindings/crypto/mediatek-crypto.txt
+++ b/Documentation/devicetree/bindings/crypto/mediatek-crypto.txt
@@ -6,8 +6,7 @@
 - interrupts: Should contain the five crypto engines interrupts in numeric
 	order. These are global system and four descriptor rings.
 - clocks: the clock used by the core
-- clock-names: the names of the clock listed in the clocks property. These are
-	"ethif", "cryp"
+- clock-names: Must contain "cryp".
 - power-domains: Must contain a reference to the PM domain.
 
 
@@ -20,8 +19,7 @@
 			     <GIC_SPI 84 IRQ_TYPE_LEVEL_LOW>,
 			     <GIC_SPI 91 IRQ_TYPE_LEVEL_LOW>,
 			     <GIC_SPI 97 IRQ_TYPE_LEVEL_LOW>;
-		clocks = <&topckgen CLK_TOP_ETHIF_SEL>,
-			 <&ethsys CLK_ETHSYS_CRYPTO>;
-		clock-names = "ethif","cryp";
+		clocks = <&ethsys CLK_ETHSYS_CRYPTO>;
+		clock-names = "cryp";
 		power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>;
 	};
diff --git a/Documentation/devicetree/bindings/rng/mtk-rng.txt b/Documentation/devicetree/bindings/rng/mtk-rng.txt
index a6d62a2..366b99b 100644
--- a/Documentation/devicetree/bindings/rng/mtk-rng.txt
+++ b/Documentation/devicetree/bindings/rng/mtk-rng.txt
@@ -2,7 +2,9 @@
 found in Mediatek SoC family
 
 Required properties:
-- compatible	    : Should be "mediatek,mt7623-rng"
+- compatible	    : Should be
+			"mediatek,mt7622-rng", "mediatek,mt7623-rng" : for MT7622
+			"mediatek,mt7623-rng" : for MT7623
 - clocks	    : list of clock specifiers, corresponding to
 		      entries in clock-names property;
 - clock-names	    : Should contain "rng" entries;
diff --git a/Documentation/devicetree/bindings/rng/timeriomem_rng.txt b/Documentation/devicetree/bindings/rng/timeriomem_rng.txt
index 6616d15..2149400 100644
--- a/Documentation/devicetree/bindings/rng/timeriomem_rng.txt
+++ b/Documentation/devicetree/bindings/rng/timeriomem_rng.txt
@@ -5,6 +5,13 @@
 - reg : base address to sample from
 - period : wait time in microseconds to use between samples
 
+Optional properties:
+- quality : estimated number of bits of true entropy per 1024 bits read from the
+            rng.  Defaults to zero which causes the kernel's default quality to
+            be used instead.  Note that the default quality is usually zero
+            which disables using this rng to automatically fill the kernel's
+            entropy pool.
+
 N.B. currently 'reg' must be four bytes wide and aligned
 
 Example:
diff --git a/MAINTAINERS b/MAINTAINERS
index 61d0ac6..df62ee1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3746,6 +3746,13 @@
 F:	drivers/infiniband/hw/cxgb4/
 F:	include/uapi/rdma/cxgb4-abi.h
 
+CXGB4 CRYPTO DRIVER (chcr)
+M:	Harsh Jain <harsh@chelsio.com>
+L:	linux-crypto@vger.kernel.org
+W:	http://www.chelsio.com
+S:	Supported
+F:	drivers/crypto/chelsio
+
 CXGB4VF ETHERNET DRIVER (CXGB4VF)
 M:	Casey Leedom <leedom@chelsio.com>
 L:	netdev@vger.kernel.org
@@ -6647,6 +6654,12 @@
 F:	drivers/input/input-mt.c
 K:	\b(ABS|SYN)_MT_
 
+INSIDE SECURE CRYPTO DRIVER
+M:	Antoine Tenart <antoine.tenart@free-electrons.com>
+F:	drivers/crypto/inside-secure/
+S:	Maintained
+L:	linux-crypto@vger.kernel.org
+
 INTEL ASoC BDW/HSW DRIVERS
 M:	Jie Yang <yang.jie@linux.intel.com>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
@@ -8306,6 +8319,11 @@
 S:	Maintained
 F:	drivers/net/wireless/mediatek/mt7601u/
 
+MEDIATEK RANDOM NUMBER GENERATOR SUPPORT
+M:	Sean Wang <sean.wang@mediatek.com>
+S:	Maintained
+F:	drivers/char/hw_random/mtk-rng.c
+
 MEGACHIPS STDPXXXX-GE-B850V3-FW LVDS/DP++ BRIDGES
 M:	Peter Senna Tschudin <peter.senna@collabora.com>
 M:	Martin Donnelly <martin.donnelly@ge.com>
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index 883b84d..0f966a8 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -14,6 +14,7 @@
 #include <crypto/aes.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
+#include <linux/cpufeature.h>
 #include <linux/module.h>
 #include <crypto/xts.h>
 
@@ -425,9 +426,6 @@ static int __init aes_init(void)
 	int err;
 	int i;
 
-	if (!(elf_hwcap2 & HWCAP2_AES))
-		return -ENODEV;
-
 	err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
 	if (err)
 		return err;
@@ -451,5 +449,5 @@ static int __init aes_init(void)
 	return err;
 }
 
-module_init(aes_init);
+module_cpu_feature_match(AES, aes_init);
 module_exit(aes_exit);
diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c
index e1566be..1b0e0e8 100644
--- a/arch/arm/crypto/crc32-ce-glue.c
+++ b/arch/arm/crypto/crc32-ce-glue.c
@@ -8,6 +8,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/cpufeature.h>
 #include <linux/crc32.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -233,6 +234,11 @@ static void __exit crc32_pmull_mod_exit(void)
 				  ARRAY_SIZE(crc32_pmull_algs));
 }
 
+static const struct cpu_feature crc32_cpu_feature[] = {
+	{ cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
+};
+MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
+
 module_init(crc32_pmull_mod_init);
 module_exit(crc32_pmull_mod_exit);
 
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index 7546b3c..6bac8be 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -15,6 +15,7 @@
 #include <crypto/cryptd.h>
 #include <crypto/internal/hash.h>
 #include <crypto/gf128mul.h>
+#include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
 
@@ -311,9 +312,6 @@ static int __init ghash_ce_mod_init(void)
 {
 	int err;
 
-	if (!(elf_hwcap2 & HWCAP2_PMULL))
-		return -ENODEV;
-
 	err = crypto_register_shash(&ghash_alg);
 	if (err)
 		return err;
@@ -334,5 +332,5 @@ static void __exit ghash_ce_mod_exit(void)
 	crypto_unregister_shash(&ghash_alg);
 }
 
-module_init(ghash_ce_mod_init);
+module_cpu_feature_match(PMULL, ghash_ce_mod_init);
 module_exit(ghash_ce_mod_exit);
diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c
index 80bc2fc..555f72b 100644
--- a/arch/arm/crypto/sha1-ce-glue.c
+++ b/arch/arm/crypto/sha1-ce-glue.c
@@ -11,6 +11,7 @@
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
 #include <crypto/sha1_base.h>
+#include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
 
@@ -82,8 +83,6 @@ static struct shash_alg alg = {
 
 static int __init sha1_ce_mod_init(void)
 {
-	if (!(elf_hwcap2 & HWCAP2_SHA1))
-		return -ENODEV;
 	return crypto_register_shash(&alg);
 }
 
@@ -92,5 +91,5 @@ static void __exit sha1_ce_mod_fini(void)
 	crypto_unregister_shash(&alg);
 }
 
-module_init(sha1_ce_mod_init);
+module_cpu_feature_match(SHA1, sha1_ce_mod_init);
 module_exit(sha1_ce_mod_fini);
diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c
index 0755b2d..df4dcef 100644
--- a/arch/arm/crypto/sha2-ce-glue.c
+++ b/arch/arm/crypto/sha2-ce-glue.c
@@ -11,6 +11,7 @@
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
 #include <crypto/sha256_base.h>
+#include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
 
@@ -100,8 +101,6 @@ static struct shash_alg algs[] = { {
 
 static int __init sha2_ce_mod_init(void)
 {
-	if (!(elf_hwcap2 & HWCAP2_SHA2))
-		return -ENODEV;
 	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
 }
 
@@ -110,5 +109,5 @@ static void __exit sha2_ce_mod_fini(void)
 	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
 }
 
-module_init(sha2_ce_mod_init);
+module_cpu_feature_match(SHA2, sha2_ce_mod_init);
 module_exit(sha2_ce_mod_fini);
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index c98e7e8..8550408 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -82,7 +82,8 @@
 	ldr		dgb, [x0, #16]
 
 	/* load sha1_ce_state::finalize */
-	ldr		w4, [x0, #:lo12:sha1_ce_offsetof_finalize]
+	ldr_l		w4, sha1_ce_offsetof_finalize, x4
+	ldr		w4, [x0, x4]
 
 	/* load input */
 0:	ld1		{v8.4s-v11.4s}, [x1], #64
@@ -132,7 +133,8 @@
 	 * the padding is handled by the C code in that case.
 	 */
 	cbz		x4, 3f
-	ldr		x4, [x0, #:lo12:sha1_ce_offsetof_count]
+	ldr_l		w4, sha1_ce_offsetof_count, x4
+	ldr		x4, [x0, x4]
 	movi		v9.2d, #0
 	mov		x8, #0x80000000
 	movi		v10.2d, #0
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index aefda98..ea319c0 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -17,9 +17,6 @@
 #include <linux/crypto.h>
 #include <linux/module.h>
 
-#define ASM_EXPORT(sym, val) \
-	asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
-
 MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
@@ -32,6 +29,9 @@ struct sha1_ce_state {
 asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
 				  int blocks);
 
+const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count);
+const u32 sha1_ce_offsetof_finalize = offsetof(struct sha1_ce_state, finalize);
+
 static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
 			  unsigned int len)
 {
@@ -52,11 +52,6 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
 	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 	bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
 
-	ASM_EXPORT(sha1_ce_offsetof_count,
-		   offsetof(struct sha1_ce_state, sst.count));
-	ASM_EXPORT(sha1_ce_offsetof_finalize,
-		   offsetof(struct sha1_ce_state, finalize));
-
 	/*
 	 * Allow the asm code to perform the finalization if there is no
 	 * partial data and the input is a round multiple of the block size.
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 01cfee0..679c6c0 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -88,7 +88,8 @@
 	ld1		{dgav.4s, dgbv.4s}, [x0]
 
 	/* load sha256_ce_state::finalize */
-	ldr		w4, [x0, #:lo12:sha256_ce_offsetof_finalize]
+	ldr_l		w4, sha256_ce_offsetof_finalize, x4
+	ldr		w4, [x0, x4]
 
 	/* load input */
 0:	ld1		{v16.4s-v19.4s}, [x1], #64
@@ -136,7 +137,8 @@
 	 * the padding is handled by the C code in that case.
 	 */
 	cbz		x4, 3f
-	ldr		x4, [x0, #:lo12:sha256_ce_offsetof_count]
+	ldr_l		w4, sha256_ce_offsetof_count, x4
+	ldr		x4, [x0, x4]
 	movi		v17.2d, #0
 	mov		x8, #0x80000000
 	movi		v18.2d, #0
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index 7cd5875..0ed9486 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -17,9 +17,6 @@
 #include <linux/crypto.h>
 #include <linux/module.h>
 
-#define ASM_EXPORT(sym, val) \
-	asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
-
 MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
@@ -32,6 +29,11 @@ struct sha256_ce_state {
 asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
 				  int blocks);
 
+const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state,
+					      sst.count);
+const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state,
+						 finalize);
+
 static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
 			    unsigned int len)
 {
@@ -52,11 +54,6 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
 	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 	bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
 
-	ASM_EXPORT(sha256_ce_offsetof_count,
-		   offsetof(struct sha256_ce_state, sst.count));
-	ASM_EXPORT(sha256_ce_offsetof_finalize,
-		   offsetof(struct sha256_ce_state, finalize));
-
 	/*
 	 * Allow the asm code to perform the finalization if there is no
 	 * partial data and the input is a round multiple of the block size.
diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
index 9105655..8739cf7 100644
--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ b/arch/x86/crypto/aes-x86_64-asm_64.S
@@ -42,17 +42,15 @@
 #define R5E	%esi
 #define R6	%rdi
 #define R6E	%edi
-#define R7	%rbp
-#define R7E	%ebp
+#define R7	%r9	/* don't use %rbp; it breaks stack traces */
+#define R7E	%r9d
 #define R8	%r8
-#define R9	%r9
 #define R10	%r10
 #define R11	%r11
 
-#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
+#define prologue(FUNC,KEY,B128,B192,r1,r2,r5,r6,r7,r8,r9,r10,r11) \
 	ENTRY(FUNC);			\
 	movq	r1,r2;			\
-	movq	r3,r4;			\
 	leaq	KEY+48(r8),r9;		\
 	movq	r10,r11;		\
 	movl	(r7),r5 ## E;		\
@@ -70,9 +68,8 @@
 	je	B192;			\
 	leaq	32(r9),r9;
 
-#define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \
+#define epilogue(FUNC,r1,r2,r5,r6,r7,r8,r9) \
 	movq	r1,r2;			\
-	movq	r3,r4;			\
 	movl	r5 ## E,(r9);		\
 	movl	r6 ## E,4(r9);		\
 	movl	r7 ## E,8(r9);		\
@@ -88,12 +85,12 @@
 	movl	TAB(,r6,4),r6 ## E;	\
 	roll	$16,r2 ## E;		\
 	shrl	$16,r4 ## E;		\
-	movzbl	r4 ## H,r7 ## E;	\
-	movzbl	r4 ## L,r4 ## E;	\
+	movzbl	r4 ## L,r7 ## E;	\
+	movzbl	r4 ## H,r4 ## E;	\
 	xorl	OFFSET(r8),ra ## E;	\
 	xorl	OFFSET+4(r8),rb ## E;	\
-	xorl	TAB+3072(,r7,4),r5 ## E;\
-	xorl	TAB+2048(,r4,4),r6 ## E;\
+	xorl	TAB+3072(,r4,4),r5 ## E;\
+	xorl	TAB+2048(,r7,4),r6 ## E;\
 	movzbl	r1 ## L,r7 ## E;	\
 	movzbl	r1 ## H,r4 ## E;	\
 	movl	TAB+1024(,r4,4),r4 ## E;\
@@ -101,19 +98,19 @@
 	roll	$16,r1 ## E;		\
 	shrl	$16,r3 ## E;		\
 	xorl	TAB(,r7,4),r5 ## E;	\
-	movzbl	r3 ## H,r7 ## E;	\
-	movzbl	r3 ## L,r3 ## E;	\
-	xorl	TAB+3072(,r7,4),r4 ## E;\
-	xorl	TAB+2048(,r3,4),r5 ## E;\
-	movzbl	r1 ## H,r7 ## E;	\
-	movzbl	r1 ## L,r3 ## E;	\
+	movzbl	r3 ## L,r7 ## E;	\
+	movzbl	r3 ## H,r3 ## E;	\
+	xorl	TAB+3072(,r3,4),r4 ## E;\
+	xorl	TAB+2048(,r7,4),r5 ## E;\
+	movzbl	r1 ## L,r7 ## E;	\
+	movzbl	r1 ## H,r3 ## E;	\
 	shrl	$16,r1 ## E;		\
-	xorl	TAB+3072(,r7,4),r6 ## E;\
-	movl	TAB+2048(,r3,4),r3 ## E;\
-	movzbl	r1 ## H,r7 ## E;	\
-	movzbl	r1 ## L,r1 ## E;	\
-	xorl	TAB+1024(,r7,4),r6 ## E;\
-	xorl	TAB(,r1,4),r3 ## E;	\
+	xorl	TAB+3072(,r3,4),r6 ## E;\
+	movl	TAB+2048(,r7,4),r3 ## E;\
+	movzbl	r1 ## L,r7 ## E;	\
+	movzbl	r1 ## H,r1 ## E;	\
+	xorl	TAB+1024(,r1,4),r6 ## E;\
+	xorl	TAB(,r7,4),r3 ## E;	\
 	movzbl	r2 ## H,r1 ## E;	\
 	movzbl	r2 ## L,r7 ## E;	\
 	shrl	$16,r2 ## E;		\
@@ -131,9 +128,9 @@
 	movl	r4 ## E,r2 ## E;
 
 #define entry(FUNC,KEY,B128,B192) \
-	prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
+	prologue(FUNC,KEY,B128,B192,R2,R8,R1,R3,R4,R6,R10,R5,R11)
 
-#define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11)
+#define return(FUNC) epilogue(FUNC,R8,R2,R5,R6,R3,R4,R11)
 
 #define encrypt_round(TAB,OFFSET) \
 	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 3c46518..16627fe 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -89,6 +89,29 @@
 ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
             .octa 0x00000000000000000000000000000000
 
+.section .rodata
+.align 16
+.type aad_shift_arr, @object
+.size aad_shift_arr, 272
+aad_shift_arr:
+        .octa     0xffffffffffffffffffffffffffffffff
+        .octa     0xffffffffffffffffffffffffffffff0C
+        .octa     0xffffffffffffffffffffffffffff0D0C
+        .octa     0xffffffffffffffffffffffffff0E0D0C
+        .octa     0xffffffffffffffffffffffff0F0E0D0C
+        .octa     0xffffffffffffffffffffff0C0B0A0908
+        .octa     0xffffffffffffffffffff0D0C0B0A0908
+        .octa     0xffffffffffffffffff0E0D0C0B0A0908
+        .octa     0xffffffffffffffff0F0E0D0C0B0A0908
+        .octa     0xffffffffffffff0C0B0A090807060504
+        .octa     0xffffffffffff0D0C0B0A090807060504
+        .octa     0xffffffffff0E0D0C0B0A090807060504
+        .octa     0xffffffff0F0E0D0C0B0A090807060504
+        .octa     0xffffff0C0B0A09080706050403020100
+        .octa     0xffff0D0C0B0A09080706050403020100
+        .octa     0xff0E0D0C0B0A09080706050403020100
+        .octa     0x0F0E0D0C0B0A09080706050403020100
+
 
 .text
 
@@ -252,32 +275,66 @@
 	mov	   arg8, %r12           # %r12 = aadLen
 	mov	   %r12, %r11
 	pxor	   %xmm\i, %xmm\i
+	pxor       \XMM2, \XMM2
 
-_get_AAD_loop\num_initial_blocks\operation:
-	movd	   (%r10), \TMP1
+	cmp	   $16, %r11
+	jl	   _get_AAD_rest8\num_initial_blocks\operation
+_get_AAD_blocks\num_initial_blocks\operation:
+	movdqu	   (%r10), %xmm\i
+	PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
+	pxor	   %xmm\i, \XMM2
+	GHASH_MUL  \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	add	   $16, %r10
+	sub	   $16, %r12
+	sub	   $16, %r11
+	cmp	   $16, %r11
+	jge	   _get_AAD_blocks\num_initial_blocks\operation
+
+	movdqu	   \XMM2, %xmm\i
+	cmp	   $0, %r11
+	je	   _get_AAD_done\num_initial_blocks\operation
+
+	pxor	   %xmm\i,%xmm\i
+
+	/* read the last <16B of AAD. since we have at least 4B of
+	data right after the AAD (the ICV, and maybe some CT), we can
+	read 4B/8B blocks safely, and then get rid of the extra stuff */
+_get_AAD_rest8\num_initial_blocks\operation:
+	cmp	   $4, %r11
+	jle	   _get_AAD_rest4\num_initial_blocks\operation
+	movq	   (%r10), \TMP1
+	add	   $8, %r10
+	sub	   $8, %r11
+	pslldq	   $8, \TMP1
+	psrldq	   $8, %xmm\i
+	pxor	   \TMP1, %xmm\i
+	jmp	   _get_AAD_rest8\num_initial_blocks\operation
+_get_AAD_rest4\num_initial_blocks\operation:
+	cmp	   $0, %r11             # %r11 = AAD bytes still unread
+	jle	   _get_AAD_rest0\num_initial_blocks\operation
+	mov	   (%r10), %eax         # load the next 4 AAD bytes
+	movq	   %rax, \TMP1
+	add	   $4, %r10             # advance the AAD pointer
+	sub	   $4, %r11             # consume 4 bytes of remaining length
 	pslldq	   $12, \TMP1
 	psrldq	   $4, %xmm\i
 	pxor	   \TMP1, %xmm\i
-	add	   $4, %r10
-	sub	   $4, %r12
-	jne	   _get_AAD_loop\num_initial_blocks\operation
-
-	cmp	   $16, %r11
-	je	   _get_AAD_loop2_done\num_initial_blocks\operation
-
-	mov	   $16, %r12
-_get_AAD_loop2\num_initial_blocks\operation:
-	psrldq	   $4, %xmm\i
-	sub	   $4, %r12
-	cmp	   %r11, %r12
-	jne	   _get_AAD_loop2\num_initial_blocks\operation
-
-_get_AAD_loop2_done\num_initial_blocks\operation:
+_get_AAD_rest0\num_initial_blocks\operation:
+	/* finalize: shift out the extra bytes we read, and align
+	left. since pslldq can only shift by an immediate, we use
+	pshufb and an array of shuffle masks */
+	movq	   %r12, %r11           # %r12 = AAD bytes left after full blocks
+	salq	   $4, %r11             # scale by 16: one 16-byte mask per length
+	movdqu	   aad_shift_arr(%r11), \TMP1
+	PSHUFB_XMM \TMP1, %xmm\i
+_get_AAD_rest_final\num_initial_blocks\operation:
 	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
+	pxor	   \XMM2, %xmm\i
+	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
 
+_get_AAD_done\num_initial_blocks\operation:
 	xor	   %r11, %r11 # initialise the data pointer offset as zero
-
-        # start AES for num_initial_blocks blocks
+	# start AES for num_initial_blocks blocks
 
 	mov	   %arg5, %rax                      # %rax = *Y0
 	movdqu	   (%rax), \XMM0                    # XMM0 = Y0
@@ -322,7 +379,7 @@
                 # prepare plaintext/ciphertext for GHASH computation
 .endr
 .endif
-	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+
         # apply GHASH on num_initial_blocks blocks
 
 .if \i == 5
@@ -477,28 +534,66 @@
 	mov	   arg8, %r12           # %r12 = aadLen
 	mov	   %r12, %r11
 	pxor	   %xmm\i, %xmm\i
-_get_AAD_loop\num_initial_blocks\operation:
-	movd	   (%r10), \TMP1
+	pxor	   \XMM2, \XMM2
+
+	cmp	   $16, %r11
+	jl	   _get_AAD_rest8\num_initial_blocks\operation
+_get_AAD_blocks\num_initial_blocks\operation:
+	movdqu	   (%r10), %xmm\i
+	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
+	pxor	   %xmm\i, \XMM2
+	GHASH_MUL  \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	add	   $16, %r10
+	sub	   $16, %r12
+	sub	   $16, %r11
+	cmp	   $16, %r11
+	jge	   _get_AAD_blocks\num_initial_blocks\operation
+
+	movdqu	   \XMM2, %xmm\i
+	cmp	   $0, %r11
+	je	   _get_AAD_done\num_initial_blocks\operation
+
+	pxor	   %xmm\i,%xmm\i
+
+	/* read the last <16B of AAD. since we have at least 4B of
+	data right after the AAD (the ICV, and maybe some PT), we can
+	read 4B/8B blocks safely, and then get rid of the extra stuff */
+_get_AAD_rest8\num_initial_blocks\operation:
+	cmp	   $4, %r11
+	jle	   _get_AAD_rest4\num_initial_blocks\operation
+	movq	   (%r10), \TMP1
+	add	   $8, %r10
+	sub	   $8, %r11
+	pslldq	   $8, \TMP1
+	psrldq	   $8, %xmm\i
+	pxor	   \TMP1, %xmm\i
+	jmp	   _get_AAD_rest8\num_initial_blocks\operation
+_get_AAD_rest4\num_initial_blocks\operation:
+	cmp	   $0, %r11             # %r11 = AAD bytes still unread
+	jle	   _get_AAD_rest0\num_initial_blocks\operation
+	mov	   (%r10), %eax         # load the next 4 AAD bytes
+	movq	   %rax, \TMP1
+	add	   $4, %r10             # advance the AAD pointer
+	sub	   $4, %r11             # consume 4 bytes of remaining length
 	pslldq	   $12, \TMP1
 	psrldq	   $4, %xmm\i
 	pxor	   \TMP1, %xmm\i
-	add	   $4, %r10
-	sub	   $4, %r12
-	jne	   _get_AAD_loop\num_initial_blocks\operation
-	cmp	   $16, %r11
-	je	   _get_AAD_loop2_done\num_initial_blocks\operation
-	mov	   $16, %r12
-_get_AAD_loop2\num_initial_blocks\operation:
-	psrldq	   $4, %xmm\i
-	sub	   $4, %r12
-	cmp	   %r11, %r12
-	jne	   _get_AAD_loop2\num_initial_blocks\operation
-_get_AAD_loop2_done\num_initial_blocks\operation:
+_get_AAD_rest0\num_initial_blocks\operation:
+	/* finalize: shift out the extra bytes we read, and align
+	left. since pslldq can only shift by an immediate, we use
+	pshufb and an array of shuffle masks */
+	movq	   %r12, %r11           # %r12 = AAD bytes left after full blocks
+	salq	   $4, %r11             # scale by 16: one 16-byte mask per length
+	movdqu	   aad_shift_arr(%r11), \TMP1
+	PSHUFB_XMM \TMP1, %xmm\i
+_get_AAD_rest_final\num_initial_blocks\operation:
 	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
+	pxor	   \XMM2, %xmm\i
+	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
 
+_get_AAD_done\num_initial_blocks\operation:
 	xor	   %r11, %r11 # initialise the data pointer offset as zero
-
-        # start AES for num_initial_blocks blocks
+	# start AES for num_initial_blocks blocks
 
 	mov	   %arg5, %rax                      # %rax = *Y0
 	movdqu	   (%rax), \XMM0                    # XMM0 = Y0
@@ -543,7 +638,7 @@
 		# prepare plaintext/ciphertext for GHASH computation
 .endr
 .endif
-	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+
         # apply GHASH on num_initial_blocks blocks
 
 .if \i == 5
@@ -1454,18 +1549,35 @@
 	mov	arg10, %r11               # %r11 = auth_tag_len
 	cmp	$16, %r11
 	je	_T_16_decrypt
-	cmp	$12, %r11
-	je	_T_12_decrypt
+	cmp	$8, %r11
+	jl	_T_4_decrypt
 _T_8_decrypt:
 	MOVQ_R64_XMM	%xmm0, %rax
 	mov	%rax, (%r10)
-	jmp	_return_T_done_decrypt
-_T_12_decrypt:
-	MOVQ_R64_XMM	%xmm0, %rax
-	mov	%rax, (%r10)
+	add	$8, %r10
+	sub	$8, %r11
 	psrldq	$8, %xmm0
+	cmp	$0, %r11
+	je	_return_T_done_decrypt
+_T_4_decrypt:
 	movd	%xmm0, %eax
-	mov	%eax, 8(%r10)
+	mov	%eax, (%r10)
+	add	$4, %r10
+	sub	$4, %r11
+	psrldq	$4, %xmm0
+	cmp	$0, %r11
+	je	_return_T_done_decrypt
+_T_123_decrypt:
+	movd	%xmm0, %eax
+	cmp	$2, %r11
+	jl	_T_1_decrypt
+	mov	%ax, (%r10)
+	cmp	$2, %r11
+	je	_return_T_done_decrypt
+	add	$2, %r10
+	sar	$16, %eax
+_T_1_decrypt:
+	mov	%al, (%r10)
 	jmp	_return_T_done_decrypt
 _T_16_decrypt:
 	movdqu	%xmm0, (%r10)
@@ -1718,18 +1830,35 @@
 	mov	arg10, %r11                    # %r11 = auth_tag_len
 	cmp	$16, %r11
 	je	_T_16_encrypt
-	cmp	$12, %r11
-	je	_T_12_encrypt
+	cmp	$8, %r11
+	jl	_T_4_encrypt
 _T_8_encrypt:
 	MOVQ_R64_XMM	%xmm0, %rax
 	mov	%rax, (%r10)
-	jmp	_return_T_done_encrypt
-_T_12_encrypt:
-	MOVQ_R64_XMM	%xmm0, %rax
-	mov	%rax, (%r10)
+	add	$8, %r10
+	sub	$8, %r11
 	psrldq	$8, %xmm0
+	cmp	$0, %r11
+	je	_return_T_done_encrypt
+_T_4_encrypt:
 	movd	%xmm0, %eax
-	mov	%eax, 8(%r10)
+	mov	%eax, (%r10)
+	add	$4, %r10
+	sub	$4, %r11
+	psrldq	$4, %xmm0
+	cmp	$0, %r11
+	je	_return_T_done_encrypt
+_T_123_encrypt:
+	movd	%xmm0, %eax
+	cmp	$2, %r11
+	jl	_T_1_encrypt
+	mov	%ax, (%r10)
+	cmp	$2, %r11
+	je	_return_T_done_encrypt
+	add	$2, %r10
+	sar	$16, %eax
+_T_1_encrypt:
+	mov	%al, (%r10)
 	jmp	_return_T_done_encrypt
 _T_16_encrypt:
 	movdqu	%xmm0, (%r10)
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index d664382..faecb15 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -155,6 +155,30 @@
 ALL_F:           .octa     0xffffffffffffffffffffffffffffffff
                  .octa     0x00000000000000000000000000000000
 
+.section .rodata
+.align 16
+.type aad_shift_arr, @object
+.size aad_shift_arr, 272
+aad_shift_arr:
+        .octa     0xffffffffffffffffffffffffffffffff
+        .octa     0xffffffffffffffffffffffffffffff0C
+        .octa     0xffffffffffffffffffffffffffff0D0C
+        .octa     0xffffffffffffffffffffffffff0E0D0C
+        .octa     0xffffffffffffffffffffffff0F0E0D0C
+        .octa     0xffffffffffffffffffffff0C0B0A0908
+        .octa     0xffffffffffffffffffff0D0C0B0A0908
+        .octa     0xffffffffffffffffff0E0D0C0B0A0908
+        .octa     0xffffffffffffffff0F0E0D0C0B0A0908
+        .octa     0xffffffffffffff0C0B0A090807060504
+        .octa     0xffffffffffff0D0C0B0A090807060504
+        .octa     0xffffffffff0E0D0C0B0A090807060504
+        .octa     0xffffffff0F0E0D0C0B0A090807060504
+        .octa     0xffffff0C0B0A09080706050403020100
+        .octa     0xffff0D0C0B0A09080706050403020100
+        .octa     0xff0E0D0C0B0A09080706050403020100
+        .octa     0x0F0E0D0C0B0A09080706050403020100
+
+
 .text
 
 
@@ -372,41 +396,72 @@
 
 .macro INITIAL_BLOCKS_AVX num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC
 	i = (8-\num_initial_blocks)
+	j = 0
 	setreg
 
-        mov     arg6, %r10                      # r10 = AAD
-        mov     arg7, %r12                      # r12 = aadLen
+	mov     arg6, %r10                      # r10 = AAD
+	mov     arg7, %r12                      # r12 = aadLen
 
 
-        mov     %r12, %r11
+	mov     %r12, %r11
 
-        vpxor   reg_i, reg_i, reg_i
-_get_AAD_loop\@:
-        vmovd   (%r10), \T1
-        vpslldq $12, \T1, \T1
-        vpsrldq $4, reg_i, reg_i
-        vpxor   \T1, reg_i, reg_i
+	vpxor   reg_j, reg_j, reg_j
+	vpxor   reg_i, reg_i, reg_i
+	cmp     $16, %r11
+	jl      _get_AAD_rest8\@
+_get_AAD_blocks\@:
+	vmovdqu (%r10), reg_i
+	vpshufb SHUF_MASK(%rip), reg_i, reg_i
+	vpxor   reg_i, reg_j, reg_j
+	GHASH_MUL_AVX       reg_j, \T2, \T1, \T3, \T4, \T5, \T6
+	add     $16, %r10
+	sub     $16, %r12
+	sub     $16, %r11
+	cmp     $16, %r11
+	jge     _get_AAD_blocks\@
+	vmovdqu reg_j, reg_i
+	cmp     $0, %r11
+	je      _get_AAD_done\@
 
-        add     $4, %r10
-        sub     $4, %r12
-        jg      _get_AAD_loop\@
+	vpxor   reg_i, reg_i, reg_i
 
+	/* read the last <16B of AAD. since we have at least 4B of
+	data right after the AAD (the ICV, and maybe some CT), we can
+	read 4B/8B blocks safely, and then get rid of the extra stuff */
+_get_AAD_rest8\@:
+	cmp     $4, %r11
+	jle     _get_AAD_rest4\@
+	movq    (%r10), \T1
+	add     $8, %r10
+	sub     $8, %r11
+	vpslldq $8, \T1, \T1
+	vpsrldq $8, reg_i, reg_i
+	vpxor   \T1, reg_i, reg_i
+	jmp     _get_AAD_rest8\@
+_get_AAD_rest4\@:
+	cmp     $0, %r11
+	jle      _get_AAD_rest0\@
+	mov     (%r10), %eax
+	movq    %rax, \T1
+	add     $4, %r10
+	sub     $4, %r11
+	vpslldq $12, \T1, \T1
+	vpsrldq $4, reg_i, reg_i
+	vpxor   \T1, reg_i, reg_i
+_get_AAD_rest0\@:
+	/* finalize: shift out the extra bytes we read, and align
+	left. since pslldq can only shift by an immediate, we use
+	vpshufb and an array of shuffle masks */
+	movq    %r12, %r11
+	salq    $4, %r11
+	movdqu  aad_shift_arr(%r11), \T1
+	vpshufb \T1, reg_i, reg_i
+_get_AAD_rest_final\@:
+	vpshufb SHUF_MASK(%rip), reg_i, reg_i
+	vpxor   reg_j, reg_i, reg_i
+	GHASH_MUL_AVX       reg_i, \T2, \T1, \T3, \T4, \T5, \T6
 
-        cmp     $16, %r11
-        je      _get_AAD_loop2_done\@
-        mov     $16, %r12
-
-_get_AAD_loop2\@:
-        vpsrldq $4, reg_i, reg_i
-        sub     $4, %r12
-        cmp     %r11, %r12
-        jg      _get_AAD_loop2\@
-
-_get_AAD_loop2_done\@:
-
-        #byte-reflect the AAD data
-        vpshufb SHUF_MASK(%rip), reg_i, reg_i
-
+_get_AAD_done\@:
 	# initialize the data pointer offset as zero
 	xor     %r11, %r11
 
@@ -480,7 +535,6 @@
 	i = (8-\num_initial_blocks)
 	j = (9-\num_initial_blocks)
 	setreg
-        GHASH_MUL_AVX       reg_i, \T2, \T1, \T3, \T4, \T5, \T6
 
 .rep \num_initial_blocks
         vpxor    reg_i, reg_j, reg_j
@@ -1427,19 +1481,36 @@
         cmp     $16, %r11
         je      _T_16\@
 
-        cmp     $12, %r11
-        je      _T_12\@
+        cmp     $8, %r11
+        jl      _T_4\@
 
 _T_8\@:
         vmovq   %xmm9, %rax
         mov     %rax, (%r10)
-        jmp     _return_T_done\@
-_T_12\@:
-        vmovq   %xmm9, %rax
-        mov     %rax, (%r10)
+        add     $8, %r10
+        sub     $8, %r11
         vpsrldq $8, %xmm9, %xmm9
+        cmp     $0, %r11
+        je     _return_T_done\@
+_T_4\@:
         vmovd   %xmm9, %eax
-        mov     %eax, 8(%r10)
+        mov     %eax, (%r10)
+        add     $4, %r10
+        sub     $4, %r11
+        vpsrldq     $4, %xmm9, %xmm9
+        cmp     $0, %r11
+        je     _return_T_done\@
+_T_123\@:
+        vmovd     %xmm9, %eax
+        cmp     $2, %r11
+        jl     _T_1\@
+        mov     %ax, (%r10)
+        cmp     $2, %r11
+        je     _return_T_done\@
+        add     $2, %r10
+        sar     $16, %eax
+_T_1\@:
+        mov     %al, (%r10)
         jmp     _return_T_done\@
 
 _T_16\@:
@@ -1631,41 +1702,73 @@
 
 .macro INITIAL_BLOCKS_AVX2 num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER
 	i = (8-\num_initial_blocks)
+	j = 0
 	setreg
 
-        mov     arg6, %r10                       # r10 = AAD
-        mov     arg7, %r12                       # r12 = aadLen
+	mov     arg6, %r10                       # r10 = AAD
+	mov     arg7, %r12                       # r12 = aadLen
 
 
-        mov     %r12, %r11
+	mov     %r12, %r11
 
-        vpxor   reg_i, reg_i, reg_i
-_get_AAD_loop\@:
-        vmovd   (%r10), \T1
-        vpslldq $12, \T1, \T1
-        vpsrldq $4, reg_i, reg_i
-        vpxor   \T1, reg_i, reg_i
+	vpxor   reg_j, reg_j, reg_j
+	vpxor   reg_i, reg_i, reg_i
 
-        add     $4, %r10
-        sub     $4, %r12
-        jg      _get_AAD_loop\@
+	cmp     $16, %r11
+	jl      _get_AAD_rest8\@
+_get_AAD_blocks\@:
+	vmovdqu (%r10), reg_i
+	vpshufb SHUF_MASK(%rip), reg_i, reg_i
+	vpxor   reg_i, reg_j, reg_j
+	GHASH_MUL_AVX2      reg_j, \T2, \T1, \T3, \T4, \T5, \T6
+	add     $16, %r10
+	sub     $16, %r12
+	sub     $16, %r11
+	cmp     $16, %r11
+	jge     _get_AAD_blocks\@
+	vmovdqu reg_j, reg_i
+	cmp     $0, %r11
+	je      _get_AAD_done\@
 
+	vpxor   reg_i, reg_i, reg_i
 
-        cmp     $16, %r11
-        je      _get_AAD_loop2_done\@
-        mov     $16, %r12
+	/* read the last <16B of AAD. since we have at least 4B of
+	data right after the AAD (the ICV, and maybe some CT), we can
+	read 4B/8B blocks safely, and then get rid of the extra stuff */
+_get_AAD_rest8\@:
+	cmp     $4, %r11
+	jle     _get_AAD_rest4\@
+	movq    (%r10), \T1
+	add     $8, %r10
+	sub     $8, %r11
+	vpslldq $8, \T1, \T1
+	vpsrldq $8, reg_i, reg_i
+	vpxor   \T1, reg_i, reg_i
+	jmp     _get_AAD_rest8\@
+_get_AAD_rest4\@:
+	cmp     $0, %r11
+	jle     _get_AAD_rest0\@
+	mov     (%r10), %eax
+	movq    %rax, \T1
+	add     $4, %r10
+	sub     $4, %r11
+	vpslldq $12, \T1, \T1
+	vpsrldq $4, reg_i, reg_i
+	vpxor   \T1, reg_i, reg_i
+_get_AAD_rest0\@:
+	/* finalize: shift out the extra bytes we read, and align
+	left. since pslldq can only shift by an immediate, we use
+	vpshufb and an array of shuffle masks */
+	movq    %r12, %r11
+	salq    $4, %r11
+	movdqu  aad_shift_arr(%r11), \T1
+	vpshufb \T1, reg_i, reg_i
+_get_AAD_rest_final\@:
+	vpshufb SHUF_MASK(%rip), reg_i, reg_i
+	vpxor   reg_j, reg_i, reg_i
+	GHASH_MUL_AVX2      reg_i, \T2, \T1, \T3, \T4, \T5, \T6
 
-_get_AAD_loop2\@:
-        vpsrldq $4, reg_i, reg_i
-        sub     $4, %r12
-        cmp     %r11, %r12
-        jg      _get_AAD_loop2\@
-
-_get_AAD_loop2_done\@:
-
-        #byte-reflect the AAD data
-        vpshufb SHUF_MASK(%rip), reg_i, reg_i
-
+_get_AAD_done\@:
 	# initialize the data pointer offset as zero
 	xor     %r11, %r11
 
@@ -1740,7 +1843,6 @@
 	i = (8-\num_initial_blocks)
 	j = (9-\num_initial_blocks)
 	setreg
-        GHASH_MUL_AVX2       reg_i, \T2, \T1, \T3, \T4, \T5, \T6
 
 .rep \num_initial_blocks
         vpxor    reg_i, reg_j, reg_j
@@ -2702,19 +2804,36 @@
         cmp     $16, %r11
         je      _T_16\@
 
-        cmp     $12, %r11
-        je      _T_12\@
+        cmp     $8, %r11
+        jl      _T_4\@
 
 _T_8\@:
         vmovq   %xmm9, %rax
         mov     %rax, (%r10)
-        jmp     _return_T_done\@
-_T_12\@:
-        vmovq   %xmm9, %rax
-        mov     %rax, (%r10)
+        add     $8, %r10
+        sub     $8, %r11
         vpsrldq $8, %xmm9, %xmm9
+        cmp     $0, %r11
+        je     _return_T_done\@
+_T_4\@:
         vmovd   %xmm9, %eax
-        mov     %eax, 8(%r10)
+        mov     %eax, (%r10)
+        add     $4, %r10
+        sub     $4, %r11
+        vpsrldq     $4, %xmm9, %xmm9
+        cmp     $0, %r11
+        je     _return_T_done\@
+_T_123\@:
+        vmovd     %xmm9, %eax
+        cmp     $2, %r11
+        jl     _T_1\@
+        mov     %ax, (%r10)
+        cmp     $2, %r11
+        je     _return_T_done\@
+        add     $2, %r10
+        sar     $16, %eax
+_T_1\@:
+        mov     %al, (%r10)
         jmp     _return_T_done\@
 
 _T_16\@:
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 93de8ea..4a55cdc 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -61,6 +61,11 @@ struct aesni_rfc4106_gcm_ctx {
 	u8 nonce[4];
 };
 
+struct generic_gcmaes_ctx {
+	u8 hash_subkey[16] AESNI_ALIGN_ATTR;
+	struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR;
+};
+
 struct aesni_xts_ctx {
 	u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
 	u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
@@ -102,13 +107,11 @@ asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out,
  * u8 *out, Ciphertext output. Encrypt in-place is allowed.
  * const u8 *in, Plaintext input
  * unsigned long plaintext_len, Length of data in bytes for encryption.
- * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association)
- *         concatenated with 8 byte Initialisation Vector (from IPSec ESP
- *         Payload) concatenated with 0x00000001. 16-byte aligned pointer.
+ * u8 *iv, Pre-counter block j0: 12 byte IV concatenated with 0x00000001.
+ *         16-byte aligned pointer.
  * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
  * const u8 *aad, Additional Authentication Data (AAD)
- * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this
- *          is going to be 8 or 12 bytes
+ * unsigned long aad_len, Length of AAD in bytes.
  * u8 *auth_tag, Authenticated Tag output.
  * unsigned long auth_tag_len), Authenticated Tag Length in bytes.
  *          Valid values are 16 (most likely), 12 or 8.
@@ -123,9 +126,8 @@ asmlinkage void aesni_gcm_enc(void *ctx, u8 *out,
  * u8 *out, Plaintext output. Decrypt in-place is allowed.
  * const u8 *in, Ciphertext input
  * unsigned long ciphertext_len, Length of data in bytes for decryption.
- * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association)
- *         concatenated with 8 byte Initialisation Vector (from IPSec ESP
- *         Payload) concatenated with 0x00000001. 16-byte aligned pointer.
+ * u8 *iv, Pre-counter block j0: 12 byte IV concatenated with 0x00000001.
+ *         16-byte aligned pointer.
  * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
  * const u8 *aad, Additional Authentication Data (AAD)
  * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going
@@ -275,6 +277,16 @@ aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
 		align = 1;
 	return PTR_ALIGN(crypto_aead_ctx(tfm), align);
 }
+
+static inline struct
+generic_gcmaes_ctx *generic_gcmaes_ctx_get(struct crypto_aead *tfm)
+{
+	unsigned long align = AESNI_ALIGN;
+
+	if (align <= crypto_tfm_ctx_alignment())
+		align = 1;
+	return PTR_ALIGN(crypto_aead_ctx(tfm), align);
+}
 #endif
 
 static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
@@ -712,32 +724,34 @@ static int rfc4106_set_authsize(struct crypto_aead *parent,
 	return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
 }
 
-static int helper_rfc4106_encrypt(struct aead_request *req)
+static int generic_gcmaes_set_authsize(struct crypto_aead *tfm,
+				       unsigned int authsize)
+{
+	switch (authsize) {
+	case 4:
+	case 8:
+	case 12:
+	case 13:
+	case 14:
+	case 15:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
+			  u8 *hash_subkey, u8 *iv, void *aes_ctx)
 {
 	u8 one_entry_in_sg = 0;
 	u8 *src, *dst, *assoc;
-	__be32 counter = cpu_to_be32(1);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
-	void *aes_ctx = &(ctx->aes_key_expanded);
 	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
-	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
 	struct scatter_walk src_sg_walk;
 	struct scatter_walk dst_sg_walk = {};
-	unsigned int i;
-
-	/* Assuming we are supporting rfc4106 64-bit extended */
-	/* sequence numbers We need to have the AAD length equal */
-	/* to 16 or 20 bytes */
-	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
-		return -EINVAL;
-
-	/* IV below built */
-	for (i = 0; i < 4; i++)
-		*(iv+i) = ctx->nonce[i];
-	for (i = 0; i < 8; i++)
-		*(iv+4+i) = req->iv[i];
-	*((__be32 *)(iv+12)) = counter;
 
 	if (sg_is_last(req->src) &&
 	    (!PageHighMem(sg_page(req->src)) ||
@@ -768,7 +782,7 @@ static int helper_rfc4106_encrypt(struct aead_request *req)
 
 	kernel_fpu_begin();
 	aesni_gcm_enc_tfm(aes_ctx, dst, src, req->cryptlen, iv,
-			  ctx->hash_subkey, assoc, req->assoclen - 8,
+			  hash_subkey, assoc, assoclen,
 			  dst + req->cryptlen, auth_tag_len);
 	kernel_fpu_end();
 
@@ -791,37 +805,20 @@ static int helper_rfc4106_encrypt(struct aead_request *req)
 	return 0;
 }
 
-static int helper_rfc4106_decrypt(struct aead_request *req)
+static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
+			  u8 *hash_subkey, u8 *iv, void *aes_ctx)
 {
 	u8 one_entry_in_sg = 0;
 	u8 *src, *dst, *assoc;
 	unsigned long tempCipherLen = 0;
-	__be32 counter = cpu_to_be32(1);
-	int retval = 0;
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
-	void *aes_ctx = &(ctx->aes_key_expanded);
 	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
-	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
 	u8 authTag[16];
 	struct scatter_walk src_sg_walk;
 	struct scatter_walk dst_sg_walk = {};
-	unsigned int i;
-
-	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
-		return -EINVAL;
-
-	/* Assuming we are supporting rfc4106 64-bit extended */
-	/* sequence numbers We need to have the AAD length */
-	/* equal to 16 or 20 bytes */
+	int retval = 0;
 
 	tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len);
-	/* IV below built */
-	for (i = 0; i < 4; i++)
-		*(iv+i) = ctx->nonce[i];
-	for (i = 0; i < 8; i++)
-		*(iv+4+i) = req->iv[i];
-	*((__be32 *)(iv+12)) = counter;
 
 	if (sg_is_last(req->src) &&
 	    (!PageHighMem(sg_page(req->src)) ||
@@ -838,7 +835,6 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
 			scatterwalk_start(&dst_sg_walk, req->dst);
 			dst = scatterwalk_map(&dst_sg_walk) + req->assoclen;
 		}
-
 	} else {
 		/* Allocate memory for src, dst, assoc */
 		assoc = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC);
@@ -850,9 +846,10 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
 		dst = src;
 	}
 
+
 	kernel_fpu_begin();
 	aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv,
-			  ctx->hash_subkey, assoc, req->assoclen - 8,
+			  hash_subkey, assoc, assoclen,
 			  authTag, auth_tag_len);
 	kernel_fpu_end();
 
@@ -875,6 +872,60 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
 		kfree(assoc);
 	}
 	return retval;
+
+}
+
+static int helper_rfc4106_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+	void *aes_ctx = &(ctx->aes_key_expanded);
+	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+	unsigned int i;
+	__be32 counter = cpu_to_be32(1);
+
+	/* Assuming we are supporting rfc4106 64-bit extended */
+	/* sequence numbers, we need to have the AAD length equal */
+	/* to 16 or 20 bytes; any other length is rejected. */
+	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
+		return -EINVAL;
+
+	/* Build the IV: 4-byte nonce, 8-byte req->iv, 32-bit BE counter = 1 */
+	for (i = 0; i < 4; i++)
+		*(iv+i) = ctx->nonce[i];
+	for (i = 0; i < 8; i++)
+		*(iv+4+i) = req->iv[i];
+	*((__be32 *)(iv+12)) = counter;
+
+	return gcmaes_encrypt(req, req->assoclen - 8, ctx->hash_subkey, iv,
+			      aes_ctx);
+}
+
+static int helper_rfc4106_decrypt(struct aead_request *req)
+{
+	__be32 counter = cpu_to_be32(1);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+	void *aes_ctx = &(ctx->aes_key_expanded);
+	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+	unsigned int i;
+
+	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
+		return -EINVAL;
+
+	/* The check above assumes we are supporting rfc4106 64-bit */
+	/* extended sequence numbers, so the AAD length has to be */
+	/* equal to 16 or 20 bytes. */
+
+	/* Build the IV: 4-byte nonce, 8-byte req->iv, 32-bit BE counter = 1 */
+	for (i = 0; i < 4; i++)
+		*(iv+i) = ctx->nonce[i];
+	for (i = 0; i < 8; i++)
+		*(iv+4+i) = req->iv[i];
+	*((__be32 *)(iv+12)) = counter;
+
+	return gcmaes_decrypt(req, req->assoclen - 8, ctx->hash_subkey, iv,
+			      aes_ctx);
 }
 
 static int rfc4106_encrypt(struct aead_request *req)
@@ -1035,6 +1086,46 @@ struct {
 };
 
 #ifdef CONFIG_X86_64
+static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
+				  unsigned int key_len)
+{
+	struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(aead);
+
+	return aes_set_key_common(crypto_aead_tfm(aead),
+				  &ctx->aes_key_expanded, key, key_len) ?:
+	       rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
+}
+
+static int generic_gcmaes_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
+	void *aes_ctx = &(ctx->aes_key_expanded);
+	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+	__be32 counter = cpu_to_be32(1);
+
+	memcpy(iv, req->iv, 12);
+	*((__be32 *)(iv+12)) = counter;
+
+	return gcmaes_encrypt(req, req->assoclen, ctx->hash_subkey, iv,
+			      aes_ctx);
+}
+
+static int generic_gcmaes_decrypt(struct aead_request *req)
+{
+	__be32 counter = cpu_to_be32(1);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
+	void *aes_ctx = &(ctx->aes_key_expanded);
+	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+
+	/* Pre-counter block j0: 12 byte IV concatenated with 0x00000001. */
+	memcpy(iv, req->iv, 12);
+	*((__be32 *)(iv+12)) = counter;
+	return gcmaes_decrypt(req, req->assoclen, ctx->hash_subkey, iv,
+			      aes_ctx);
+}
+
 static struct aead_alg aesni_aead_algs[] = { {
 	.setkey			= common_rfc4106_set_key,
 	.setauthsize		= common_rfc4106_set_authsize,
@@ -1069,6 +1160,23 @@ static struct aead_alg aesni_aead_algs[] = { {
 		.cra_ctxsize		= sizeof(struct cryptd_aead *),
 		.cra_module		= THIS_MODULE,
 	},
+}, {
+	.setkey			= generic_gcmaes_set_key,
+	.setauthsize		= generic_gcmaes_set_authsize,
+	.encrypt		= generic_gcmaes_encrypt,
+	.decrypt		= generic_gcmaes_decrypt,
+	.ivsize			= 12,
+	.maxauthsize		= 16,
+	.base = {
+		.cra_name		= "gcm(aes)",
+		.cra_driver_name	= "generic-gcm-aesni",
+		.cra_priority		= 400,
+		.cra_flags		= CRYPTO_ALG_ASYNC,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct generic_gcmaes_ctx),
+		.cra_alignmask		= AESNI_ALIGN - 1,
+		.cra_module		= THIS_MODULE,
+	},
 } };
 #else
 static struct aead_alg aesni_aead_algs[0];
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 24ac9fa..d61e579 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -176,9 +176,6 @@ __glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
 				src -= 1;
 				dst -= 1;
 			} while (nbytes >= func_bytes);
-
-			if (nbytes < bsize)
-				goto done;
 		}
 	}
 
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c
index 2dd3674..458409b 100644
--- a/arch/x86/crypto/sha512-mb/sha512_mb.c
+++ b/arch/x86/crypto/sha512-mb/sha512_mb.c
@@ -269,19 +269,19 @@ static struct sha512_hash_ctx
 		 * LAST
 		 */
 		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
-		return ctx;
+		goto unlock;
 	}
 
 	if (ctx->status & HASH_CTX_STS_PROCESSING) {
 		/* Cannot submit to a currently processing job. */
 		ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
-		return ctx;
+		goto unlock;
 	}
 
 	if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
 		/* Cannot update a finished job. */
 		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
-		return ctx;
+		goto unlock;
 	}
 
 
@@ -363,6 +363,7 @@ static struct sha512_hash_ctx
 	}
 
 	ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
+unlock:
 	spin_unlock_irqrestore(&cstate->work_lock, irqflags);
 	return ctx;
 }
diff --git a/crypto/Kconfig b/crypto/Kconfig
index aac4bc9..caa770e 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -130,6 +130,7 @@
 config CRYPTO_ECDH
 	tristate "ECDH algorithm"
 	select CRYTPO_KPP
+	select CRYPTO_RNG_DEFAULT
 	help
 	  Generic implementation of the ECDH algorithm
 
diff --git a/crypto/Makefile b/crypto/Makefile
index 8a44057..d41f033 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -33,10 +33,6 @@
 dh_generic-y := dh.o
 dh_generic-y += dh_helper.o
 obj-$(CONFIG_CRYPTO_DH) += dh_generic.o
-ecdh_generic-y := ecc.o
-ecdh_generic-y += ecdh.o
-ecdh_generic-y += ecdh_helper.o
-obj-$(CONFIG_CRYPTO_ECDH) += ecdh_generic.o
 
 $(obj)/rsapubkey-asn1.o: $(obj)/rsapubkey-asn1.c $(obj)/rsapubkey-asn1.h
 $(obj)/rsaprivkey-asn1.o: $(obj)/rsaprivkey-asn1.c $(obj)/rsaprivkey-asn1.h
@@ -138,6 +134,11 @@
 obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o
 obj-$(CONFIG_CRYPTO_USER_API_AEAD) += algif_aead.o
 
+ecdh_generic-y := ecc.o
+ecdh_generic-y += ecdh.o
+ecdh_generic-y += ecdh_helper.o
+obj-$(CONFIG_CRYPTO_ECDH) += ecdh_generic.o
+
 #
 # generic algorithms and the async_tx api
 #
diff --git a/crypto/aes_ti.c b/crypto/aes_ti.c
index 92644fd..03023b2 100644
--- a/crypto/aes_ti.c
+++ b/crypto/aes_ti.c
@@ -114,7 +114,7 @@ static u32 mix_columns(u32 x)
 	 * | 0x2 0x3 0x1 0x1 |   | x[0] |
 	 * | 0x1 0x2 0x3 0x1 |   | x[1] |
 	 * | 0x1 0x1 0x2 0x3 | x | x[2] |
-	 * | 0x3 0x1 0x1 0x3 |   | x[3] |
+	 * | 0x3 0x1 0x1 0x2 |   | x[3] |
 	 */
 	u32 y = mul_by_x(x) ^ ror32(x, 16);
 
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 9eed4ef..e4cc761 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -260,7 +260,7 @@ void crypto_alg_tested(const char *name, int err)
 			goto found;
 	}
 
-	printk(KERN_ERR "alg: Unexpected test result for %s: %d\n", name, err);
+	pr_err("alg: Unexpected test result for %s: %d\n", name, err);
 	goto unlock;
 
 found:
diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
index 727bd5c..61e7c4e 100644
--- a/crypto/crypto_engine.c
+++ b/crypto/crypto_engine.c
@@ -70,7 +70,7 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 
 		if (engine->unprepare_crypt_hardware &&
 		    engine->unprepare_crypt_hardware(engine))
-			pr_err("failed to unprepare crypt hardware\n");
+			dev_err(engine->dev, "failed to unprepare crypt hardware\n");
 
 		spin_lock_irqsave(&engine->queue_lock, flags);
 		engine->idling = false;
@@ -99,7 +99,7 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 	if (!was_busy && engine->prepare_crypt_hardware) {
 		ret = engine->prepare_crypt_hardware(engine);
 		if (ret) {
-			pr_err("failed to prepare crypt hardware\n");
+			dev_err(engine->dev, "failed to prepare crypt hardware\n");
 			goto req_err;
 		}
 	}
@@ -110,14 +110,15 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 		if (engine->prepare_hash_request) {
 			ret = engine->prepare_hash_request(engine, hreq);
 			if (ret) {
-				pr_err("failed to prepare request: %d\n", ret);
+				dev_err(engine->dev, "failed to prepare request: %d\n",
+					ret);
 				goto req_err;
 			}
 			engine->cur_req_prepared = true;
 		}
 		ret = engine->hash_one_request(engine, hreq);
 		if (ret) {
-			pr_err("failed to hash one request from queue\n");
+			dev_err(engine->dev, "failed to hash one request from queue\n");
 			goto req_err;
 		}
 		return;
@@ -126,19 +127,20 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 		if (engine->prepare_cipher_request) {
 			ret = engine->prepare_cipher_request(engine, breq);
 			if (ret) {
-				pr_err("failed to prepare request: %d\n", ret);
+				dev_err(engine->dev, "failed to prepare request: %d\n",
+					ret);
 				goto req_err;
 			}
 			engine->cur_req_prepared = true;
 		}
 		ret = engine->cipher_one_request(engine, breq);
 		if (ret) {
-			pr_err("failed to cipher one request from queue\n");
+			dev_err(engine->dev, "failed to cipher one request from queue\n");
 			goto req_err;
 		}
 		return;
 	default:
-		pr_err("failed to prepare request of unknown type\n");
+		dev_err(engine->dev, "failed to prepare request of unknown type\n");
 		return;
 	}
 
@@ -275,7 +277,7 @@ void crypto_finalize_cipher_request(struct crypto_engine *engine,
 		    engine->unprepare_cipher_request) {
 			ret = engine->unprepare_cipher_request(engine, req);
 			if (ret)
-				pr_err("failed to unprepare request\n");
+				dev_err(engine->dev, "failed to unprepare request\n");
 		}
 		spin_lock_irqsave(&engine->queue_lock, flags);
 		engine->cur_req = NULL;
@@ -312,7 +314,7 @@ void crypto_finalize_hash_request(struct crypto_engine *engine,
 		    engine->unprepare_hash_request) {
 			ret = engine->unprepare_hash_request(engine, req);
 			if (ret)
-				pr_err("failed to unprepare request\n");
+				dev_err(engine->dev, "failed to unprepare request\n");
 		}
 		spin_lock_irqsave(&engine->queue_lock, flags);
 		engine->cur_req = NULL;
@@ -384,7 +386,7 @@ int crypto_engine_stop(struct crypto_engine *engine)
 	spin_unlock_irqrestore(&engine->queue_lock, flags);
 
 	if (ret)
-		pr_warn("could not stop engine\n");
+		dev_warn(engine->dev, "could not stop engine\n");
 
 	return ret;
 }
@@ -411,6 +413,7 @@ struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt)
 	if (!engine)
 		return NULL;
 
+	engine->dev = dev;
 	engine->rt = rt;
 	engine->running = false;
 	engine->busy = false;
diff --git a/crypto/dh.c b/crypto/dh.c
index 87e3542..b1032a5 100644
--- a/crypto/dh.c
+++ b/crypto/dh.c
@@ -4,9 +4,9 @@
  * Authors: Salvatore Benedetto <salvatore.benedetto@intel.com>
  *
  * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
+ * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
+ * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -85,6 +85,9 @@ static int dh_set_secret(struct crypto_kpp *tfm, const void *buf,
 	struct dh_ctx *ctx = dh_get_ctx(tfm);
 	struct dh params;
 
+	/* Free the old MPI key if any */
+	dh_free_ctx(ctx);
+
 	if (crypto_dh_decode_key(buf, len, &params) < 0)
 		return -EINVAL;
 
@@ -144,7 +147,7 @@ static int dh_compute_value(struct kpp_request *req)
 	return ret;
 }
 
-static int dh_max_size(struct crypto_kpp *tfm)
+static unsigned int dh_max_size(struct crypto_kpp *tfm)
 {
 	struct dh_ctx *ctx = dh_get_ctx(tfm);
 
diff --git a/crypto/dh_helper.c b/crypto/dh_helper.c
index 02db76b..8ba8a3f 100644
--- a/crypto/dh_helper.c
+++ b/crypto/dh_helper.c
@@ -3,9 +3,9 @@
  * Authors: Salvatore Benedetto <salvatore.benedetto@intel.com>
  *
  * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
+ * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
+ * 2 of the License, or (at your option) any later version.
  */
 #include <linux/kernel.h>
 #include <linux/export.h>
diff --git a/crypto/drbg.c b/crypto/drbg.c
index cdb27ac..633a88e 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -1691,6 +1691,7 @@ static int drbg_init_sym_kernel(struct drbg_state *drbg)
 		return PTR_ERR(sk_tfm);
 	}
 	drbg->ctr_handle = sk_tfm;
+	init_completion(&drbg->ctr_completion);
 
 	req = skcipher_request_alloc(sk_tfm, GFP_KERNEL);
 	if (!req) {
diff --git a/crypto/ecc.c b/crypto/ecc.c
index 414c78a..633a9bc 100644
--- a/crypto/ecc.c
+++ b/crypto/ecc.c
@@ -29,6 +29,7 @@
 #include <linux/swab.h>
 #include <linux/fips.h>
 #include <crypto/ecdh.h>
+#include <crypto/rng.h>
 
 #include "ecc.h"
 #include "ecc_curve_defs.h"
@@ -904,7 +905,7 @@ static inline void ecc_swap_digits(const u64 *in, u64 *out,
 }
 
 int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
-		     const u8 *private_key, unsigned int private_key_len)
+		     const u64 *private_key, unsigned int private_key_len)
 {
 	int nbytes;
 	const struct ecc_curve *curve = ecc_get_curve(curve_id);
@@ -917,24 +918,77 @@ int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
 	if (private_key_len != nbytes)
 		return -EINVAL;
 
-	if (vli_is_zero((const u64 *)&private_key[0], ndigits))
+	if (vli_is_zero(private_key, ndigits))
 		return -EINVAL;
 
 	/* Make sure the private key is in the range [1, n-1]. */
-	if (vli_cmp(curve->n, (const u64 *)&private_key[0], ndigits) != 1)
+	if (vli_cmp(curve->n, private_key, ndigits) != 1)
 		return -EINVAL;
 
 	return 0;
 }
 
-int ecdh_make_pub_key(unsigned int curve_id, unsigned int ndigits,
-		      const u8 *private_key, unsigned int private_key_len,
-		      u8 *public_key, unsigned int public_key_len)
+/*
+ * ECC private keys are generated using the method of extra random bits,
+ * equivalent to that described in FIPS 186-4, Appendix B.4.1.
+ *
+ * d = (c mod(n-1)) + 1    where c is a string of random bits, 64 bits longer
+ *                         than requested
+ * 0 <= c mod(n-1) <= n-2  and implies that
+ * 1 <= d <= n-1
+ *
+ * This method generates a private key uniformly distributed in the range
+ * [1, n-1].
+ */
+int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey)
+{
+	const struct ecc_curve *curve = ecc_get_curve(curve_id);
+	u64 priv[ndigits];
+	unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+	unsigned int nbits = vli_num_bits(curve->n, ndigits);
+	int err;
+
+	/* Check that N is included in Table 1 of FIPS 186-4, section 6.1.1 */
+	if (nbits < 160)
+		return -EINVAL;
+
+	/*
+	 * FIPS 186-4 recommends that the private key should be obtained from a
+	 * RBG with a security strength equal to or greater than the security
+	 * strength associated with N.
+	 *
+	 * The maximum security strength identified by NIST SP800-57pt1r4 for
+	 * ECC is 256 (N >= 512).
+	 *
+	 * This condition is met by the default RNG because it selects a favored
+	 * DRBG with a security strength of 256.
+	 */
+	if (crypto_get_default_rng())
+		return -EFAULT;
+
+	err = crypto_rng_get_bytes(crypto_default_rng, (u8 *)priv, nbytes);
+	crypto_put_default_rng();
+	if (err)
+		return err;
+
+	if (vli_is_zero(priv, ndigits))
+		return -EINVAL;
+
+	/* Make sure the private key is in the range [1, n-1]. */
+	if (vli_cmp(curve->n, priv, ndigits) != 1)
+		return -EINVAL;
+
+	ecc_swap_digits(priv, privkey, ndigits);
+
+	return 0;
+}
+
+int ecc_make_pub_key(unsigned int curve_id, unsigned int ndigits,
+		     const u64 *private_key, u64 *public_key)
 {
 	int ret = 0;
 	struct ecc_point *pk;
 	u64 priv[ndigits];
-	unsigned int nbytes;
 	const struct ecc_curve *curve = ecc_get_curve(curve_id);
 
 	if (!private_key || !curve) {
@@ -942,7 +996,7 @@ int ecdh_make_pub_key(unsigned int curve_id, unsigned int ndigits,
 		goto out;
 	}
 
-	ecc_swap_digits((const u64 *)private_key, priv, ndigits);
+	ecc_swap_digits(private_key, priv, ndigits);
 
 	pk = ecc_alloc_point(ndigits);
 	if (!pk) {
@@ -956,9 +1010,8 @@ int ecdh_make_pub_key(unsigned int curve_id, unsigned int ndigits,
 		goto err_free_point;
 	}
 
-	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
-	ecc_swap_digits(pk->x, (u64 *)public_key, ndigits);
-	ecc_swap_digits(pk->y, (u64 *)&public_key[nbytes], ndigits);
+	ecc_swap_digits(pk->x, public_key, ndigits);
+	ecc_swap_digits(pk->y, &public_key[ndigits], ndigits);
 
 err_free_point:
 	ecc_free_point(pk);
@@ -967,9 +1020,8 @@ int ecdh_make_pub_key(unsigned int curve_id, unsigned int ndigits,
 }
 
 int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
-		       const u8 *private_key, unsigned int private_key_len,
-		       const u8 *public_key, unsigned int public_key_len,
-		       u8 *secret, unsigned int secret_len)
+			      const u64 *private_key, const u64 *public_key,
+			      u64 *secret)
 {
 	int ret = 0;
 	struct ecc_point *product, *pk;
@@ -999,13 +1051,13 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
 		goto err_alloc_product;
 	}
 
-	ecc_swap_digits((const u64 *)public_key, pk->x, ndigits);
-	ecc_swap_digits((const u64 *)&public_key[nbytes], pk->y, ndigits);
-	ecc_swap_digits((const u64 *)private_key, priv, ndigits);
+	ecc_swap_digits(public_key, pk->x, ndigits);
+	ecc_swap_digits(&public_key[ndigits], pk->y, ndigits);
+	ecc_swap_digits(private_key, priv, ndigits);
 
 	ecc_point_mult(product, pk, priv, rand_z, curve->p, ndigits);
 
-	ecc_swap_digits(product->x, (u64 *)secret, ndigits);
+	ecc_swap_digits(product->x, secret, ndigits);
 
 	if (ecc_point_is_zero(product))
 		ret = -EFAULT;
diff --git a/crypto/ecc.h b/crypto/ecc.h
index 663d598..e4fd449 100644
--- a/crypto/ecc.h
+++ b/crypto/ecc.h
@@ -34,41 +34,51 @@
  * ecc_is_key_valid() - Validate a given ECDH private key
  *
  * @curve_id:		id representing the curve to use
- * @ndigits:		curve number of digits
+ * @ndigits:		curve's number of digits
  * @private_key:	private key to be used for the given curve
- * @private_key_len:	private key len
+ * @private_key_len:	private key length
  *
  * Returns 0 if the key is acceptable, a negative value otherwise
  */
 int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
-		     const u8 *private_key, unsigned int private_key_len);
+		     const u64 *private_key, unsigned int private_key_len);
 
 /**
- * ecdh_make_pub_key() - Compute an ECC public key
+ * ecc_gen_privkey() - Generates an ECC private key.
+ * The private key is a random integer in the range 0 < random < n, where n is a
+ * prime that is the order of the cyclic subgroup generated by the distinguished
+ * point G.
+ * @curve_id:		id representing the curve to use
+ * @ndigits:		curve's number of digits
+ * @privkey:		buffer for storing the generated private key
+ *
+ * Returns 0 if the private key was generated successfully, a negative value
+ * if an error occurred.
+ */
+int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey);
+
+/**
+ * ecc_make_pub_key() - Compute an ECC public key
  *
  * @curve_id:		id representing the curve to use
+ * @ndigits:		curve's number of digits
  * @private_key:	pregenerated private key for the given curve
- * @private_key_len:	length of private_key
- * @public_key:		buffer for storing the public key generated
- * @public_key_len:	length of the public_key buffer
+ * @public_key:		buffer for storing the generated public key
  *
  * Returns 0 if the public key was generated successfully, a negative value
  * if an error occurred.
  */
-int ecdh_make_pub_key(const unsigned int curve_id, unsigned int ndigits,
-		      const u8 *private_key, unsigned int private_key_len,
-		      u8 *public_key, unsigned int public_key_len);
+int ecc_make_pub_key(const unsigned int curve_id, unsigned int ndigits,
+		     const u64 *private_key, u64 *public_key);
 
 /**
  * crypto_ecdh_shared_secret() - Compute a shared secret
  *
  * @curve_id:		id representing the curve to use
+ * @ndigits:		curve's number of digits
  * @private_key:	private key of part A
- * @private_key_len:	length of private_key
  * @public_key:		public key of counterpart B
- * @public_key_len:	length of public_key
  * @secret:		buffer for storing the calculated shared secret
- * @secret_len:		length of the secret buffer
  *
  * Note: It is recommended that you hash the result of crypto_ecdh_shared_secret
  * before using it for symmetric encryption or HMAC.
@@ -77,7 +87,6 @@ int ecdh_make_pub_key(const unsigned int curve_id, unsigned int ndigits,
  * if an error occurred.
  */
 int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
-		       const u8 *private_key, unsigned int private_key_len,
-		       const u8 *public_key, unsigned int public_key_len,
-		       u8 *secret, unsigned int secret_len);
+			      const u64 *private_key, const u64 *public_key,
+			      u64 *secret);
 #endif
diff --git a/crypto/ecdh.c b/crypto/ecdh.c
index 63ca337..61c7708 100644
--- a/crypto/ecdh.c
+++ b/crypto/ecdh.c
@@ -4,9 +4,9 @@
  * Authors: Salvator Benedetto <salvatore.benedetto@intel.com>
  *
  * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
+ * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
+ * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -55,8 +55,12 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
 	ctx->curve_id = params.curve_id;
 	ctx->ndigits = ndigits;
 
+	if (!params.key || !params.key_size)
+		return ecc_gen_privkey(ctx->curve_id, ctx->ndigits,
+				       ctx->private_key);
+
 	if (ecc_is_key_valid(ctx->curve_id, ctx->ndigits,
-			     (const u8 *)params.key, params.key_size) < 0)
+			     (const u64 *)params.key, params.key_size) < 0)
 		return -EINVAL;
 
 	memcpy(ctx->private_key, params.key, params.key_size);
@@ -81,16 +85,14 @@ static int ecdh_compute_value(struct kpp_request *req)
 			return -EINVAL;
 
 		ret = crypto_ecdh_shared_secret(ctx->curve_id, ctx->ndigits,
-					 (const u8 *)ctx->private_key, nbytes,
-					 (const u8 *)ctx->public_key, 2 * nbytes,
-					 (u8 *)ctx->shared_secret, nbytes);
+						ctx->private_key,
+						ctx->public_key,
+						ctx->shared_secret);
 
 		buf = ctx->shared_secret;
 	} else {
-		ret = ecdh_make_pub_key(ctx->curve_id, ctx->ndigits,
-					(const u8 *)ctx->private_key, nbytes,
-					(u8 *)ctx->public_key,
-					sizeof(ctx->public_key));
+		ret = ecc_make_pub_key(ctx->curve_id, ctx->ndigits,
+				       ctx->private_key, ctx->public_key);
 		buf = ctx->public_key;
 		/* Public part is a point thus it has both coordinates */
 		nbytes *= 2;
@@ -106,13 +108,12 @@ static int ecdh_compute_value(struct kpp_request *req)
 	return ret;
 }
 
-static int ecdh_max_size(struct crypto_kpp *tfm)
+static unsigned int ecdh_max_size(struct crypto_kpp *tfm)
 {
 	struct ecdh_ctx *ctx = ecdh_get_ctx(tfm);
-	int nbytes = ctx->ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
 
-	/* Public key is made of two coordinates */
-	return 2 * nbytes;
+	/* Public key is made of two coordinates, add one to the left shift */
+	return ctx->ndigits << (ECC_DIGITS_TO_BYTES_SHIFT + 1);
 }
 
 static void no_exit_tfm(struct crypto_kpp *tfm)
diff --git a/crypto/ecdh_helper.c b/crypto/ecdh_helper.c
index 3cd8a24..f05bea5 100644
--- a/crypto/ecdh_helper.c
+++ b/crypto/ecdh_helper.c
@@ -3,9 +3,9 @@
  * Authors: Salvatore Benedetto <salvatore.benedetto@intel.com>
  *
  * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
+ * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
+ * 2 of the License, or (at your option) any later version.
  */
 #include <linux/kernel.h>
 #include <linux/export.h>
diff --git a/crypto/hmac.c b/crypto/hmac.c
index 72e38c0..92871dc 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -16,6 +16,7 @@
  *
  */
 
+#include <crypto/hmac.h>
 #include <crypto/internal/hash.h>
 #include <crypto/scatterwalk.h>
 #include <linux/err.h>
@@ -74,8 +75,8 @@ static int hmac_setkey(struct crypto_shash *parent,
 	memcpy(opad, ipad, bs);
 
 	for (i = 0; i < bs; i++) {
-		ipad[i] ^= 0x36;
-		opad[i] ^= 0x5c;
+		ipad[i] ^= HMAC_IPAD_VALUE;
+		opad[i] ^= HMAC_OPAD_VALUE;
 	}
 
 	return crypto_shash_init(shash) ?:
diff --git a/crypto/rng.c b/crypto/rng.c
index f46dac5..5e84692 100644
--- a/crypto/rng.c
+++ b/crypto/rng.c
@@ -33,11 +33,6 @@ struct crypto_rng *crypto_default_rng;
 EXPORT_SYMBOL_GPL(crypto_default_rng);
 static int crypto_default_rng_refcnt;
 
-static inline struct crypto_rng *__crypto_rng_cast(struct crypto_tfm *tfm)
-{
-	return container_of(tfm, struct crypto_rng, base);
-}
-
 int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen)
 {
 	u8 *buf = NULL;
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 8baab43..407c64b 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -120,9 +120,6 @@ static int pkcs1pad_set_pub_key(struct crypto_akcipher *tfm, const void *key,
 
 	/* Find out new modulus size from rsa implementation */
 	err = crypto_akcipher_maxsize(ctx->child);
-	if (err < 0)
-		return err;
-
 	if (err > PAGE_SIZE)
 		return -ENOTSUPP;
 
@@ -144,9 +141,6 @@ static int pkcs1pad_set_priv_key(struct crypto_akcipher *tfm, const void *key,
 
 	/* Find out new modulus size from rsa implementation */
 	err = crypto_akcipher_maxsize(ctx->child);
-	if (err < 0)
-		return err;
-
 	if (err > PAGE_SIZE)
 		return -ENOTSUPP;
 
@@ -154,7 +148,7 @@ static int pkcs1pad_set_priv_key(struct crypto_akcipher *tfm, const void *key,
 	return 0;
 }
 
-static int pkcs1pad_get_max_size(struct crypto_akcipher *tfm)
+static unsigned int pkcs1pad_get_max_size(struct crypto_akcipher *tfm)
 {
 	struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm);
 
@@ -164,7 +158,7 @@ static int pkcs1pad_get_max_size(struct crypto_akcipher *tfm)
 	 * decrypt/verify.
 	 */
 
-	return ctx->key_size ?: -EINVAL;
+	return ctx->key_size;
 }
 
 static void pkcs1pad_sg_set_buf(struct scatterlist *sg, void *buf, size_t len,
@@ -496,7 +490,7 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
 		goto done;
 	pos++;
 
-	if (memcmp(out_buf + pos, digest_info->data, digest_info->size))
+	if (crypto_memneq(out_buf + pos, digest_info->data, digest_info->size))
 		goto done;
 
 	pos += digest_info->size;
diff --git a/crypto/rsa.c b/crypto/rsa.c
index 4c280b6..b067f3a 100644
--- a/crypto/rsa.c
+++ b/crypto/rsa.c
@@ -337,11 +337,11 @@ static int rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
 	return -ENOMEM;
 }
 
-static int rsa_max_size(struct crypto_akcipher *tfm)
+static unsigned int rsa_max_size(struct crypto_akcipher *tfm)
 {
 	struct rsa_mpi_key *pkey = akcipher_tfm_ctx(tfm);
 
-	return pkey->n ? mpi_get_size(pkey->n) : -EINVAL;
+	return mpi_get_size(pkey->n);
 }
 
 static void rsa_exit_tfm(struct crypto_akcipher *tfm)
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 9a11f3c..0dd6a43 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -138,8 +138,6 @@ static int test_aead_cycles(struct aead_request *req, int enc, int blen)
 	int ret = 0;
 	int i;
 
-	local_irq_disable();
-
 	/* Warm-up run. */
 	for (i = 0; i < 4; i++) {
 		if (enc)
@@ -169,8 +167,6 @@ static int test_aead_cycles(struct aead_request *req, int enc, int blen)
 	}
 
 out:
-	local_irq_enable();
-
 	if (ret == 0)
 		printk("1 operation in %lu cycles (%d bytes)\n",
 		       (cycles + 4) / 8, blen);
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 6f5f3ed..7125ba3 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -218,14 +218,14 @@ static int ahash_partial_update(struct ahash_request **preq,
 			crypto_ahash_reqtfm(req));
 	state = kmalloc(statesize + sizeof(guard), GFP_KERNEL);
 	if (!state) {
-		pr_err("alt: hash: Failed to alloc state for %s\n", algo);
+		pr_err("alg: hash: Failed to alloc state for %s\n", algo);
 		goto out_nostate;
 	}
 	memcpy(state + statesize, guard, sizeof(guard));
 	ret = crypto_ahash_export(req, state);
 	WARN_ON(memcmp(state + statesize, guard, sizeof(guard)));
 	if (ret) {
-		pr_err("alt: hash: Failed to export() for %s\n", algo);
+		pr_err("alg: hash: Failed to export() for %s\n", algo);
 		goto out;
 	}
 	ahash_request_free(req);
@@ -344,19 +344,19 @@ static int __test_hash(struct crypto_ahash *tfm,
 		} else {
 			ret = wait_async_op(&tresult, crypto_ahash_init(req));
 			if (ret) {
-				pr_err("alt: hash: init failed on test %d "
+				pr_err("alg: hash: init failed on test %d "
 				       "for %s: ret=%d\n", j, algo, -ret);
 				goto out;
 			}
 			ret = wait_async_op(&tresult, crypto_ahash_update(req));
 			if (ret) {
-				pr_err("alt: hash: update failed on test %d "
+				pr_err("alg: hash: update failed on test %d "
 				       "for %s: ret=%d\n", j, algo, -ret);
 				goto out;
 			}
 			ret = wait_async_op(&tresult, crypto_ahash_final(req));
 			if (ret) {
-				pr_err("alt: hash: final failed on test %d "
+				pr_err("alg: hash: final failed on test %d "
 				       "for %s: ret=%d\n", j, algo, -ret);
 				goto out;
 			}
@@ -488,13 +488,13 @@ static int __test_hash(struct crypto_ahash *tfm,
 		ahash_request_set_crypt(req, sg, result, template[i].tap[0]);
 		ret = wait_async_op(&tresult, crypto_ahash_init(req));
 		if (ret) {
-			pr_err("alt: hash: init failed on test %d for %s: ret=%d\n",
+			pr_err("alg: hash: init failed on test %d for %s: ret=%d\n",
 				j, algo, -ret);
 			goto out;
 		}
 		ret = wait_async_op(&tresult, crypto_ahash_update(req));
 		if (ret) {
-			pr_err("alt: hash: update failed on test %d for %s: ret=%d\n",
+			pr_err("alg: hash: update failed on test %d for %s: ret=%d\n",
 				j, algo, -ret);
 			goto out;
 		}
@@ -505,7 +505,7 @@ static int __test_hash(struct crypto_ahash *tfm,
 				hash_buff, k, temp, &sg[0], algo, result,
 				&tresult);
 			if (ret) {
-				pr_err("hash: partial update failed on test %d for %s: ret=%d\n",
+				pr_err("alg: hash: partial update failed on test %d for %s: ret=%d\n",
 					j, algo, -ret);
 				goto out_noreq;
 			}
@@ -513,7 +513,7 @@ static int __test_hash(struct crypto_ahash *tfm,
 		}
 		ret = wait_async_op(&tresult, crypto_ahash_final(req));
 		if (ret) {
-			pr_err("alt: hash: final failed on test %d for %s: ret=%d\n",
+			pr_err("alg: hash: final failed on test %d for %s: ret=%d\n",
 				j, algo, -ret);
 			goto out;
 		}
@@ -1997,6 +1997,9 @@ static int do_test_kpp(struct crypto_kpp *tfm, const struct kpp_testvec *vec,
 	struct kpp_request *req;
 	void *input_buf = NULL;
 	void *output_buf = NULL;
+	void *a_public = NULL;
+	void *a_ss = NULL;
+	void *shared_secret = NULL;
 	struct tcrypt_result result;
 	unsigned int out_len_max;
 	int err = -ENOMEM;
@@ -2026,20 +2029,31 @@ static int do_test_kpp(struct crypto_kpp *tfm, const struct kpp_testvec *vec,
 	kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
 				 tcrypt_complete, &result);
 
-	/* Compute public key */
+	/* Compute party A's public key */
 	err = wait_async_op(&result, crypto_kpp_generate_public_key(req));
 	if (err) {
-		pr_err("alg: %s: generate public key test failed. err %d\n",
+		pr_err("alg: %s: Party A: generate public key test failed. err %d\n",
 		       alg, err);
 		goto free_output;
 	}
-	/* Verify calculated public key */
-	if (memcmp(vec->expected_a_public, sg_virt(req->dst),
-		   vec->expected_a_public_size)) {
-		pr_err("alg: %s: generate public key test failed. Invalid output\n",
-		       alg);
-		err = -EINVAL;
-		goto free_output;
+
+	if (vec->genkey) {
+		/* Save party A's public key */
+		a_public = kzalloc(out_len_max, GFP_KERNEL);
+		if (!a_public) {
+			err = -ENOMEM;
+			goto free_output;
+		}
+		memcpy(a_public, sg_virt(req->dst), out_len_max);
+	} else {
+		/* Verify calculated public key */
+		if (memcmp(vec->expected_a_public, sg_virt(req->dst),
+			   vec->expected_a_public_size)) {
+			pr_err("alg: %s: Party A: generate public key test failed. Invalid output\n",
+			       alg);
+			err = -EINVAL;
+			goto free_output;
+		}
 	}
 
 	/* Calculate shared secret key by using counter part (b) public key. */
@@ -2058,15 +2072,53 @@ static int do_test_kpp(struct crypto_kpp *tfm, const struct kpp_testvec *vec,
 				 tcrypt_complete, &result);
 	err = wait_async_op(&result, crypto_kpp_compute_shared_secret(req));
 	if (err) {
-		pr_err("alg: %s: compute shard secret test failed. err %d\n",
+		pr_err("alg: %s: Party A: compute shared secret test failed. err %d\n",
 		       alg, err);
 		goto free_all;
 	}
+
+	if (vec->genkey) {
+		/* Save the shared secret obtained by party A */
+		a_ss = kzalloc(vec->expected_ss_size, GFP_KERNEL);
+		if (!a_ss) {
+			err = -ENOMEM;
+			goto free_all;
+		}
+		memcpy(a_ss, sg_virt(req->dst), vec->expected_ss_size);
+
+		/*
+		 * Calculate party B's shared secret by using party A's
+		 * public key.
+		 */
+		err = crypto_kpp_set_secret(tfm, vec->b_secret,
+					    vec->b_secret_size);
+		if (err < 0)
+			goto free_all;
+
+		sg_init_one(&src, a_public, vec->expected_a_public_size);
+		sg_init_one(&dst, output_buf, out_len_max);
+		kpp_request_set_input(req, &src, vec->expected_a_public_size);
+		kpp_request_set_output(req, &dst, out_len_max);
+		kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+					 tcrypt_complete, &result);
+		err = wait_async_op(&result,
+				    crypto_kpp_compute_shared_secret(req));
+		if (err) {
+			pr_err("alg: %s: Party B: compute shared secret failed. err %d\n",
+			       alg, err);
+			goto free_all;
+		}
+
+		shared_secret = a_ss;
+	} else {
+		shared_secret = (void *)vec->expected_ss;
+	}
+
 	/*
 	 * verify shared secret from which the user will derive
 	 * secret key by executing whatever hash it has chosen
 	 */
-	if (memcmp(vec->expected_ss, sg_virt(req->dst),
+	if (memcmp(shared_secret, sg_virt(req->dst),
 		   vec->expected_ss_size)) {
 		pr_err("alg: %s: compute shared secret test failed. Invalid output\n",
 		       alg);
@@ -2074,8 +2126,10 @@ static int do_test_kpp(struct crypto_kpp *tfm, const struct kpp_testvec *vec,
 	}
 
 free_all:
+	kfree(a_ss);
 	kfree(input_buf);
 free_output:
+	kfree(a_public);
 	kfree(output_buf);
 free_req:
 	kpp_request_free(req);
@@ -2168,8 +2222,11 @@ static int test_akcipher_one(struct crypto_akcipher *tfm,
 	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
 				      tcrypt_complete, &result);
 
-	/* Run RSA encrypt - c = m^e mod n;*/
-	err = wait_async_op(&result, crypto_akcipher_encrypt(req));
+	err = wait_async_op(&result, vecs->siggen_sigver_test ?
+				     /* Run asymmetric signature generation */
+				     crypto_akcipher_sign(req) :
+				     /* Run asymmetric encrypt */
+				     crypto_akcipher_encrypt(req));
 	if (err) {
 		pr_err("alg: akcipher: encrypt test failed. err %d\n", err);
 		goto free_all;
@@ -2207,8 +2264,11 @@ static int test_akcipher_one(struct crypto_akcipher *tfm,
 	init_completion(&result.completion);
 	akcipher_request_set_crypt(req, &src, &dst, vecs->c_size, out_len_max);
 
-	/* Run RSA decrypt - m = c^d mod n;*/
-	err = wait_async_op(&result, crypto_akcipher_decrypt(req));
+	err = wait_async_op(&result, vecs->siggen_sigver_test ?
+				     /* Run asymmetric signature verification */
+				     crypto_akcipher_verify(req) :
+				     /* Run asymmetric decrypt */
+				     crypto_akcipher_decrypt(req));
 	if (err) {
 		pr_err("alg: akcipher: decrypt test failed. err %d\n", err);
 		goto free_all;
@@ -2306,6 +2366,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "authenc(hmac(sha1),cbc(aes))",
 		.test = alg_test_aead,
+		.fips_allowed = 1,
 		.suite = {
 			.aead = {
 				.enc = __VECS(hmac_sha1_aes_cbc_enc_tv_temp)
@@ -3255,6 +3316,25 @@ static const struct alg_test_desc alg_test_descs[] = {
 			}
 		}
 	}, {
+		.alg = "pkcs1pad(rsa,sha224)",
+		.test = alg_test_null,
+		.fips_allowed = 1,
+	}, {
+		.alg = "pkcs1pad(rsa,sha256)",
+		.test = alg_test_akcipher,
+		.fips_allowed = 1,
+		.suite = {
+			.akcipher = __VECS(pkcs1pad_rsa_tv_template)
+		}
+	}, {
+		.alg = "pkcs1pad(rsa,sha384)",
+		.test = alg_test_null,
+		.fips_allowed = 1,
+	}, {
+		.alg = "pkcs1pad(rsa,sha512)",
+		.test = alg_test_null,
+		.fips_allowed = 1,
+	}, {
 		.alg = "poly1305",
 		.test = alg_test_hash,
 		.suite = {
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 4293573..6ceb0e2 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -133,17 +133,21 @@ struct akcipher_testvec {
 	unsigned int m_size;
 	unsigned int c_size;
 	bool public_key_vec;
+	bool siggen_sigver_test;
 };
 
 struct kpp_testvec {
 	const unsigned char *secret;
+	const unsigned char *b_secret;
 	const unsigned char *b_public;
 	const unsigned char *expected_a_public;
 	const unsigned char *expected_ss;
 	unsigned short secret_size;
+	unsigned short b_secret_size;
 	unsigned short b_public_size;
 	unsigned short expected_a_public_size;
 	unsigned short expected_ss_size;
+	bool genkey;
 };
 
 static const char zeroed_string[48];
@@ -538,6 +542,101 @@ static const struct akcipher_testvec rsa_tv_template[] = {
 	}
 };
 
+/*
+ * PKCS#1 RSA test vectors. Obtained from CAVS testing.
+ */
+static const struct akcipher_testvec pkcs1pad_rsa_tv_template[] = {
+	{
+	.key =
+	"\x30\x82\x03\x1f\x02\x01\x10\x02\x82\x01\x01\x00\xd7\x1e\x77\x82"
+	"\x8c\x92\x31\xe7\x69\x02\xa2\xd5\x5c\x78\xde\xa2\x0c\x8f\xfe\x28"
+	"\x59\x31\xdf\x40\x9c\x60\x61\x06\xb9\x2f\x62\x40\x80\x76\xcb\x67"
+	"\x4a\xb5\x59\x56\x69\x17\x07\xfa\xf9\x4c\xbd\x6c\x37\x7a\x46\x7d"
+	"\x70\xa7\x67\x22\xb3\x4d\x7a\x94\xc3\xba\x4b\x7c\x4b\xa9\x32\x7c"
+	"\xb7\x38\x95\x45\x64\xa4\x05\xa8\x9f\x12\x7c\x4e\xc6\xc8\x2d\x40"
+	"\x06\x30\xf4\x60\xa6\x91\xbb\x9b\xca\x04\x79\x11\x13\x75\xf0\xae"
+	"\xd3\x51\x89\xc5\x74\xb9\xaa\x3f\xb6\x83\xe4\x78\x6b\xcd\xf9\x5c"
+	"\x4c\x85\xea\x52\x3b\x51\x93\xfc\x14\x6b\x33\x5d\x30\x70\xfa\x50"
+	"\x1b\x1b\x38\x81\x13\x8d\xf7\xa5\x0c\xc0\x8e\xf9\x63\x52\x18\x4e"
+	"\xa9\xf9\xf8\x5c\x5d\xcd\x7a\x0d\xd4\x8e\x7b\xee\x91\x7b\xad\x7d"
+	"\xb4\x92\xd5\xab\x16\x3b\x0a\x8a\xce\x8e\xde\x47\x1a\x17\x01\x86"
+	"\x7b\xab\x99\xf1\x4b\x0c\x3a\x0d\x82\x47\xc1\x91\x8c\xbb\x2e\x22"
+	"\x9e\x49\x63\x6e\x02\xc1\xc9\x3a\x9b\xa5\x22\x1b\x07\x95\xd6\x10"
+	"\x02\x50\xfd\xfd\xd1\x9b\xbe\xab\xc2\xc0\x74\xd7\xec\x00\xfb\x11"
+	"\x71\xcb\x7a\xdc\x81\x79\x9f\x86\x68\x46\x63\x82\x4d\xb7\xf1\xe6"
+	"\x16\x6f\x42\x63\xf4\x94\xa0\xca\x33\xcc\x75\x13\x02\x82\x01\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x01"
+	"\x02\x82\x01\x00\x62\xb5\x60\x31\x4f\x3f\x66\x16\xc1\x60\xac\x47"
+	"\x2a\xff\x6b\x69\x00\x4a\xb2\x5c\xe1\x50\xb9\x18\x74\xa8\xe4\xdc"
+	"\xa8\xec\xcd\x30\xbb\xc1\xc6\xe3\xc6\xac\x20\x2a\x3e\x5e\x8b\x12"
+	"\xe6\x82\x08\x09\x38\x0b\xab\x7c\xb3\xcc\x9c\xce\x97\x67\xdd\xef"
+	"\x95\x40\x4e\x92\xe2\x44\xe9\x1d\xc1\x14\xfd\xa9\xb1\xdc\x71\x9c"
+	"\x46\x21\xbd\x58\x88\x6e\x22\x15\x56\xc1\xef\xe0\xc9\x8d\xe5\x80"
+	"\x3e\xda\x7e\x93\x0f\x52\xf6\xf5\xc1\x91\x90\x9e\x42\x49\x4f\x8d"
+	"\x9c\xba\x38\x83\xe9\x33\xc2\x50\x4f\xec\xc2\xf0\xa8\xb7\x6e\x28"
+	"\x25\x56\x6b\x62\x67\xfe\x08\xf1\x56\xe5\x6f\x0e\x99\xf1\xe5\x95"
+	"\x7b\xef\xeb\x0a\x2c\x92\x97\x57\x23\x33\x36\x07\xdd\xfb\xae\xf1"
+	"\xb1\xd8\x33\xb7\x96\x71\x42\x36\xc5\xa4\xa9\x19\x4b\x1b\x52\x4c"
+	"\x50\x69\x91\xf0\x0e\xfa\x80\x37\x4b\xb5\xd0\x2f\xb7\x44\x0d\xd4"
+	"\xf8\x39\x8d\xab\x71\x67\x59\x05\x88\x3d\xeb\x48\x48\x33\x88\x4e"
+	"\xfe\xf8\x27\x1b\xd6\x55\x60\x5e\x48\xb7\x6d\x9a\xa8\x37\xf9\x7a"
+	"\xde\x1b\xcd\x5d\x1a\x30\xd4\xe9\x9e\x5b\x3c\x15\xf8\x9c\x1f\xda"
+	"\xd1\x86\x48\x55\xce\x83\xee\x8e\x51\xc7\xde\x32\x12\x47\x7d\x46"
+	"\xb8\x35\xdf\x41\x02\x01\x30\x02\x01\x30\x02\x01\x30\x02\x01\x30"
+	"\x02\x01\x30",
+	.key_len = 804,
+	/*
+	 * m is SHA256 hash of following message:
+	 * "\x49\x41\xbe\x0a\x0c\xc9\xf6\x35\x51\xe4\x27\x56\x13\x71\x4b\xd0"
+	 * "\x36\x92\x84\x89\x1b\xf8\x56\x4a\x72\x61\x14\x69\x4f\x5e\x98\xa5"
+	 * "\x80\x5a\x37\x51\x1f\xd8\xf5\xb5\x63\xfc\xf4\xb1\xbb\x4d\x33\xa3"
+	 * "\x1e\xb9\x75\x8b\x9c\xda\x7e\x6d\x3a\x77\x85\xf7\xfc\x4e\xe7\x64"
+	 * "\x43\x10\x19\xa0\x59\xae\xe0\xad\x4b\xd3\xc4\x45\xf7\xb1\xc2\xc1"
+	 * "\x65\x01\x41\x39\x5b\x45\x47\xed\x2b\x51\xed\xe3\xd0\x09\x10\xd2"
+	 * "\x39\x6c\x4a\x3f\xe5\xd2\x20\xe6\xb0\x71\x7d\x5b\xed\x26\x60\xf1"
+	 * "\xb4\x73\xd1\xdb\x7d\xc4\x19\x91\xee\xf6\x32\x76\xf2\x19\x7d\xb7"
+	 */
+	.m =
+	"\x3e\xc8\xa1\x26\x20\x54\x44\x52\x48\x0d\xe5\x66\xf3\xb3\xf5\x04"
+	"\xbe\x10\xa8\x48\x94\x22\x2d\xdd\xba\x7a\xb4\x76\x8d\x79\x98\x89",
+	.m_size = 32,
+	.c =
+	"\xc7\xa3\x98\xeb\x43\xd1\x08\xc2\x3d\x78\x45\x04\x70\xc9\x01\xee"
+	"\xf8\x85\x37\x7c\x0b\xf9\x19\x70\x5c\x45\x7b\x2f\x3a\x0b\xb7\x8b"
+	"\xc4\x0d\x7b\x3a\x64\x0b\x0f\xdb\x78\xa9\x0b\xfd\x8d\x82\xa4\x86"
+	"\x39\xbf\x21\xb8\x84\xc4\xce\x9f\xc2\xe8\xb6\x61\x46\x17\xb9\x4e"
+	"\x0b\x57\x05\xb4\x4f\xf9\x9c\x93\x2d\x9b\xd5\x48\x1d\x80\x12\xef"
+	"\x3a\x77\x7f\xbc\xb5\x8e\x2b\x6b\x7c\xfc\x9f\x8c\x9d\xa2\xc4\x85"
+	"\xb0\x87\xe9\x17\x9b\xb6\x23\x62\xd2\xa9\x9f\x57\xe8\xf7\x04\x45"
+	"\x24\x3a\x45\xeb\xeb\x6a\x08\x8e\xaf\xc8\xa0\x84\xbc\x5d\x13\x38"
+	"\xf5\x17\x8c\xa3\x96\x9b\xa9\x38\x8d\xf0\x35\xad\x32\x8a\x72\x5b"
+	"\xdf\x21\xab\x4b\x0e\xa8\x29\xbb\x61\x54\xbf\x05\xdb\x84\x84\xde"
+	"\xdd\x16\x36\x31\xda\xf3\x42\x6d\x7a\x90\x22\x9b\x11\x29\xa6\xf8"
+	"\x30\x61\xda\xd3\x8b\x54\x1e\x42\xd1\x47\x1d\x6f\xd1\xcd\x42\x0b"
+	"\xd1\xe4\x15\x85\x7e\x08\xd6\x59\x64\x4c\x01\x34\x91\x92\x26\xe8"
+	"\xb0\x25\x8c\xf8\xf4\xfa\x8b\xc9\x31\x33\x76\x72\xfb\x64\x92\x9f"
+	"\xda\x62\x8d\xe1\x2a\x71\x91\x43\x40\x61\x3c\x5a\xbe\x86\xfc\x5b"
+	"\xe6\xf9\xa9\x16\x31\x1f\xaf\x25\x6d\xc2\x4a\x23\x6e\x63\x02\xa2",
+	.c_size = 256,
+	.siggen_sigver_test = true,
+	}
+};
+
 static const struct kpp_testvec dh_tv_template[] = {
 	{
 	.secret =
@@ -840,6 +939,50 @@ static const struct kpp_testvec ecdh_tv_template[] = {
 	.b_public_size = 64,
 	.expected_a_public_size = 64,
 	.expected_ss_size = 32
+	}, {
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x02\x00" /* type */
+	"\x08\x00" /* len */
+	"\x02\x00" /* curve_id */
+	"\x00\x00", /* key_size */
+#else
+	"\x00\x02" /* type */
+	"\x00\x08" /* len */
+	"\x00\x02" /* curve_id */
+	"\x00\x00", /* key_size */
+#endif
+	.b_secret =
+#ifdef __LITTLE_ENDIAN
+	"\x02\x00" /* type */
+	"\x28\x00" /* len */
+	"\x02\x00" /* curve_id */
+	"\x20\x00" /* key_size */
+#else
+	"\x00\x02" /* type */
+	"\x00\x28" /* len */
+	"\x00\x02" /* curve_id */
+	"\x00\x20" /* key_size */
+#endif
+	"\x24\xd1\x21\xeb\xe5\xcf\x2d\x83"
+	"\xf6\x62\x1b\x6e\x43\x84\x3a\xa3"
+	"\x8b\xe0\x86\xc3\x20\x19\xda\x92"
+	"\x50\x53\x03\xe1\xc0\xea\xb8\x82",
+	.b_public =
+	"\x1a\x7f\xeb\x52\x00\xbd\x3c\x31"
+	"\x7d\xb6\x70\xc1\x86\xa6\xc7\xc4"
+	"\x3b\xc5\x5f\x6c\x6f\x58\x3c\xf5"
+	"\xb6\x63\x82\x77\x33\x24\xa1\x5f"
+	"\x6a\xca\x43\x6f\xf7\x7e\xff\x02"
+	"\x37\x08\xcc\x40\x5e\x7a\xfd\x6a"
+	"\x6a\x02\x6e\x41\x87\x68\x38\x77"
+	"\xfa\xa9\x44\x43\x2d\xef\x09\xdf",
+	.secret_size = 8,
+	.b_secret_size = 40,
+	.b_public_size = 64,
+	.expected_a_public_size = 64,
+	.expected_ss_size = 32,
+	.genkey = true,
 	}
 };
 
diff --git a/drivers/char/hw_random/mtk-rng.c b/drivers/char/hw_random/mtk-rng.c
index df8eb54..8da7bcf 100644
--- a/drivers/char/hw_random/mtk-rng.c
+++ b/drivers/char/hw_random/mtk-rng.c
@@ -25,6 +25,10 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+/* Runtime PM autosuspend timeout (in milliseconds): */
+#define RNG_AUTOSUSPEND_TIMEOUT		100
 
 #define USEC_POLL			2
 #define TIMEOUT_POLL			20
@@ -90,6 +94,8 @@ static int mtk_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
 	struct mtk_rng *priv = to_mtk_rng(rng);
 	int retval = 0;
 
+	pm_runtime_get_sync((struct device *)priv->rng.priv);
+
 	while (max >= sizeof(u32)) {
 		if (!mtk_rng_wait_ready(rng, wait))
 			break;
@@ -100,6 +106,9 @@ static int mtk_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
 		max -= sizeof(u32);
 	}
 
+	pm_runtime_mark_last_busy((struct device *)priv->rng.priv);
+	pm_runtime_put_sync_autosuspend((struct device *)priv->rng.priv);
+
 	return retval || !wait ? retval : -EIO;
 }
 
@@ -120,9 +129,12 @@ static int mtk_rng_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	priv->rng.name = pdev->name;
+#ifndef CONFIG_PM
 	priv->rng.init = mtk_rng_init;
 	priv->rng.cleanup = mtk_rng_cleanup;
+#endif
 	priv->rng.read = mtk_rng_read;
+	priv->rng.priv = (unsigned long)&pdev->dev;
 
 	priv->clk = devm_clk_get(&pdev->dev, "rng");
 	if (IS_ERR(priv->clk)) {
@@ -142,11 +154,40 @@ static int mtk_rng_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	dev_set_drvdata(&pdev->dev, priv);
+	pm_runtime_set_autosuspend_delay(&pdev->dev, RNG_AUTOSUSPEND_TIMEOUT);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
 	dev_info(&pdev->dev, "registered RNG driver\n");
 
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int mtk_rng_runtime_suspend(struct device *dev)
+{
+	struct mtk_rng *priv = dev_get_drvdata(dev);
+
+	mtk_rng_cleanup(&priv->rng);
+
+	return 0;
+}
+
+static int mtk_rng_runtime_resume(struct device *dev)
+{
+	struct mtk_rng *priv = dev_get_drvdata(dev);
+
+	return mtk_rng_init(&priv->rng);
+}
+
+static UNIVERSAL_DEV_PM_OPS(mtk_rng_pm_ops, mtk_rng_runtime_suspend,
+			    mtk_rng_runtime_resume, NULL);
+#define MTK_RNG_PM_OPS (&mtk_rng_pm_ops)
+#else	/* CONFIG_PM */
+#define MTK_RNG_PM_OPS NULL
+#endif	/* CONFIG_PM */
+
 static const struct of_device_id mtk_rng_match[] = {
 	{ .compatible = "mediatek,mt7623-rng" },
 	{},
@@ -157,6 +198,7 @@ static struct platform_driver mtk_rng_driver = {
 	.probe          = mtk_rng_probe,
 	.driver = {
 		.name = MTK_RNG_DEV,
+		.pm = MTK_RNG_PM_OPS,
 		.of_match_table = mtk_rng_match,
 	},
 };
diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c
index 37a58d7..38b7190 100644
--- a/drivers/char/hw_random/omap3-rom-rng.c
+++ b/drivers/char/hw_random/omap3-rom-rng.c
@@ -53,7 +53,10 @@ static int omap3_rom_rng_get_random(void *buf, unsigned int count)
 
 	cancel_delayed_work_sync(&idle_work);
 	if (rng_idle) {
-		clk_prepare_enable(rng_clk);
+		r = clk_prepare_enable(rng_clk);
+		if (r)
+			return r;
+
 		r = omap3_rom_rng_call(0, 0, RNG_GEN_PRNG_HW_INIT);
 		if (r != 0) {
 			clk_disable_unprepare(rng_clk);
@@ -88,6 +91,8 @@ static struct hwrng omap3_rom_rng_ops = {
 
 static int omap3_rom_rng_probe(struct platform_device *pdev)
 {
+	int ret = 0;
+
 	pr_info("initializing\n");
 
 	omap3_rom_rng_call = pdev->dev.platform_data;
@@ -104,7 +109,9 @@ static int omap3_rom_rng_probe(struct platform_device *pdev)
 	}
 
 	/* Leave the RNG in reset state. */
-	clk_prepare_enable(rng_clk);
+	ret = clk_prepare_enable(rng_clk);
+	if (ret)
+		return ret;
 	omap3_rom_rng_idle(0);
 
 	return hwrng_register(&omap3_rom_rng_ops);
diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c
index a0faa5f..03ff548 100644
--- a/drivers/char/hw_random/timeriomem-rng.c
+++ b/drivers/char/hw_random/timeriomem-rng.c
@@ -151,8 +151,15 @@ static int timeriomem_rng_probe(struct platform_device *pdev)
 			dev_err(&pdev->dev, "missing period\n");
 			return -EINVAL;
 		}
+
+		if (!of_property_read_u32(pdev->dev.of_node,
+						"quality", &i))
+			priv->rng_ops.quality = i;
+		else
+			priv->rng_ops.quality = 0;
 	} else {
 		period = pdata->period;
+		priv->rng_ops.quality = pdata->quality;
 	}
 
 	priv->period = ns_to_ktime(period * NSEC_PER_USEC);
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 9c7951b..193204d 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -327,6 +327,15 @@
 	 This option provides the kernel-side support for the TRNG hardware
 	 found in the security function of some PowerPC 4xx SoCs.
 
+config CRYPTO_DEV_OMAP
+	tristate "Support for OMAP crypto HW accelerators"
+	depends on ARCH_OMAP2PLUS
+	help
+	  OMAP processors have various crypto HW accelerators. Select this if
+	  you want to use the OMAP modules for any of the crypto algorithms.
+
+if CRYPTO_DEV_OMAP
+
 config CRYPTO_DEV_OMAP_SHAM
 	tristate "Support for OMAP MD5/SHA1/SHA2 hw accelerator"
 	depends on ARCH_OMAP2PLUS
@@ -348,6 +357,7 @@
 	select CRYPTO_CBC
 	select CRYPTO_ECB
 	select CRYPTO_CTR
+	select CRYPTO_AEAD
 	help
 	  OMAP processors have AES module accelerator. Select this if you
 	  want to use the OMAP module for AES algorithms.
@@ -364,6 +374,8 @@
 	  the ECB and CBC modes of operation are supported by the driver. Also
 	  accesses made on unaligned boundaries are supported.
 
+endif # CRYPTO_DEV_OMAP
+
 config CRYPTO_DEV_PICOXCELL
 	tristate "Support for picoXcell IPSEC and Layer2 crypto engines"
 	depends on (ARCH_PICOXCELL || COMPILE_TEST) && HAVE_CLK
@@ -542,6 +554,7 @@
 
 source "drivers/crypto/qat/Kconfig"
 source "drivers/crypto/cavium/cpt/Kconfig"
+source "drivers/crypto/cavium/nitrox/Kconfig"
 
 config CRYPTO_DEV_CAVIUM_ZIP
 	tristate "Cavium ZIP driver"
@@ -656,4 +669,21 @@
 
 source "drivers/crypto/stm32/Kconfig"
 
+config CRYPTO_DEV_SAFEXCEL
+	tristate "Inside Secure's SafeXcel cryptographic engine driver"
+	depends on HAS_DMA && OF
+	depends on (ARM64 && ARCH_MVEBU) || (COMPILE_TEST && 64BIT)
+	select CRYPTO_AES
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_HASH
+	select CRYPTO_HMAC
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	select CRYPTO_SHA512
+	help
+	  This driver interfaces with the SafeXcel EIP-197 cryptographic engine
+	  designed by Inside Secure. Select this if you want to use CBC/ECB
+	  chain mode, AES cipher mode and SHA1/SHA224/SHA256/SHA512 hash
+	  algorithms.
+
 endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 463f335..2c555a3 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -6,6 +6,7 @@
 obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
 obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/
 obj-$(CONFIG_CRYPTO_DEV_CPT) += cavium/cpt/
+obj-$(CONFIG_CRYPTO_DEV_NITROX) += cavium/nitrox/
 obj-$(CONFIG_CRYPTO_DEV_EXYNOS_RNG) += exynos-rng.o
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
 obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
@@ -20,7 +21,9 @@
 obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o
 n2_crypto-y := n2_core.o n2_asm.o
 obj-$(CONFIG_CRYPTO_DEV_NX) += nx/
-obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o
+obj-$(CONFIG_CRYPTO_DEV_OMAP) += omap-crypto.o
+obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes-driver.o
+omap-aes-driver-objs := omap-aes.o omap-aes-gcm.o
 obj-$(CONFIG_CRYPTO_DEV_OMAP_DES) += omap-des.o
 obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o
 obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o
@@ -39,3 +42,4 @@
 obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/
 obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
 obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/
+obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/
diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c
index fdc83a2..65dc78b 100644
--- a/drivers/crypto/amcc/crypto4xx_core.c
+++ b/drivers/crypto/amcc/crypto4xx_core.c
@@ -1179,6 +1179,7 @@ static int crypto4xx_probe(struct platform_device *ofdev)
 	dev_set_drvdata(dev, core_dev);
 	core_dev->ofdev = ofdev;
 	core_dev->dev = kzalloc(sizeof(struct crypto4xx_device), GFP_KERNEL);
+	rc = -ENOMEM;
 	if (!core_dev->dev)
 		goto err_alloc_dev;
 
diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c
index cc0d5b9..9cfd36c 100644
--- a/drivers/crypto/bcm/cipher.c
+++ b/drivers/crypto/bcm/cipher.c
@@ -36,6 +36,7 @@
 #include <crypto/internal/aead.h>
 #include <crypto/aes.h>
 #include <crypto/des.h>
+#include <crypto/hmac.h>
 #include <crypto/sha.h>
 #include <crypto/md5.h>
 #include <crypto/authenc.h>
@@ -2510,8 +2511,8 @@ static int ahash_hmac_setkey(struct crypto_ahash *ahash, const u8 *key,
 		memcpy(ctx->opad, ctx->ipad, blocksize);
 
 		for (index = 0; index < blocksize; index++) {
-			ctx->ipad[index] ^= 0x36;
-			ctx->opad[index] ^= 0x5c;
+			ctx->ipad[index] ^= HMAC_IPAD_VALUE;
+			ctx->opad[index] ^= HMAC_OPAD_VALUE;
 		}
 
 		flow_dump("  ipad: ", ctx->ipad, blocksize);
@@ -2638,7 +2639,7 @@ static int aead_need_fallback(struct aead_request *req)
 	    (spu->spu_type == SPU_TYPE_SPUM) &&
 	    (ctx->digestsize != 8) && (ctx->digestsize != 12) &&
 	    (ctx->digestsize != 16)) {
-		flow_log("%s() AES CCM needs fallbck for digest size %d\n",
+		flow_log("%s() AES CCM needs fallback for digest size %d\n",
 			 __func__, ctx->digestsize);
 		return 1;
 	}
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index 398807d..fde399c 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -1187,8 +1187,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
 	struct aead_edesc *edesc;
 	int sec4_sg_index, sec4_sg_len, sec4_sg_bytes;
@@ -1475,8 +1475,7 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-					  CRYPTO_TFM_REQ_MAY_SLEEP)) ?
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
 	int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
 	struct ablkcipher_edesc *edesc;
@@ -1681,8 +1680,7 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-					  CRYPTO_TFM_REQ_MAY_SLEEP)) ?
+	gfp_t flags = (req->base.flags &  CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
 	int src_nents, mapped_src_nents, dst_nents, mapped_dst_nents;
 	struct ablkcipher_edesc *edesc;
diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index ea0e5b8..78c4c04 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -555,8 +555,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 	struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead),
 						 typeof(*alg), aead);
 	struct device *qidev = ctx->qidev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
 	struct aead_edesc *edesc;
 	dma_addr_t qm_sg_dma, iv_dma = 0;
@@ -808,8 +808,7 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
 	struct device *qidev = ctx->qidev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-					  CRYPTO_TFM_REQ_MAY_SLEEP)) ?
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
 	int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
 	struct ablkcipher_edesc *edesc;
@@ -953,8 +952,7 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
 	struct device *qidev = ctx->qidev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-					  CRYPTO_TFM_REQ_MAY_SLEEP)) ?
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
 	int src_nents, mapped_src_nents, dst_nents, mapped_dst_nents;
 	struct ablkcipher_edesc *edesc;
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index da4f94e..7c44c90 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -719,8 +719,8 @@ static int ahash_update_ctx(struct ahash_request *req)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	u8 *buf = current_buf(state);
 	int *buflen = current_buflen(state);
 	u8 *next_buf = alt_buf(state);
@@ -849,8 +849,8 @@ static int ahash_final_ctx(struct ahash_request *req)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	int buflen = *current_buflen(state);
 	u32 *desc;
 	int sec4_sg_bytes, sec4_sg_src_index;
@@ -926,8 +926,8 @@ static int ahash_finup_ctx(struct ahash_request *req)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	int buflen = *current_buflen(state);
 	u32 *desc;
 	int sec4_sg_src_index;
@@ -1013,8 +1013,8 @@ static int ahash_digest(struct ahash_request *req)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	u32 *desc;
 	int digestsize = crypto_ahash_digestsize(ahash);
 	int src_nents, mapped_nents;
@@ -1093,8 +1093,8 @@ static int ahash_final_no_ctx(struct ahash_request *req)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	u8 *buf = current_buf(state);
 	int buflen = *current_buflen(state);
 	u32 *desc;
@@ -1154,8 +1154,8 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	u8 *buf = current_buf(state);
 	int *buflen = current_buflen(state);
 	u8 *next_buf = alt_buf(state);
@@ -1280,8 +1280,8 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	int buflen = *current_buflen(state);
 	u32 *desc;
 	int sec4_sg_bytes, sec4_sg_src_index, src_nents, mapped_nents;
@@ -1370,8 +1370,8 @@ static int ahash_update_first(struct ahash_request *req)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	u8 *next_buf = alt_buf(state);
 	int *next_buflen = alt_buflen(state);
 	int to_hash;
diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c
index 49cbdcb..7a897209 100644
--- a/drivers/crypto/caam/caampkc.c
+++ b/drivers/crypto/caam/caampkc.c
@@ -18,6 +18,10 @@
 #define DESC_RSA_PUB_LEN	(2 * CAAM_CMD_SZ + sizeof(struct rsa_pub_pdb))
 #define DESC_RSA_PRIV_F1_LEN	(2 * CAAM_CMD_SZ + \
 				 sizeof(struct rsa_priv_f1_pdb))
+#define DESC_RSA_PRIV_F2_LEN	(2 * CAAM_CMD_SZ + \
+				 sizeof(struct rsa_priv_f2_pdb))
+#define DESC_RSA_PRIV_F3_LEN	(2 * CAAM_CMD_SZ + \
+				 sizeof(struct rsa_priv_f3_pdb))
 
 static void rsa_io_unmap(struct device *dev, struct rsa_edesc *edesc,
 			 struct akcipher_request *req)
@@ -54,6 +58,42 @@ static void rsa_priv_f1_unmap(struct device *dev, struct rsa_edesc *edesc,
 	dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE);
 }
 
+static void rsa_priv_f2_unmap(struct device *dev, struct rsa_edesc *edesc,
+			      struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_key *key = &ctx->key;
+	struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2;
+	size_t p_sz = key->p_sz;	/* size of the p and tmp1 mappings */
+	size_t q_sz = key->q_sz;	/* size of the q and tmp2 mappings */
+
+	dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_TO_DEVICE);
+}
+
+static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc,
+			      struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_key *key = &ctx->key;
+	struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3;
+	size_t p_sz = key->p_sz;	/* size of p, dP, qInv, tmp1 mappings */
+	size_t q_sz = key->q_sz;	/* size of q, dQ, tmp2 mappings */
+
+	dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->dp_dma, p_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->dq_dma, q_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->c_dma, p_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_TO_DEVICE);
+}
+
 /* RSA Job Completion handler */
 static void rsa_pub_done(struct device *dev, u32 *desc, u32 err, void *context)
 {
@@ -90,6 +130,42 @@ static void rsa_priv_f1_done(struct device *dev, u32 *desc, u32 err,
 	akcipher_request_complete(req, err);
 }
 
+static void rsa_priv_f2_done(struct device *dev, u32 *desc, u32 err,
+			     void *context)
+{
+	struct akcipher_request *req = context;
+	struct rsa_edesc *edesc;
+
+	if (err)
+		caam_jr_strstatus(dev, err);
+	/* desc is hw_desc[] of the rsa_edesc that was enqueued with this job */
+	edesc = container_of(desc, struct rsa_edesc, hw_desc[0]);
+	/* Undo the Form #2 key and I/O mappings, then free the descriptor */
+	rsa_priv_f2_unmap(dev, edesc, req);
+	rsa_io_unmap(dev, edesc, req);
+	kfree(edesc);
+
+	akcipher_request_complete(req, err);
+}
+
+static void rsa_priv_f3_done(struct device *dev, u32 *desc, u32 err,
+			     void *context)
+{
+	struct akcipher_request *req = context;
+	struct rsa_edesc *edesc;
+
+	if (err)
+		caam_jr_strstatus(dev, err);
+	/* desc is hw_desc[] of the rsa_edesc that was enqueued with this job */
+	edesc = container_of(desc, struct rsa_edesc, hw_desc[0]);
+	/* Undo the Form #3 key and I/O mappings, then free the descriptor */
+	rsa_priv_f3_unmap(dev, edesc, req);
+	rsa_io_unmap(dev, edesc, req);
+	kfree(edesc);
+
+	akcipher_request_complete(req, err);
+}
+
 static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
 					 size_t desclen)
 {
@@ -97,8 +173,8 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
 	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
 	struct device *dev = ctx->dev;
 	struct rsa_edesc *edesc;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	int sgc;
 	int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes;
 	int src_nents, dst_nents;
@@ -258,6 +334,172 @@ static int set_rsa_priv_f1_pdb(struct akcipher_request *req,
 	return 0;
 }
 
+static int set_rsa_priv_f2_pdb(struct akcipher_request *req,
+			       struct rsa_edesc *edesc)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_key *key = &ctx->key;
+	struct device *dev = ctx->dev;
+	struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2;
+	int sec4_sg_index = 0;
+	size_t p_sz = key->p_sz;	/* size of the p and tmp1 mappings */
+	size_t q_sz = key->q_sz;	/* size of the q and tmp2 mappings */
+
+	pdb->d_dma = dma_map_single(dev, key->d, key->d_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->d_dma)) {
+		dev_err(dev, "Unable to map RSA private exponent memory\n");
+		return -ENOMEM;
+	}
+
+	pdb->p_dma = dma_map_single(dev, key->p, p_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->p_dma)) {
+		dev_err(dev, "Unable to map RSA prime factor p memory\n");
+		goto unmap_d;
+	}
+
+	pdb->q_dma = dma_map_single(dev, key->q, q_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->q_dma)) {
+		dev_err(dev, "Unable to map RSA prime factor q memory\n");
+		goto unmap_p;
+	}
+
+	pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->tmp1_dma)) {
+		dev_err(dev, "Unable to map RSA tmp1 memory\n");
+		goto unmap_q;
+	}
+
+	pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->tmp2_dma)) {
+		dev_err(dev, "Unable to map RSA tmp2 memory\n");
+		goto unmap_tmp1;
+	}
+	/* Input g: the S/G table if scattered, else the single segment */
+	if (edesc->src_nents > 1) {
+		pdb->sgf |= RSA_PRIV_PDB_SGF_G;
+		pdb->g_dma = edesc->sec4_sg_dma;
+		sec4_sg_index += edesc->src_nents;
+	} else {
+		pdb->g_dma = sg_dma_address(req->src);
+	}
+	/* Output f: its S/G entries, if any, follow the input entries */
+	if (edesc->dst_nents > 1) {
+		pdb->sgf |= RSA_PRIV_PDB_SGF_F;
+		pdb->f_dma = edesc->sec4_sg_dma +
+			     sec4_sg_index * sizeof(struct sec4_sg_entry);
+	} else {
+		pdb->f_dma = sg_dma_address(req->dst);
+	}
+	/* sgf also carries the d and n lengths; p_q_len the q and p lengths */
+	pdb->sgf |= (key->d_sz << RSA_PDB_D_SHIFT) | key->n_sz;
+	pdb->p_q_len = (q_sz << RSA_PDB_Q_SHIFT) | p_sz;
+
+	return 0;
+
+unmap_tmp1:
+	dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE);
+unmap_q:
+	dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE);
+unmap_p:
+	dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE);
+unmap_d:
+	dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE);
+
+	return -ENOMEM;
+}
+
+static int set_rsa_priv_f3_pdb(struct akcipher_request *req,
+			       struct rsa_edesc *edesc)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_key *key = &ctx->key;
+	struct device *dev = ctx->dev;
+	struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3;
+	int sec4_sg_index = 0;
+	size_t p_sz = key->p_sz;	/* size of p, dP, qInv, tmp1 mappings */
+	size_t q_sz = key->q_sz;	/* size of q, dQ, tmp2 mappings */
+
+	pdb->p_dma = dma_map_single(dev, key->p, p_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->p_dma)) {
+		dev_err(dev, "Unable to map RSA prime factor p memory\n");
+		return -ENOMEM;
+	}
+
+	pdb->q_dma = dma_map_single(dev, key->q, q_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->q_dma)) {
+		dev_err(dev, "Unable to map RSA prime factor q memory\n");
+		goto unmap_p;
+	}
+
+	pdb->dp_dma = dma_map_single(dev, key->dp, p_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->dp_dma)) {
+		dev_err(dev, "Unable to map RSA exponent dp memory\n");
+		goto unmap_q;
+	}
+
+	pdb->dq_dma = dma_map_single(dev, key->dq, q_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->dq_dma)) {
+		dev_err(dev, "Unable to map RSA exponent dq memory\n");
+		goto unmap_dp;
+	}
+
+	pdb->c_dma = dma_map_single(dev, key->qinv, p_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->c_dma)) {
+		dev_err(dev, "Unable to map RSA CRT coefficient qinv memory\n");
+		goto unmap_dq;
+	}
+
+	pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->tmp1_dma)) {
+		dev_err(dev, "Unable to map RSA tmp1 memory\n");
+		goto unmap_qinv;
+	}
+
+	pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->tmp2_dma)) {
+		dev_err(dev, "Unable to map RSA tmp2 memory\n");
+		goto unmap_tmp1;
+	}
+	/* Input g: the S/G table if scattered, else the single segment */
+	if (edesc->src_nents > 1) {
+		pdb->sgf |= RSA_PRIV_PDB_SGF_G;
+		pdb->g_dma = edesc->sec4_sg_dma;
+		sec4_sg_index += edesc->src_nents;
+	} else {
+		pdb->g_dma = sg_dma_address(req->src);
+	}
+	/* Output f: its S/G entries, if any, follow the input entries */
+	if (edesc->dst_nents > 1) {
+		pdb->sgf |= RSA_PRIV_PDB_SGF_F;
+		pdb->f_dma = edesc->sec4_sg_dma +
+			     sec4_sg_index * sizeof(struct sec4_sg_entry);
+	} else {
+		pdb->f_dma = sg_dma_address(req->dst);
+	}
+	/* sgf also carries the n length; p_q_len the q and p lengths */
+	pdb->sgf |= key->n_sz;
+	pdb->p_q_len = (q_sz << RSA_PDB_Q_SHIFT) | p_sz;
+
+	return 0;
+
+unmap_tmp1:
+	dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE);
+unmap_qinv:
+	dma_unmap_single(dev, pdb->c_dma, p_sz, DMA_TO_DEVICE);
+unmap_dq:
+	dma_unmap_single(dev, pdb->dq_dma, q_sz, DMA_TO_DEVICE);
+unmap_dp:
+	dma_unmap_single(dev, pdb->dp_dma, p_sz, DMA_TO_DEVICE);
+unmap_q:
+	dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE);
+unmap_p:
+	dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE);
+
+	return -ENOMEM;
+}
+
 static int caam_rsa_enc(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
@@ -301,24 +543,14 @@ static int caam_rsa_enc(struct akcipher_request *req)
 	return ret;
 }
 
-static int caam_rsa_dec(struct akcipher_request *req)
+static int caam_rsa_dec_priv_f1(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
-	struct caam_rsa_key *key = &ctx->key;
 	struct device *jrdev = ctx->dev;
 	struct rsa_edesc *edesc;
 	int ret;
 
-	if (unlikely(!key->n || !key->d))
-		return -EINVAL;
-
-	if (req->dst_len < key->n_sz) {
-		req->dst_len = key->n_sz;
-		dev_err(jrdev, "Output buffer length less than parameter n\n");
-		return -EOVERFLOW;
-	}
-
 	/* Allocate extended descriptor */
 	edesc = rsa_edesc_alloc(req, DESC_RSA_PRIV_F1_LEN);
 	if (IS_ERR(edesc))
@@ -344,17 +576,147 @@ static int caam_rsa_dec(struct akcipher_request *req)
 	return ret;
 }
 
+static int caam_rsa_dec_priv_f2(struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct device *jrdev = ctx->dev;
+	struct rsa_edesc *edesc;
+	int ret;
+
+	/* Form #2 decrypt: allocate the extended descriptor for this request */
+	edesc = rsa_edesc_alloc(req, DESC_RSA_PRIV_F2_LEN);
+	if (IS_ERR(edesc))
+		return PTR_ERR(edesc);
+
+	/* Set RSA Decrypt Protocol Data Block - Private Key Form #2 */
+	ret = set_rsa_priv_f2_pdb(req, edesc);
+	if (ret)
+		goto init_fail;
+
+	/* Initialize Job Descriptor */
+	init_rsa_priv_f2_desc(edesc->hw_desc, &edesc->pdb.priv_f2);
+
+	ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f2_done, req);
+	if (!ret)
+		return -EINPROGRESS;	/* cleanup is left to rsa_priv_f2_done() */
+	/* Enqueue failed: drop the key mappings, then do the common cleanup */
+	rsa_priv_f2_unmap(jrdev, edesc, req);
+
+init_fail:
+	rsa_io_unmap(jrdev, edesc, req);
+	kfree(edesc);
+	return ret;
+}
+
+static int caam_rsa_dec_priv_f3(struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct device *jrdev = ctx->dev;
+	struct rsa_edesc *edesc;
+	int ret;
+
+	/* Form #3 decrypt: allocate the extended descriptor for this request */
+	edesc = rsa_edesc_alloc(req, DESC_RSA_PRIV_F3_LEN);
+	if (IS_ERR(edesc))
+		return PTR_ERR(edesc);
+
+	/* Set RSA Decrypt Protocol Data Block - Private Key Form #3 */
+	ret = set_rsa_priv_f3_pdb(req, edesc);
+	if (ret)
+		goto init_fail;
+
+	/* Initialize Job Descriptor */
+	init_rsa_priv_f3_desc(edesc->hw_desc, &edesc->pdb.priv_f3);
+
+	ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f3_done, req);
+	if (!ret)
+		return -EINPROGRESS;	/* cleanup is left to rsa_priv_f3_done() */
+	/* Enqueue failed: drop the key mappings, then do the common cleanup */
+	rsa_priv_f3_unmap(jrdev, edesc, req);
+
+init_fail:
+	rsa_io_unmap(jrdev, edesc, req);
+	kfree(edesc);
+	return ret;
+}
+
+static int caam_rsa_dec(struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_key *key = &ctx->key;
+
+	if (unlikely(!key->n || !key->d))
+		return -EINVAL;
+
+	if (req->dst_len < key->n_sz) {
+		req->dst_len = key->n_sz;
+		dev_err(ctx->dev, "Output buffer length less than parameter n\n");
+		return -EOVERFLOW;
+	}
+
+	/* Dispatch on the private key form recorded in the key */
+	switch (key->priv_form) {
+	case FORM3:
+		return caam_rsa_dec_priv_f3(req);
+	case FORM2:
+		return caam_rsa_dec_priv_f2(req);
+	default:
+		return caam_rsa_dec_priv_f1(req);
+	}
+}
+
 static void caam_rsa_free_key(struct caam_rsa_key *key)
 {
 	kzfree(key->d);
+	kzfree(key->p);
+	kzfree(key->q);
+	kzfree(key->dp);
+	kzfree(key->dq);
+	kzfree(key->qinv);
+	kzfree(key->tmp1);
+	kzfree(key->tmp2);
 	kfree(key->e);
 	kfree(key->n);
-	key->d = NULL;
-	key->e = NULL;
-	key->n = NULL;
-	key->d_sz = 0;
-	key->e_sz = 0;
-	key->n_sz = 0;
+	memset(key, 0, sizeof(*key));
+}
+
+static void caam_rsa_drop_leading_zeros(const u8 **ptr, size_t *nbytes)
+{
+	while (*nbytes && !**ptr) {	/* check the count before reading */
+		(*ptr)++;
+		(*nbytes)--;
+	}
+}
+
+/**
+ * caam_read_rsa_crt - Used for reading dP, dQ, qInv CRT members.
+ * dP, dQ and qInv could decode to less than corresponding p, q length, as the
+ * BER-encoding requires that the minimum number of bytes be used to encode the
+ * integer. dP, dQ, qInv decoded values have to be zero-padded to appropriate
+ * length. Returns a @dstlen byte buffer with the value right-aligned, or NULL
+ * if the value is empty, is longer than @dstlen, or allocation fails.
+ * @ptr   : pointer to {dP, dQ, qInv} CRT member
+ * @nbytes: length in bytes of {dP, dQ, qInv} CRT member
+ * @dstlen: length in bytes of corresponding p or q prime factor
+ */
+static u8 *caam_read_rsa_crt(const u8 *ptr, size_t nbytes, size_t dstlen)
+{
+	u8 *dst;
+
+	caam_rsa_drop_leading_zeros(&ptr, &nbytes);
+	if (!nbytes || nbytes > dstlen)	/* empty, or would not fit in dst */
+		return NULL;
+
+	dst = kzalloc(dstlen, GFP_DMA | GFP_KERNEL);
+	if (!dst)
+		return NULL;
+	/* Right-align the value; the zeroed head of dst is the padding */
+	memcpy(dst + (dstlen - nbytes), ptr, nbytes);
+
+	return dst;
+}
 
 /**
@@ -370,10 +732,9 @@ static inline u8 *caam_read_raw_data(const u8 *buf, size_t *nbytes)
 {
 	u8 *val;
 
-	while (!*buf && *nbytes) {
-		buf++;
-		(*nbytes)--;
-	}
+	caam_rsa_drop_leading_zeros(&buf, nbytes);
+	if (!*nbytes)
+		return NULL;
 
 	val = kzalloc(*nbytes, GFP_DMA | GFP_KERNEL);
 	if (!val)
@@ -437,6 +798,81 @@ static int caam_rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key,
 	return -ENOMEM;
 }
 
+/*
+ * Derive the richest private key form the raw key allows. Form #2 needs the
+ * prime factors p and q plus two scratch buffers; Form #3 additionally needs
+ * the CRT members dP, dQ and qInv. When the Form #2 material cannot be set
+ * up the key keeps its current form; when only a CRT member fails, the
+ * finished Form #2 state is kept. Whatever an error path frees is cleared in
+ * the key as well, so it is neither mapped for DMA nor freed again later.
+ */
+static void caam_rsa_set_priv_key_form(struct caam_rsa_ctx *ctx,
+				       struct rsa_key *raw_key)
+{
+	struct caam_rsa_key *rsa_key = &ctx->key;
+	size_t p_sz = raw_key->p_sz;
+	size_t q_sz = raw_key->q_sz;
+
+	rsa_key->p = caam_read_raw_data(raw_key->p, &p_sz);
+	if (!rsa_key->p)
+		return;
+	rsa_key->p_sz = p_sz;
+
+	rsa_key->q = caam_read_raw_data(raw_key->q, &q_sz);
+	if (!rsa_key->q)
+		goto free_p;
+	rsa_key->q_sz = q_sz;
+
+	rsa_key->tmp1 = kzalloc(raw_key->p_sz, GFP_DMA | GFP_KERNEL);
+	if (!rsa_key->tmp1)
+		goto free_q;
+
+	rsa_key->tmp2 = kzalloc(raw_key->q_sz, GFP_DMA | GFP_KERNEL);
+	if (!rsa_key->tmp2)
+		goto free_tmp1;
+
+	rsa_key->priv_form = FORM2;
+
+	/* From here on a failure only costs Form #3; Form #2 stays valid */
+	rsa_key->dp = caam_read_rsa_crt(raw_key->dp, raw_key->dp_sz, p_sz);
+	if (!rsa_key->dp)
+		return;
+
+	rsa_key->dq = caam_read_rsa_crt(raw_key->dq, raw_key->dq_sz, q_sz);
+	if (!rsa_key->dq)
+		goto free_dp;
+
+	/* qInv is reduced modulo p and is DMA-mapped with p's length */
+	rsa_key->qinv = caam_read_rsa_crt(raw_key->qinv, raw_key->qinv_sz,
+					  p_sz);
+	if (!rsa_key->qinv)
+		goto free_dq;
+
+	rsa_key->priv_form = FORM3;
+
+	return;
+
+free_dq:
+	kzfree(rsa_key->dq);
+	rsa_key->dq = NULL;
+free_dp:
+	kzfree(rsa_key->dp);
+	rsa_key->dp = NULL;
+	return;
+
+free_tmp1:
+	kzfree(rsa_key->tmp1);
+	rsa_key->tmp1 = NULL;
+free_q:
+	kzfree(rsa_key->q);
+	rsa_key->q = NULL;
+	rsa_key->q_sz = 0;
+free_p:
+	kzfree(rsa_key->p);
+	rsa_key->p = NULL;
+	rsa_key->p_sz = 0;
+}
+
 static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
 				 unsigned int keylen)
 {
@@ -483,6 +902,8 @@ static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
 	memcpy(rsa_key->d, raw_key.d, raw_key.d_sz);
 	memcpy(rsa_key->e, raw_key.e, raw_key.e_sz);
 
+	caam_rsa_set_priv_key_form(ctx, &raw_key);
+
 	return 0;
 
 err:
@@ -490,12 +911,11 @@ static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
 	return -ENOMEM;
 }
 
-static int caam_rsa_max_size(struct crypto_akcipher *tfm)
+static unsigned int caam_rsa_max_size(struct crypto_akcipher *tfm)
 {
 	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
-	struct caam_rsa_key *key = &ctx->key;
 
-	return (key->n) ? key->n_sz : -EINVAL;
+	return ctx->key.n_sz;
 }
 
 /* Per session pkc's driver context creation function */
diff --git a/drivers/crypto/caam/caampkc.h b/drivers/crypto/caam/caampkc.h
index f595d15..87ab75e 100644
--- a/drivers/crypto/caam/caampkc.h
+++ b/drivers/crypto/caam/caampkc.h
@@ -13,21 +13,75 @@
 #include "pdb.h"
 
 /**
+ * caam_priv_key_form - CAAM RSA private key representation
+ * A CAAM RSA private key may have any of three forms.
+ *
+ * 1. The first representation consists of the pair (n, d), where the
+ *    components have the following meanings:
+ *        n      the RSA modulus
+ *        d      the RSA private exponent
+ *
+ * 2. The second representation consists of the triplet (p, q, d), where the
+ *    components have the following meanings:
+ *        p      the first prime factor of the RSA modulus n
+ *        q      the second prime factor of the RSA modulus n
+ *        d      the RSA private exponent
+ *
+ * 3. The third representation consists of the quintuple (p, q, dP, dQ, qInv),
+ *    where the components have the following meanings:
+ *        p      the first prime factor of the RSA modulus n
+ *        q      the second prime factor of the RSA modulus n
+ *        dP     the first factor's CRT exponent
+ *        dQ     the second factor's CRT exponent
+ *        qInv   the (first) CRT coefficient
+ *
+ * The benefit of using the third or the second key form is lower computational
+ * cost for the decryption and signature operations.
+ */
+enum caam_priv_key_form {
+	FORM1,	/* (n, d) */
+	FORM2,	/* (p, q, d) */
+	FORM3	/* (p, q, dP, dQ, qInv) */
+};
+
+/**
  * caam_rsa_key - CAAM RSA key structure. Keys are allocated in DMA zone.
  * @n           : RSA modulus raw byte stream
  * @e           : RSA public exponent raw byte stream
  * @d           : RSA private exponent raw byte stream
+ * @p           : RSA prime factor p of RSA modulus n
+ * @q           : RSA prime factor q of RSA modulus n
+ * @dp          : RSA CRT exponent of p
+ * @dq          : RSA CRT exponent of q
+ * @qinv        : RSA CRT coefficient
+ * @tmp1        : CAAM uses this temporary buffer as internal state buffer.
+ *                It is assumed to be as long as p.
+ * @tmp2        : CAAM uses this temporary buffer as internal state buffer.
+ *                It is assumed to be as long as q.
  * @n_sz        : length in bytes of RSA modulus n
  * @e_sz        : length in bytes of RSA public exponent
  * @d_sz        : length in bytes of RSA private exponent
+ * @p_sz        : length in bytes of RSA prime factor p of RSA modulus n
+ * @q_sz        : length in bytes of RSA prime factor q of RSA modulus n
+ * @priv_form   : CAAM RSA private key representation
  */
 struct caam_rsa_key {
 	u8 *n;
 	u8 *e;
 	u8 *d;
+	u8 *p;
+	u8 *q;
+	u8 *dp;
+	u8 *dq;
+	u8 *qinv;
+	u8 *tmp1;
+	u8 *tmp2;
 	size_t n_sz;
 	size_t e_sz;
 	size_t d_sz;
+	size_t p_sz;
+	size_t q_sz;
+	enum caam_priv_key_form priv_form;
 };
 
 /**
@@ -59,6 +113,8 @@ struct rsa_edesc {
 	union {
 		struct rsa_pub_pdb pub;
 		struct rsa_priv_f1_pdb priv_f1;
+		struct rsa_priv_f2_pdb priv_f2;
+		struct rsa_priv_f3_pdb priv_f3;
 	} pdb;
 	u32 hw_desc[];
 };
@@ -66,5 +122,7 @@ struct rsa_edesc {
 /* Descriptor construction primitives. */
 void init_rsa_pub_desc(u32 *desc, struct rsa_pub_pdb *pdb);
 void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb);
+void init_rsa_priv_f2_desc(u32 *desc, struct rsa_priv_f2_pdb *pdb);
+void init_rsa_priv_f3_desc(u32 *desc, struct rsa_priv_f3_pdb *pdb);
 
 #endif
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index 2763100..1ccfb31 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -536,7 +536,7 @@ static int caam_jr_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static struct of_device_id caam_jr_match[] = {
+static const struct of_device_id caam_jr_match[] = {
 	{
 		.compatible = "fsl,sec-v4.0-job-ring",
 	},
diff --git a/drivers/crypto/caam/pdb.h b/drivers/crypto/caam/pdb.h
index aaa00dd..31e5996 100644
--- a/drivers/crypto/caam/pdb.h
+++ b/drivers/crypto/caam/pdb.h
@@ -483,6 +483,8 @@ struct dsa_verify_pdb {
 #define RSA_PDB_E_MASK          (0xFFF << RSA_PDB_E_SHIFT)
 #define RSA_PDB_D_SHIFT         12
 #define RSA_PDB_D_MASK          (0xFFF << RSA_PDB_D_SHIFT)
+#define RSA_PDB_Q_SHIFT         12
+#define RSA_PDB_Q_MASK          (0xFFF << RSA_PDB_Q_SHIFT)
 
 #define RSA_PDB_SGF_F           (0x8 << RSA_PDB_SGF_SHIFT)
 #define RSA_PDB_SGF_G           (0x4 << RSA_PDB_SGF_SHIFT)
@@ -490,6 +492,8 @@ struct dsa_verify_pdb {
 #define RSA_PRIV_PDB_SGF_G      (0x8 << RSA_PDB_SGF_SHIFT)
 
 #define RSA_PRIV_KEY_FRM_1      0
+#define RSA_PRIV_KEY_FRM_2      1
+#define RSA_PRIV_KEY_FRM_3      2
 
 /**
  * RSA Encrypt Protocol Data Block
@@ -525,4 +529,62 @@ struct rsa_priv_f1_pdb {
 	dma_addr_t	d_dma;
 } __packed;
 
+/**
+ * RSA Decrypt PDB - Private Key Form #2
+ * @sgf     : scatter-gather field
+ * @g_dma   : dma address of encrypted input data
+ * @f_dma   : dma address of output data
+ * @d_dma   : dma address of RSA private exponent
+ * @p_dma   : dma address of RSA prime factor p of RSA modulus n
+ * @q_dma   : dma address of RSA prime factor q of RSA modulus n
+ * @tmp1_dma: dma address of temporary buffer. CAAM uses this temporary buffer
+ *            as internal state buffer. It is assumed to be as long as p.
+ * @tmp2_dma: dma address of temporary buffer. CAAM uses this temporary buffer
+ *            as internal state buffer. It is assumed to be as long as q.
+ * @p_q_len : length in bytes of first two prime factors of the RSA modulus n
+ */
+struct rsa_priv_f2_pdb {
+	u32		sgf;
+	dma_addr_t	g_dma;
+	dma_addr_t	f_dma;
+	dma_addr_t	d_dma;
+	dma_addr_t	p_dma;
+	dma_addr_t	q_dma;
+	dma_addr_t	tmp1_dma;
+	dma_addr_t	tmp2_dma;
+	u32		p_q_len;
+} __packed;
+
+/**
+ * RSA Decrypt PDB - Private Key Form #3
+ * This is the RSA Chinese Remainder Theorem (CRT) form for two prime factors of
+ * the RSA modulus.
+ * @sgf     : scatter-gather field
+ * @g_dma   : dma address of encrypted input data
+ * @f_dma   : dma address of output data
+ * @c_dma   : dma address of RSA CRT coefficient
+ * @p_dma   : dma address of RSA prime factor p of RSA modulus n
+ * @q_dma   : dma address of RSA prime factor q of RSA modulus n
+ * @dp_dma  : dma address of RSA CRT exponent of RSA prime factor p
+ * @dq_dma  : dma address of RSA CRT exponent of RSA prime factor q
+ * @tmp1_dma: dma address of temporary buffer. CAAM uses this temporary buffer
+ *            as internal state buffer. It is assumed to be as long as p.
+ * @tmp2_dma: dma address of temporary buffer. CAAM uses this temporary buffer
+ *            as internal state buffer. It is assumed to be as long as q.
+ * @p_q_len : length in bytes of first two prime factors of the RSA modulus n
+ */
+struct rsa_priv_f3_pdb {
+	u32		sgf;
+	dma_addr_t	g_dma;
+	dma_addr_t	f_dma;
+	dma_addr_t	c_dma;
+	dma_addr_t	p_dma;
+	dma_addr_t	q_dma;
+	dma_addr_t	dp_dma;
+	dma_addr_t	dq_dma;
+	dma_addr_t	tmp1_dma;
+	dma_addr_t	tmp2_dma;
+	u32		p_q_len;
+} __packed;
+
 #endif
diff --git a/drivers/crypto/caam/pkc_desc.c b/drivers/crypto/caam/pkc_desc.c
index 4e4183e..9e2ce6f 100644
--- a/drivers/crypto/caam/pkc_desc.c
+++ b/drivers/crypto/caam/pkc_desc.c
@@ -34,3 +34,39 @@ void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb)
 	append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSADEC_PRVKEY |
 			 RSA_PRIV_KEY_FRM_1);
 }
+
+/* Descriptor for RSA Private operation - Private Key Form #2 */
+void init_rsa_priv_f2_desc(u32 *desc, struct rsa_priv_f2_pdb *pdb)
+{
+	init_job_desc_pdb(desc, 0, sizeof(*pdb));
+	append_cmd(desc, pdb->sgf);
+	append_ptr(desc, pdb->g_dma);
+	append_ptr(desc, pdb->f_dma);
+	append_ptr(desc, pdb->d_dma);
+	append_ptr(desc, pdb->p_dma);
+	append_ptr(desc, pdb->q_dma);
+	append_ptr(desc, pdb->tmp1_dma);
+	append_ptr(desc, pdb->tmp2_dma);
+	append_cmd(desc, pdb->p_q_len);
+	append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSADEC_PRVKEY |
+			 RSA_PRIV_KEY_FRM_2);
+}
+
+/* Descriptor for RSA Private operation - Private Key Form #3 */
+void init_rsa_priv_f3_desc(u32 *desc, struct rsa_priv_f3_pdb *pdb)
+{
+	init_job_desc_pdb(desc, 0, sizeof(*pdb));
+	append_cmd(desc, pdb->sgf);
+	append_ptr(desc, pdb->g_dma);
+	append_ptr(desc, pdb->f_dma);
+	append_ptr(desc, pdb->c_dma);
+	append_ptr(desc, pdb->p_dma);
+	append_ptr(desc, pdb->q_dma);
+	append_ptr(desc, pdb->dp_dma);
+	append_ptr(desc, pdb->dq_dma);
+	append_ptr(desc, pdb->tmp1_dma);
+	append_ptr(desc, pdb->tmp2_dma);
+	append_cmd(desc, pdb->p_q_len);
+	append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSADEC_PRVKEY |
+			 RSA_PRIV_KEY_FRM_3);
+}
diff --git a/drivers/crypto/cavium/cpt/cptvf_algs.c b/drivers/crypto/cavium/cpt/cptvf_algs.c
index cc853f9..1b220f3 100644
--- a/drivers/crypto/cavium/cpt/cptvf_algs.c
+++ b/drivers/crypto/cavium/cpt/cptvf_algs.c
@@ -98,7 +98,6 @@ static inline void update_output_data(struct cpt_request_info *req_info,
 }
 
 static inline u32 create_ctx_hdr(struct ablkcipher_request *req, u32 enc,
-				 u32 cipher_type, u32 aes_key_type,
 				 u32 *argcnt)
 {
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
@@ -124,11 +123,11 @@ static inline u32 create_ctx_hdr(struct ablkcipher_request *req, u32 enc,
 	req_info->req.param1 = req->nbytes; /* Encryption Data length */
 	req_info->req.param2 = 0; /*Auth data length */
 
-	fctx->enc.enc_ctrl.e.enc_cipher = cipher_type;
-	fctx->enc.enc_ctrl.e.aes_key = aes_key_type;
+	fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type;
+	fctx->enc.enc_ctrl.e.aes_key = ctx->key_type;
 	fctx->enc.enc_ctrl.e.iv_source = FROM_DPTR;
 
-	if (cipher_type == AES_XTS)
+	if (ctx->cipher_type == AES_XTS)
 		memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len * 2);
 	else
 		memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len);
@@ -154,14 +153,13 @@ static inline u32 create_ctx_hdr(struct ablkcipher_request *req, u32 enc,
 }
 
 static inline u32 create_input_list(struct ablkcipher_request  *req, u32 enc,
-				    u32 cipher_type, u32 aes_key_type,
 				    u32 enc_iv_len)
 {
 	struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req);
 	struct cpt_request_info *req_info = &rctx->cpt_req;
 	u32 argcnt =  0;
 
-	create_ctx_hdr(req, enc, cipher_type, aes_key_type, &argcnt);
+	create_ctx_hdr(req, enc, &argcnt);
 	update_input_iv(req_info, req->info, enc_iv_len, &argcnt);
 	update_input_data(req_info, req->src, req->nbytes, &argcnt);
 	req_info->incnt = argcnt;
@@ -177,7 +175,6 @@ static inline void store_cb_info(struct ablkcipher_request *req,
 }
 
 static inline void create_output_list(struct ablkcipher_request *req,
-				      u32 cipher_type,
 				      u32 enc_iv_len)
 {
 	struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req);
@@ -197,12 +194,9 @@ static inline void create_output_list(struct ablkcipher_request *req,
 	req_info->outcnt = argcnt;
 }
 
-static inline int cvm_enc_dec(struct ablkcipher_request *req, u32 enc,
-			      u32 cipher_type)
+static inline int cvm_enc_dec(struct ablkcipher_request *req, u32 enc)
 {
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct cvm_enc_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	u32 key_type = AES_128_BIT;
 	struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req);
 	u32 enc_iv_len = crypto_ablkcipher_ivsize(tfm);
 	struct fc_context *fctx = &rctx->fctx;
@@ -210,36 +204,10 @@ static inline int cvm_enc_dec(struct ablkcipher_request *req, u32 enc,
 	void *cdev = NULL;
 	int status;
 
-	switch (ctx->key_len) {
-	case 16:
-		key_type = AES_128_BIT;
-		break;
-	case 24:
-		key_type = AES_192_BIT;
-		break;
-	case 32:
-		if (cipher_type == AES_XTS)
-			key_type = AES_128_BIT;
-		else
-			key_type = AES_256_BIT;
-		break;
-	case 64:
-		if (cipher_type == AES_XTS)
-			key_type = AES_256_BIT;
-		else
-			return -EINVAL;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	if (cipher_type == DES3_CBC)
-		key_type = 0;
-
 	memset(req_info, 0, sizeof(struct cpt_request_info));
 	memset(fctx, 0, sizeof(struct fc_context));
-	create_input_list(req, enc, cipher_type, key_type, enc_iv_len);
-	create_output_list(req, cipher_type, enc_iv_len);
+	create_input_list(req, enc, enc_iv_len);
+	create_output_list(req, enc_iv_len);
 	store_cb_info(req, req_info);
 	cdev = dev_handle.cdev[smp_processor_id()];
 	status = cptvf_do_request(cdev, req_info);
@@ -254,34 +222,14 @@ static inline int cvm_enc_dec(struct ablkcipher_request *req, u32 enc,
 		return -EINPROGRESS;
 }
 
-int cvm_des3_encrypt_cbc(struct ablkcipher_request *req)
+int cvm_encrypt(struct ablkcipher_request *req)
 {
-	return cvm_enc_dec(req, true, DES3_CBC);
+	return cvm_enc_dec(req, true);
 }
 
-int cvm_des3_decrypt_cbc(struct ablkcipher_request *req)
+int cvm_decrypt(struct ablkcipher_request *req)
 {
-	return cvm_enc_dec(req, false, DES3_CBC);
-}
-
-int cvm_aes_encrypt_xts(struct ablkcipher_request *req)
-{
-	return cvm_enc_dec(req, true, AES_XTS);
-}
-
-int cvm_aes_decrypt_xts(struct ablkcipher_request *req)
-{
-	return cvm_enc_dec(req, false, AES_XTS);
-}
-
-int cvm_aes_encrypt_cbc(struct ablkcipher_request *req)
-{
-	return cvm_enc_dec(req, true, AES_CBC);
-}
-
-int cvm_aes_decrypt_cbc(struct ablkcipher_request *req)
-{
-	return cvm_enc_dec(req, false, AES_CBC);
+	return cvm_enc_dec(req, false);
 }
 
 int cvm_xts_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
@@ -299,24 +247,93 @@ int cvm_xts_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	ctx->key_len = keylen;
 	memcpy(ctx->enc_key, key1, keylen / 2);
 	memcpy(ctx->enc_key + KEY2_OFFSET, key2, keylen / 2);
+	ctx->cipher_type = AES_XTS;
+	switch (ctx->key_len) {
+	case 32:
+		ctx->key_type = AES_128_BIT;
+		break;
+	case 64:
+		ctx->key_type = AES_256_BIT;
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	return 0;
 }
 
-int cvm_enc_dec_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
-		       u32 keylen)
+static int cvm_validate_keylen(struct cvm_enc_ctx *ctx, u32 keylen)
+{
+	if ((keylen == 16) || (keylen == 24) || (keylen == 32)) {
+		ctx->key_len = keylen;
+		switch (ctx->key_len) {
+		case 16:
+			ctx->key_type = AES_128_BIT;
+			break;
+		case 24:
+			ctx->key_type = AES_192_BIT;
+			break;
+		case 32:
+			ctx->key_type = AES_256_BIT;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		if (ctx->cipher_type == DES3_CBC)
+			ctx->key_type = 0;
+
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int cvm_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+		      u32 keylen, u8 cipher_type)
 {
 	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
 	struct cvm_enc_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	if ((keylen == 16) || (keylen == 24) || (keylen == 32)) {
-		ctx->key_len = keylen;
+	ctx->cipher_type = cipher_type;
+	if (!cvm_validate_keylen(ctx, keylen)) {
 		memcpy(ctx->enc_key, key, keylen);
 		return 0;
+	} else {
+		crypto_ablkcipher_set_flags(cipher,
+					    CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
 	}
-	crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+}
 
-	return -EINVAL;
+static int cvm_cbc_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+			      u32 keylen)
+{
+	return cvm_setkey(cipher, key, keylen, AES_CBC);
+}
+
+static int cvm_ecb_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+			      u32 keylen)
+{
+	return cvm_setkey(cipher, key, keylen, AES_ECB);
+}
+
+static int cvm_cfb_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+			      u32 keylen)
+{
+	return cvm_setkey(cipher, key, keylen, AES_CFB);
+}
+
+static int cvm_cbc_des3_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+			       u32 keylen)
+{
+	return cvm_setkey(cipher, key, keylen, DES3_CBC);
+}
+
+static int cvm_ecb_des3_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+			       u32 keylen)
+{
+	return cvm_setkey(cipher, key, keylen, DES3_ECB);
 }
 
 int cvm_enc_dec_init(struct crypto_tfm *tfm)
@@ -349,8 +366,8 @@ struct crypto_alg algs[] = { {
 			.min_keysize = 2 * AES_MIN_KEY_SIZE,
 			.max_keysize = 2 * AES_MAX_KEY_SIZE,
 			.setkey = cvm_xts_setkey,
-			.encrypt = cvm_aes_encrypt_xts,
-			.decrypt = cvm_aes_decrypt_xts,
+			.encrypt = cvm_encrypt,
+			.decrypt = cvm_decrypt,
 		},
 	},
 	.cra_init = cvm_enc_dec_init,
@@ -369,9 +386,51 @@ struct crypto_alg algs[] = { {
 			.ivsize = AES_BLOCK_SIZE,
 			.min_keysize = AES_MIN_KEY_SIZE,
 			.max_keysize = AES_MAX_KEY_SIZE,
-			.setkey = cvm_enc_dec_setkey,
-			.encrypt = cvm_aes_encrypt_cbc,
-			.decrypt = cvm_aes_decrypt_cbc,
+			.setkey = cvm_cbc_aes_setkey,
+			.encrypt = cvm_encrypt,
+			.decrypt = cvm_decrypt,
+		},
+	},
+	.cra_init = cvm_enc_dec_init,
+	.cra_module = THIS_MODULE,
+}, {
+	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize = AES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct cvm_enc_ctx),
+	.cra_alignmask = 7,
+	.cra_priority = 4001,
+	.cra_name = "ecb(aes)",
+	.cra_driver_name = "cavium-ecb-aes",
+	.cra_type = &crypto_ablkcipher_type,
+	.cra_u = {
+		.ablkcipher = {
+			.ivsize = AES_BLOCK_SIZE,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.setkey = cvm_ecb_aes_setkey,
+			.encrypt = cvm_encrypt,
+			.decrypt = cvm_decrypt,
+		},
+	},
+	.cra_init = cvm_enc_dec_init,
+	.cra_module = THIS_MODULE,
+}, {
+	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize = AES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct cvm_enc_ctx),
+	.cra_alignmask = 7,
+	.cra_priority = 4001,
+	.cra_name = "cfb(aes)",
+	.cra_driver_name = "cavium-cfb-aes",
+	.cra_type = &crypto_ablkcipher_type,
+	.cra_u = {
+		.ablkcipher = {
+			.ivsize = AES_BLOCK_SIZE,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.setkey = cvm_cfb_aes_setkey,
+			.encrypt = cvm_encrypt,
+			.decrypt = cvm_decrypt,
 		},
 	},
 	.cra_init = cvm_enc_dec_init,
@@ -390,9 +449,30 @@ struct crypto_alg algs[] = { {
 			.min_keysize = DES3_EDE_KEY_SIZE,
 			.max_keysize = DES3_EDE_KEY_SIZE,
 			.ivsize = DES_BLOCK_SIZE,
-			.setkey = cvm_enc_dec_setkey,
-			.encrypt = cvm_des3_encrypt_cbc,
-			.decrypt = cvm_des3_decrypt_cbc,
+			.setkey = cvm_cbc_des3_setkey,
+			.encrypt = cvm_encrypt,
+			.decrypt = cvm_decrypt,
+		},
+	},
+	.cra_init = cvm_enc_dec_init,
+	.cra_module = THIS_MODULE,
+}, {
+	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct cvm_des3_ctx),
+	.cra_alignmask = 7,
+	.cra_priority = 4001,
+	.cra_name = "ecb(des3_ede)",
+	.cra_driver_name = "cavium-ecb-des3_ede",
+	.cra_type = &crypto_ablkcipher_type,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize = DES3_EDE_KEY_SIZE,
+			.max_keysize = DES3_EDE_KEY_SIZE,
+			.ivsize = DES_BLOCK_SIZE,
+			.setkey = cvm_ecb_des3_setkey,
+			.encrypt = cvm_encrypt,
+			.decrypt = cvm_decrypt,
 		},
 	},
 	.cra_init = cvm_enc_dec_init,
diff --git a/drivers/crypto/cavium/cpt/cptvf_algs.h b/drivers/crypto/cavium/cpt/cptvf_algs.h
index a12050d..902f257 100644
--- a/drivers/crypto/cavium/cpt/cptvf_algs.h
+++ b/drivers/crypto/cavium/cpt/cptvf_algs.h
@@ -77,6 +77,11 @@ union encr_ctrl {
 	} e;
 };
 
+struct cvm_cipher {
+	const char *name;
+	u8 value;
+};
+
 struct enc_context {
 	union encr_ctrl enc_ctrl;
 	u8 encr_key[32];
@@ -96,6 +101,8 @@ struct fc_context {
 struct cvm_enc_ctx {
 	u32 key_len;
 	u8 enc_key[MAX_KEY_SIZE];
+	u8 cipher_type:4;
+	u8 key_type:2;
 };
 
 struct cvm_des3_ctx {
diff --git a/drivers/crypto/cavium/cpt/cptvf_main.c b/drivers/crypto/cavium/cpt/cptvf_main.c
index 6ffc740..5c796ed 100644
--- a/drivers/crypto/cavium/cpt/cptvf_main.c
+++ b/drivers/crypto/cavium/cpt/cptvf_main.c
@@ -525,7 +525,7 @@ static irqreturn_t cptvf_misc_intr_handler(int irq, void *cptvf_irq)
 	intr = cptvf_read_vf_misc_intr_status(cptvf);
 	/*Check for MISC interrupt types*/
 	if (likely(intr & CPT_VF_INTR_MBOX_MASK)) {
-		dev_err(&pdev->dev, "Mailbox interrupt 0x%llx on CPT VF %d\n",
+		dev_dbg(&pdev->dev, "Mailbox interrupt 0x%llx on CPT VF %d\n",
 			intr, cptvf->vfid);
 		cptvf_handle_mbox_intr(cptvf);
 		cptvf_clear_mbox_intr(cptvf);
diff --git a/drivers/crypto/cavium/nitrox/Kconfig b/drivers/crypto/cavium/nitrox/Kconfig
new file mode 100644
index 0000000..731e6a5
--- /dev/null
+++ b/drivers/crypto/cavium/nitrox/Kconfig
@@ -0,0 +1,21 @@
+#
+# Cavium NITROX Crypto Device configuration
+#
+config CRYPTO_DEV_NITROX
+	tristate
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_AES
+	select CRYPTO_DES
+	select FW_LOADER
+
+config CRYPTO_DEV_NITROX_CNN55XX
+	tristate "Support for Cavium CNN55XX driver"
+	depends on PCI_MSI && 64BIT
+	select CRYPTO_DEV_NITROX
+	default m
+	help
+	  Support for Cavium NITROX family CNN55XX driver
+	  for accelerating crypto workloads.
+
+	  To compile this as a module, choose M here: the module
+	  will be called n5pf.
diff --git a/drivers/crypto/cavium/nitrox/Makefile b/drivers/crypto/cavium/nitrox/Makefile
new file mode 100644
index 0000000..5af2e43
--- /dev/null
+++ b/drivers/crypto/cavium/nitrox/Makefile
@@ -0,0 +1,8 @@
+obj-$(CONFIG_CRYPTO_DEV_NITROX_CNN55XX) += n5pf.o
+
+n5pf-objs := nitrox_main.o \
+	nitrox_isr.o \
+	nitrox_lib.o \
+	nitrox_hal.o \
+	nitrox_reqmgr.o \
+	nitrox_algs.o
diff --git a/drivers/crypto/cavium/nitrox/nitrox_algs.c b/drivers/crypto/cavium/nitrox/nitrox_algs.c
new file mode 100644
index 0000000..ce33027
--- /dev/null
+++ b/drivers/crypto/cavium/nitrox/nitrox_algs.c
@@ -0,0 +1,457 @@
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+
+#include <crypto/aes.h>
+#include <crypto/skcipher.h>
+#include <crypto/ctr.h>
+#include <crypto/des.h>
+#include <crypto/xts.h>
+
+#include "nitrox_dev.h"
+#include "nitrox_common.h"
+#include "nitrox_req.h"
+
+#define PRIO 4001
+
+struct nitrox_cipher {
+	const char *name;
+	enum flexi_cipher value;
+};
+
+/*
+ * Supported ciphers: algorithm name -> flexi cipher type (NULL name ends it).
+ */
+static const struct nitrox_cipher flexi_cipher_table[] = {
+	{ "null",		CIPHER_NULL },
+	{ "cbc(des3_ede)",	CIPHER_3DES_CBC },
+	{ "ecb(des3_ede)",	CIPHER_3DES_ECB },
+	{ "cbc(aes)",		CIPHER_AES_CBC },
+	{ "ecb(aes)",		CIPHER_AES_ECB },
+	{ "cfb(aes)",		CIPHER_AES_CFB },
+	{ "rfc3686(ctr(aes))",	CIPHER_AES_CTR },
+	{ "xts(aes)",		CIPHER_AES_XTS },
+	{ "cts(cbc(aes))",	CIPHER_AES_CBC_CTS },
+	{ NULL,			CIPHER_INVALID }
+};
+
+static enum flexi_cipher flexi_cipher_type(const char *name)
+{
+	const struct nitrox_cipher *cipher = flexi_cipher_table;
+
+	while (cipher->name) {
+		if (!strcmp(cipher->name, name))
+			break;
+		cipher++;
+	}
+	return cipher->value;
+}
+
+static int flexi_aes_keylen(int keylen)
+{
+	int aes_keylen;
+
+	switch (keylen) {
+	case AES_KEYSIZE_128:
+		aes_keylen = 1;
+		break;
+	case AES_KEYSIZE_192:
+		aes_keylen = 2;
+		break;
+	case AES_KEYSIZE_256:
+		aes_keylen = 3;
+		break;
+	default:
+		aes_keylen = -EINVAL;
+		break;
+	}
+	return aes_keylen;
+}
+
+static int nitrox_skcipher_init(struct crypto_skcipher *tfm)
+{
+	struct nitrox_crypto_ctx *nctx = crypto_skcipher_ctx(tfm);
+	void *fctx;
+
+	/* get the first device */
+	nctx->ndev = nitrox_get_first_device();
+	if (!nctx->ndev)
+		return -ENODEV;
+
+	/* allocate nitrox crypto context */
+	fctx = crypto_alloc_context(nctx->ndev);
+	if (!fctx) {
+		nitrox_put_device(nctx->ndev);
+		return -ENOMEM;
+	}
+	nctx->u.ctx_handle = (uintptr_t)fctx;
+	crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(tfm) +
+				    sizeof(struct nitrox_kcrypt_request));
+	return 0;
+}
+
+static void nitrox_skcipher_exit(struct crypto_skcipher *tfm)
+{
+	struct nitrox_crypto_ctx *nctx = crypto_skcipher_ctx(tfm);
+
+	/* free the nitrox crypto context */
+	if (nctx->u.ctx_handle) {
+		struct flexi_crypto_context *fctx = nctx->u.fctx;
+
+		memset(&fctx->crypto, 0, sizeof(struct crypto_keys));
+		memset(&fctx->auth, 0, sizeof(struct auth_keys));
+		crypto_free_context((void *)fctx);
+	}
+	nitrox_put_device(nctx->ndev);
+
+	nctx->u.ctx_handle = 0;
+	nctx->ndev = NULL;
+}
+
+static inline int nitrox_skcipher_setkey(struct crypto_skcipher *cipher,
+					 int aes_keylen, const u8 *key,
+					 unsigned int keylen)
+{
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher);
+	struct nitrox_crypto_ctx *nctx = crypto_tfm_ctx(tfm);
+	struct flexi_crypto_context *fctx;
+	enum flexi_cipher cipher_type;
+	const char *name;
+
+	name = crypto_tfm_alg_name(tfm);
+	cipher_type = flexi_cipher_type(name);
+	if (unlikely(cipher_type == CIPHER_INVALID)) {
+		pr_err("unsupported cipher: %s\n", name);
+		return -EINVAL;
+	}
+
+	/* fill crypto context */
+	fctx = nctx->u.fctx;
+	fctx->flags = 0;
+	fctx->w0.cipher_type = cipher_type;
+	fctx->w0.aes_keylen = aes_keylen;
+	fctx->w0.iv_source = IV_FROM_DPTR;
+	fctx->flags = cpu_to_be64(*(u64 *)&fctx->w0);
+	/* copy the key to context */
+	memcpy(fctx->crypto.u.key, key, keylen);
+
+	return 0;
+}
+
+static int nitrox_aes_setkey(struct crypto_skcipher *cipher, const u8 *key,
+			     unsigned int keylen)
+{
+	int aes_keylen;
+
+	aes_keylen = flexi_aes_keylen(keylen);
+	if (aes_keylen < 0) {
+		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	return nitrox_skcipher_setkey(cipher, aes_keylen, key, keylen);
+}
+
+static void nitrox_skcipher_callback(struct skcipher_request *skreq,
+				     int err)
+{
+	if (err) {
+		pr_err_ratelimited("request failed status 0x%0x\n", err);
+		err = -EINVAL;
+	}
+	skcipher_request_complete(skreq, err);
+}
+
+static int nitrox_skcipher_crypt(struct skcipher_request *skreq, bool enc)
+{
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(skreq);
+	struct nitrox_crypto_ctx *nctx = crypto_skcipher_ctx(cipher);
+	struct nitrox_kcrypt_request *nkreq = skcipher_request_ctx(skreq);
+	int ivsize = crypto_skcipher_ivsize(cipher);
+	struct se_crypto_request *creq;
+
+	creq = &nkreq->creq;
+	creq->flags = skreq->base.flags;
+	creq->gfp = (skreq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		     GFP_KERNEL : GFP_ATOMIC;
+
+	/* fill the request */
+	creq->ctrl.value = 0;
+	creq->opcode = FLEXI_CRYPTO_ENCRYPT_HMAC;
+	creq->ctrl.s.arg = (enc ? ENCRYPT : DECRYPT);
+	/* param0: length of the data to be encrypted */
+	creq->gph.param0 = cpu_to_be16(skreq->cryptlen);
+	creq->gph.param1 = 0;
+	/* param2: encryption data offset */
+	creq->gph.param2 = cpu_to_be16(ivsize);
+	creq->gph.param3 = 0;
+
+	creq->ctx_handle = nctx->u.ctx_handle;
+	creq->ctrl.s.ctxl = sizeof(struct flexi_crypto_context);
+
+	/* copy the iv */
+	memcpy(creq->iv, skreq->iv, ivsize);
+	creq->ivsize = ivsize;
+	creq->src = skreq->src;
+	creq->dst = skreq->dst;
+
+	nkreq->nctx = nctx;
+	nkreq->skreq = skreq;
+
+	/* send the crypto request */
+	return nitrox_process_se_request(nctx->ndev, creq,
+					 nitrox_skcipher_callback, skreq);
+}
+
+static int nitrox_aes_encrypt(struct skcipher_request *skreq)
+{
+	return nitrox_skcipher_crypt(skreq, true);
+}
+
+static int nitrox_aes_decrypt(struct skcipher_request *skreq)
+{
+	return nitrox_skcipher_crypt(skreq, false);
+}
+
+static int nitrox_3des_setkey(struct crypto_skcipher *cipher,
+			      const u8 *key, unsigned int keylen)
+{
+	if (keylen != DES3_EDE_KEY_SIZE) {
+		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	return nitrox_skcipher_setkey(cipher, 0, key, keylen);
+}
+
+static int nitrox_3des_encrypt(struct skcipher_request *skreq)
+{
+	return nitrox_skcipher_crypt(skreq, true);
+}
+
+static int nitrox_3des_decrypt(struct skcipher_request *skreq)
+{
+	return nitrox_skcipher_crypt(skreq, false);
+}
+
+static int nitrox_aes_xts_setkey(struct crypto_skcipher *cipher,
+				 const u8 *key, unsigned int keylen)
+{
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher);
+	struct nitrox_crypto_ctx *nctx = crypto_tfm_ctx(tfm);
+	struct flexi_crypto_context *fctx;
+	int aes_keylen, ret;
+
+	ret = xts_check_key(tfm, key, keylen);
+	if (ret)
+		return ret;
+
+	keylen /= 2;
+
+	aes_keylen = flexi_aes_keylen(keylen);
+	if (aes_keylen < 0) {
+		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	fctx = nctx->u.fctx;
+	/* copy KEY2 */
+	memcpy(fctx->auth.u.key2, (key + keylen), keylen);
+
+	return nitrox_skcipher_setkey(cipher, aes_keylen, key, keylen);
+}
+
+static int nitrox_aes_ctr_rfc3686_setkey(struct crypto_skcipher *cipher,
+					 const u8 *key, unsigned int keylen)
+{
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher);
+	struct nitrox_crypto_ctx *nctx = crypto_tfm_ctx(tfm);
+	struct flexi_crypto_context *fctx;
+	int aes_keylen;
+
+	if (keylen < CTR_RFC3686_NONCE_SIZE)
+		return -EINVAL;
+
+	fctx = nctx->u.fctx;
+
+	memcpy(fctx->crypto.iv, key + (keylen - CTR_RFC3686_NONCE_SIZE),
+	       CTR_RFC3686_NONCE_SIZE);
+
+	keylen -= CTR_RFC3686_NONCE_SIZE;
+
+	aes_keylen = flexi_aes_keylen(keylen);
+	if (aes_keylen < 0) {
+		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	return nitrox_skcipher_setkey(cipher, aes_keylen, key, keylen);
+}
+
+static struct skcipher_alg nitrox_skciphers[] = { {
+	.base = {
+		.cra_name = "cbc(aes)",
+		.cra_driver_name = "n5_cbc(aes)",
+		.cra_priority = PRIO,
+		.cra_flags = CRYPTO_ALG_ASYNC,
+		.cra_blocksize = AES_BLOCK_SIZE,
+		.cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
+		.cra_alignmask = 0,
+		.cra_module = THIS_MODULE,
+	},
+	.min_keysize = AES_MIN_KEY_SIZE,
+	.max_keysize = AES_MAX_KEY_SIZE,
+	.ivsize = AES_BLOCK_SIZE,
+	.setkey = nitrox_aes_setkey,
+	.encrypt = nitrox_aes_encrypt,
+	.decrypt = nitrox_aes_decrypt,
+	.init = nitrox_skcipher_init,
+	.exit = nitrox_skcipher_exit,
+}, {
+	.base = {
+		.cra_name = "ecb(aes)",
+		.cra_driver_name = "n5_ecb(aes)",
+		.cra_priority = PRIO,
+		.cra_flags = CRYPTO_ALG_ASYNC,
+		.cra_blocksize = AES_BLOCK_SIZE,
+		.cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
+		.cra_alignmask = 0,
+		.cra_module = THIS_MODULE,
+	},
+	.min_keysize = AES_MIN_KEY_SIZE,
+	.max_keysize = AES_MAX_KEY_SIZE,
+	.ivsize = AES_BLOCK_SIZE,
+	.setkey = nitrox_aes_setkey,
+	.encrypt = nitrox_aes_encrypt,
+	.decrypt = nitrox_aes_decrypt,
+	.init = nitrox_skcipher_init,
+	.exit = nitrox_skcipher_exit,
+}, {
+	.base = {
+		.cra_name = "cfb(aes)",
+		.cra_driver_name = "n5_cfb(aes)",
+		.cra_priority = PRIO,
+		.cra_flags = CRYPTO_ALG_ASYNC,
+		.cra_blocksize = AES_BLOCK_SIZE,
+		.cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
+		.cra_alignmask = 0,
+		.cra_module = THIS_MODULE,
+	},
+	.min_keysize = AES_MIN_KEY_SIZE,
+	.max_keysize = AES_MAX_KEY_SIZE,
+	.ivsize = AES_BLOCK_SIZE,
+	.setkey = nitrox_aes_setkey,
+	.encrypt = nitrox_aes_encrypt,
+	.decrypt = nitrox_aes_decrypt,
+	.init = nitrox_skcipher_init,
+	.exit = nitrox_skcipher_exit,
+}, {
+	.base = {
+		.cra_name = "xts(aes)",
+		.cra_driver_name = "n5_xts(aes)",
+		.cra_priority = PRIO,
+		.cra_flags = CRYPTO_ALG_ASYNC,
+		.cra_blocksize = AES_BLOCK_SIZE,
+		.cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
+		.cra_alignmask = 0,
+		.cra_module = THIS_MODULE,
+	},
+	.min_keysize = 2 * AES_MIN_KEY_SIZE,
+	.max_keysize = 2 * AES_MAX_KEY_SIZE,
+	.ivsize = AES_BLOCK_SIZE,
+	.setkey = nitrox_aes_xts_setkey,
+	.encrypt = nitrox_aes_encrypt,
+	.decrypt = nitrox_aes_decrypt,
+	.init = nitrox_skcipher_init,
+	.exit = nitrox_skcipher_exit,
+}, {
+	.base = {
+		.cra_name = "rfc3686(ctr(aes))",
+		.cra_driver_name = "n5_rfc3686(ctr(aes))",
+		.cra_priority = PRIO,
+		.cra_flags = CRYPTO_ALG_ASYNC,
+		.cra_blocksize = 1,
+		.cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
+		.cra_alignmask = 0,
+		.cra_module = THIS_MODULE,
+	},
+	.min_keysize = AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+	.max_keysize = AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+	.ivsize = CTR_RFC3686_IV_SIZE,
+	.init = nitrox_skcipher_init,
+	.exit = nitrox_skcipher_exit,
+	.setkey = nitrox_aes_ctr_rfc3686_setkey,
+	.encrypt = nitrox_aes_encrypt,
+	.decrypt = nitrox_aes_decrypt,
+}, {
+	.base = {
+		.cra_name = "cts(cbc(aes))",
+		.cra_driver_name = "n5_cts(cbc(aes))",
+		.cra_priority = PRIO,
+		.cra_flags = CRYPTO_ALG_ASYNC,
+		.cra_blocksize = AES_BLOCK_SIZE,
+		.cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
+		.cra_alignmask = 0,
+		.cra_type = &crypto_ablkcipher_type,
+		.cra_module = THIS_MODULE,
+	},
+	.min_keysize = AES_MIN_KEY_SIZE,
+	.max_keysize = AES_MAX_KEY_SIZE,
+	.ivsize = AES_BLOCK_SIZE,
+	.setkey = nitrox_aes_setkey,
+	.encrypt = nitrox_aes_encrypt,
+	.decrypt = nitrox_aes_decrypt,
+	.init = nitrox_skcipher_init,
+	.exit = nitrox_skcipher_exit,
+}, {
+	.base = {
+		.cra_name = "cbc(des3_ede)",
+		.cra_driver_name = "n5_cbc(des3_ede)",
+		.cra_priority = PRIO,
+		.cra_flags = CRYPTO_ALG_ASYNC,
+		.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+		.cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
+		.cra_alignmask = 0,
+		.cra_module = THIS_MODULE,
+	},
+	.min_keysize = DES3_EDE_KEY_SIZE,
+	.max_keysize = DES3_EDE_KEY_SIZE,
+	.ivsize = DES3_EDE_BLOCK_SIZE,
+	.setkey = nitrox_3des_setkey,
+	.encrypt = nitrox_3des_encrypt,
+	.decrypt = nitrox_3des_decrypt,
+	.init = nitrox_skcipher_init,
+	.exit = nitrox_skcipher_exit,
+}, {
+	.base = {
+		.cra_name = "ecb(des3_ede)",
+		.cra_driver_name = "n5_ecb(des3_ede)",
+		.cra_priority = PRIO,
+		.cra_flags = CRYPTO_ALG_ASYNC,
+		.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+		.cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
+		.cra_alignmask = 0,
+		.cra_module = THIS_MODULE,
+	},
+	.min_keysize = DES3_EDE_KEY_SIZE,
+	.max_keysize = DES3_EDE_KEY_SIZE,
+	.ivsize = DES3_EDE_BLOCK_SIZE,
+	.setkey = nitrox_3des_setkey,
+	.encrypt = nitrox_3des_encrypt,
+	.decrypt = nitrox_3des_decrypt,
+	.init = nitrox_skcipher_init,
+	.exit = nitrox_skcipher_exit,
+}
+
+};
+
+int nitrox_crypto_register(void)
+{
+	return crypto_register_skciphers(nitrox_skciphers,
+					 ARRAY_SIZE(nitrox_skciphers));
+}
+
+void nitrox_crypto_unregister(void)
+{
+	crypto_unregister_skciphers(nitrox_skciphers,
+				    ARRAY_SIZE(nitrox_skciphers));
+}
diff --git a/drivers/crypto/cavium/nitrox/nitrox_common.h b/drivers/crypto/cavium/nitrox/nitrox_common.h
new file mode 100644
index 0000000..4888c78
--- /dev/null
+++ b/drivers/crypto/cavium/nitrox/nitrox_common.h
@@ -0,0 +1,42 @@
+#ifndef __NITROX_COMMON_H
+#define __NITROX_COMMON_H
+
+#include "nitrox_dev.h"
+#include "nitrox_req.h"
+
+int nitrox_crypto_register(void);
+void nitrox_crypto_unregister(void);
+void *crypto_alloc_context(struct nitrox_device *ndev);
+void crypto_free_context(void *ctx);
+struct nitrox_device *nitrox_get_first_device(void);
+void nitrox_put_device(struct nitrox_device *ndev);
+
+void nitrox_pf_cleanup_isr(struct nitrox_device *ndev);
+int nitrox_pf_init_isr(struct nitrox_device *ndev);
+
+int nitrox_common_sw_init(struct nitrox_device *ndev);
+void nitrox_common_sw_cleanup(struct nitrox_device *ndev);
+
+void pkt_slc_resp_handler(unsigned long data);
+int nitrox_process_se_request(struct nitrox_device *ndev,
+			      struct se_crypto_request *req,
+			      completion_t cb,
+			      struct skcipher_request *skreq);
+void backlog_qflush_work(struct work_struct *work);
+
+void nitrox_config_emu_unit(struct nitrox_device *ndev);
+void nitrox_config_pkt_input_rings(struct nitrox_device *ndev);
+void nitrox_config_pkt_solicit_ports(struct nitrox_device *ndev);
+void nitrox_config_vfmode(struct nitrox_device *ndev, int mode);
+void nitrox_config_nps_unit(struct nitrox_device *ndev);
+void nitrox_config_pom_unit(struct nitrox_device *ndev);
+void nitrox_config_rand_unit(struct nitrox_device *ndev);
+void nitrox_config_efl_unit(struct nitrox_device *ndev);
+void nitrox_config_bmi_unit(struct nitrox_device *ndev);
+void nitrox_config_bmo_unit(struct nitrox_device *ndev);
+void nitrox_config_lbc_unit(struct nitrox_device *ndev);
+void invalidate_lbc(struct nitrox_device *ndev);
+void enable_pkt_input_ring(struct nitrox_device *ndev, int ring);
+void enable_pkt_solicit_port(struct nitrox_device *ndev, int port);
+
+#endif /* __NITROX_COMMON_H */
diff --git a/drivers/crypto/cavium/nitrox/nitrox_csr.h b/drivers/crypto/cavium/nitrox/nitrox_csr.h
new file mode 100644
index 0000000..30b04c4
--- /dev/null
+++ b/drivers/crypto/cavium/nitrox/nitrox_csr.h
@@ -0,0 +1,1084 @@
+#ifndef __NITROX_CSR_H
+#define __NITROX_CSR_H
+
+#include <asm/byteorder.h>
+#include <linux/types.h>
+
+/* EMU clusters: cluster count and AE/SE cores per cluster */
+#define NR_CLUSTERS		4
+#define AE_CORES_PER_CLUSTER	20
+#define SE_CORES_PER_CLUSTER	16
+
+/* BIST registers; the ...X(_i) macros take an instance index */
+#define EMU_BIST_STATUSX(_i)	(0x1402700 + ((_i) * 0x40000))
+#define UCD_BIST_STATUS		0x12C0070
+#define NPS_CORE_BIST_REG	0x10000E8
+#define NPS_CORE_NPC_BIST_REG	0x1000128
+#define NPS_PKT_SLC_BIST_REG	0x1040088
+#define NPS_PKT_IN_BIST_REG	0x1040100
+#define POM_BIST_REG		0x11C0100
+#define BMI_BIST_REG		0x1140080
+#define EFL_CORE_BIST_REGX(_i)	(0x1240100 + ((_i) * 0x400))
+#define EFL_TOP_BIST_STAT	0x1241090
+#define BMO_BIST_REG		0x1180080
+#define LBC_BIST_STATUS		0x1200020
+#define PEM_BIST_STATUSX(_i)	(0x1080468 | ((_i) << 18))
+
+/* EMU registers; instance _i is at a 0x40000 stride */
+#define EMU_SE_ENABLEX(_i)	(0x1400000 + ((_i) * 0x40000))
+#define EMU_AE_ENABLEX(_i)	(0x1400008 + ((_i) * 0x40000))
+#define EMU_WD_INT_ENA_W1SX(_i)	(0x1402318 + ((_i) * 0x40000))
+#define EMU_GE_INT_ENA_W1SX(_i)	(0x1402518 + ((_i) * 0x40000))
+#define EMU_FUSE_MAPX(_i)	(0x1402708 + ((_i) * 0x40000))
+
+/* UCD registers */
+#define UCD_UCODE_LOAD_BLOCK_NUM	0x12C0010
+#define UCD_UCODE_LOAD_IDX_DATAX(_i)	(0x12C0018 + ((_i) * 0x20))
+#define UCD_SE_EID_UCODE_BLOCK_NUMX(_i)	(0x12C0000 + ((_i) * 0x1000))
+
+/* NPS core registers */
+#define NPS_CORE_GBL_VFCFG	0x1000000
+#define NPS_CORE_CONTROL	0x1000008
+#define NPS_CORE_INT_ACTIVE	0x1000080
+#define NPS_CORE_INT		0x10000A0
+#define NPS_CORE_INT_ENA_W1S	0x10000B8
+#define NPS_STATS_PKT_DMA_RD_CNT	0x1000180
+#define NPS_STATS_PKT_DMA_WR_CNT	0x1000190
+
+/* NPS packet registers; indexed (_i) macros use a 0x40000 stride */
+#define NPS_PKT_INT				0x1040018
+#define NPS_PKT_IN_RERR_HI		0x1040108
+#define NPS_PKT_IN_RERR_HI_ENA_W1S	0x1040120
+#define NPS_PKT_IN_RERR_LO		0x1040128
+#define NPS_PKT_IN_RERR_LO_ENA_W1S	0x1040140
+#define NPS_PKT_IN_ERR_TYPE		0x1040148
+#define NPS_PKT_IN_ERR_TYPE_ENA_W1S	0x1040160
+#define NPS_PKT_IN_INSTR_CTLX(_i)	(0x10060 + ((_i) * 0x40000))
+#define NPS_PKT_IN_INSTR_BADDRX(_i)	(0x10068 + ((_i) * 0x40000))
+#define NPS_PKT_IN_INSTR_RSIZEX(_i)	(0x10070 + ((_i) * 0x40000))
+#define NPS_PKT_IN_DONE_CNTSX(_i)	(0x10080 + ((_i) * 0x40000))
+#define NPS_PKT_IN_INSTR_BAOFF_DBELLX(_i)	(0x10078 + ((_i) * 0x40000))
+#define NPS_PKT_IN_INT_LEVELSX(_i)		(0x10088 + ((_i) * 0x40000))
+
+#define NPS_PKT_SLC_RERR_HI		0x1040208
+#define NPS_PKT_SLC_RERR_HI_ENA_W1S	0x1040220
+#define NPS_PKT_SLC_RERR_LO		0x1040228
+#define NPS_PKT_SLC_RERR_LO_ENA_W1S	0x1040240
+#define NPS_PKT_SLC_ERR_TYPE		0x1040248
+#define NPS_PKT_SLC_ERR_TYPE_ENA_W1S	0x1040260
+#define NPS_PKT_SLC_CTLX(_i)		(0x10000 + ((_i) * 0x40000))
+#define NPS_PKT_SLC_CNTSX(_i)		(0x10008 + ((_i) * 0x40000))
+#define NPS_PKT_SLC_INT_LEVELSX(_i)	(0x10010 + ((_i) * 0x40000))
+
+/* POM registers */
+#define POM_INT_ENA_W1S		0x11C0018
+#define POM_GRP_EXECMASKX(_i)	(0x11C1100 | ((_i) * 8))
+#define POM_INT		0x11C0000
+#define POM_PERF_CTL	0x11CC400
+
+/* BMI registers */
+#define BMI_INT		0x1140000
+#define BMI_CTL		0x1140020
+#define BMI_INT_ENA_W1S	0x1140018
+#define BMI_NPS_PKT_CNT	0x1140070
+
+/* EFL registers; indexed (_i) macros use a 0x400 stride */
+#define EFL_CORE_INT_ENA_W1SX(_i)		(0x1240018 + ((_i) * 0x400))
+#define EFL_CORE_VF_ERR_INT0X(_i)		(0x1240050 + ((_i) * 0x400))
+#define EFL_CORE_VF_ERR_INT0_ENA_W1SX(_i)	(0x1240068 + ((_i) * 0x400))
+#define EFL_CORE_VF_ERR_INT1X(_i)		(0x1240070 + ((_i) * 0x400))
+#define EFL_CORE_VF_ERR_INT1_ENA_W1SX(_i)	(0x1240088 + ((_i) * 0x400))
+#define EFL_CORE_SE_ERR_INTX(_i)		(0x12400A0 + ((_i) * 0x400))
+#define EFL_RNM_CTL_STATUS			0x1241800
+#define EFL_CORE_INTX(_i)			(0x1240000 + ((_i) * 0x400))
+
+/* BMO registers */
+#define BMO_CTL2		0x1180028
+#define BMO_NPS_SLC_PKT_CNT	0x1180078
+
+/* LBC registers */
+#define LBC_INT			0x1200000
+#define LBC_INVAL_CTL		0x1201010
+#define LBC_PLM_VF1_64_INT	0x1202008
+#define LBC_INVAL_STATUS	0x1202010
+#define LBC_INT_ENA_W1S		0x1203000
+#define LBC_PLM_VF1_64_INT_ENA_W1S	0x1205008
+#define LBC_PLM_VF65_128_INT		0x1206008
+#define LBC_ELM_VF1_64_INT		0x1208000
+#define LBC_PLM_VF65_128_INT_ENA_W1S	0x1209008
+#define LBC_ELM_VF1_64_INT_ENA_W1S	0x120B000
+#define LBC_ELM_VF65_128_INT		0x120C000
+#define LBC_ELM_VF65_128_INT_ENA_W1S	0x120F000
+
+/* PEM registers */
+#define PEM0_INT 0x1080428
+
+/**
+ * union emu_fuse_map - EMU Fuse Map Registers
+ * @ae_fuse: Fuse settings for AE 19..0
+ * @se_fuse: Fuse settings for SE 15..0
+ *
+ * A set bit indicates the unit is fuse disabled.
+ */
+union emu_fuse_map {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 valid : 1;
+		u64 raz_52_62 : 11;
+		u64 ae_fuse : 20;
+		u64 raz_16_31 : 16;
+		u64 se_fuse : 16;
+#else
+		u64 se_fuse : 16;
+		u64 raz_16_31 : 16;
+		u64 ae_fuse : 20;
+		u64 raz_52_62 : 11;
+		u64 valid : 1;
+#endif
+	} s;
+};
+
+/**
+ * union emu_se_enable - Symmetric Engine Enable Registers
+ * @enable: Individual enables for each of the cluster's
+ *   16 symmetric engines.
+ */
+union emu_se_enable {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz	: 48;
+		u64 enable : 16;
+#else
+		u64 enable : 16;
+		u64 raz	: 48;
+#endif
+	} s;
+};
+
+/**
+ * union emu_ae_enable - EMU Asymmetric Engine Enable Registers
+ * @enable: Individual enables for each of the cluster's
+ *   20 Asymmetric Engines.
+ */
+union emu_ae_enable {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz	: 44;
+		u64 enable : 20;
+#else
+		u64 enable : 20;
+		u64 raz	: 44;
+#endif
+	} s;
+};
+
+/**
+ * union emu_wd_int_ena_w1s - EMU Interrupt Enable Registers
+ * @ae_wd: Reads or sets enable for EMU(0..3)_WD_INT[AE_WD]
+ * @se_wd: Reads or sets enable for EMU(0..3)_WD_INT[SE_WD]
+ */
+union emu_wd_int_ena_w1s {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz2 : 12;
+		u64 ae_wd : 20;
+		u64 raz1 : 16;
+		u64 se_wd : 16;
+#else
+		u64 se_wd : 16;
+		u64 raz1 : 16;
+		u64 ae_wd : 20;
+		u64 raz2 : 12;
+#endif
+	} s;
+};
+
+/**
+ * union emu_ge_int_ena_w1s - EMU Interrupt Enable set registers
+ * @ae_ge: Reads or sets enable for EMU(0..3)_GE_INT[AE_GE]
+ * @se_ge: Reads or sets enable for EMU(0..3)_GE_INT[SE_GE]
+ */
+union emu_ge_int_ena_w1s {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_52_63 : 12;
+		u64 ae_ge : 20;
+		u64 raz_16_31: 16;
+		u64 se_ge : 16;
+#else
+		u64 se_ge : 16;
+		u64 raz_16_31: 16;
+		u64 ae_ge : 20;
+		u64 raz_52_63 : 12;
+#endif
+	} s;
+};
+
+/**
+ * union nps_pkt_slc_ctl - Solicited Packet Out Control Registers
+ * @rh: Indicates whether to remove or include the response header
+ *   1 = Include, 0 = Remove
+ * @z: If set, 8 trailing 0x00 bytes will be added to the end of the
+ *   outgoing packet.
+ * @enb: Enable for this port.
+ */
+union nps_pkt_slc_ctl {
+	u64 value;
+	struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+		u64 raz : 61;
+		u64 rh : 1;
+		u64 z : 1;
+		u64 enb : 1;
+#else
+		u64 enb : 1;
+		u64 z : 1;
+		u64 rh : 1;
+		u64 raz : 61;
+#endif
+	} s;
+};
+
+/**
+ * union nps_pkt_slc_cnts - Solicited Packet Out Count Registers
+ * @slc_int: Returns a 1 when:
+ *   NPS_PKT_SLC(i)_CNTS[CNT] > NPS_PKT_SLC(i)_INT_LEVELS[CNT], or
+ *   NPS_PKT_SLC(i)_CNTS[TIMER] > NPS_PKT_SLC(i)_INT_LEVELS[TIMET].
+ *   To clear the bit, the CNTS register must be written to clear.
+ * @in_int: Returns a 1 when:
+ *   NPS_PKT_IN(i)_DONE_CNTS[CNT] > NPS_PKT_IN(i)_INT_LEVELS[CNT].
+ *   To clear the bit, the DONE_CNTS register must be written to clear.
+ * @mbox_int: Returns a 1 when:
+ *   NPS_PKT_MBOX_PF_VF(i)_INT[INTR] is set. To clear the bit,
+ *   write NPS_PKT_MBOX_PF_VF(i)_INT[INTR] with 1.
+ * @timer: Timer, incremented every 2048 coprocessor clock cycles
+ *   when [CNT] is not zero. The hardware clears both [TIMER] and
+ *   [INT] when [CNT] goes to 0.
+ * @cnt: Packet counter. Hardware adds to [CNT] as it sends packets out.
+ *   On a write to this CSR, hardware subtracts the amount written to the
+ *   [CNT] field from [CNT].
+ */
+union nps_pkt_slc_cnts {
+	u64 value;
+	struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+		u64 slc_int : 1;
+		u64 uns_int : 1;
+		u64 in_int : 1;
+		u64 mbox_int : 1;
+		u64 resend : 1;
+		u64 raz : 5;
+		u64 timer : 22;
+		u64 cnt : 32;
+#else
+		u64 cnt	: 32;
+		u64 timer : 22;
+		u64 raz	: 5;
+		u64 resend : 1;
+		u64 mbox_int : 1;
+		u64 in_int : 1;
+		u64 uns_int : 1;
+		u64 slc_int : 1;
+#endif
+	} s;
+};
+
+/**
+ * union nps_pkt_slc_int_levels - Solicited Packet Out Interrupt Levels
+ *   Registers.
+ * @bmode: Determines whether NPS_PKT_SLC_CNTS[CNT] is a byte or
+ *   packet counter.
+ * @timet: Output port counter time interrupt threshold.
+ * @cnt: Output port counter interrupt threshold.
+ */
+union nps_pkt_slc_int_levels {
+	u64 value;
+	struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+		u64 bmode : 1;
+		u64 raz	: 9;
+		u64 timet : 22;
+		u64 cnt	: 32;
+#else
+		u64 cnt : 32;
+		u64 timet : 22;
+		u64 raz : 9;
+		u64 bmode : 1;
+#endif
+	} s;
+};
+
+/**
+ * union nps_pkt_int - NPS Packet Interrupt Register
+ * @in_err: Set when any NPS_PKT_IN_RERR_HI/LO bit and
+ *    corresponding NPS_PKT_IN_RERR_*_ENA_* bit are both set.
+ * @uns_err: Set when any NPS_PKT_UNS_RERR_HI/LO bit and
+ *    corresponding NPS_PKT_UNS_RERR_*_ENA_* bit are both set.
+ * @slc_err: Set when any NPS_PKT_SLC_RERR_HI/LO bit and
+ *    corresponding NPS_PKT_SLC_RERR_*_ENA_* bit are both set.
+ */
+union nps_pkt_int {
+	u64 value;
+	struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+		u64 raz	: 54;
+		u64 uns_wto : 1;
+		u64 in_err : 1;
+		u64 uns_err : 1;
+		u64 slc_err : 1;
+		u64 in_dbe : 1;
+		u64 in_sbe : 1;
+		u64 uns_dbe : 1;
+		u64 uns_sbe : 1;
+		u64 slc_dbe : 1;
+		u64 slc_sbe : 1;
+#else
+		u64 slc_sbe : 1;
+		u64 slc_dbe : 1;
+		u64 uns_sbe : 1;
+		u64 uns_dbe : 1;
+		u64 in_sbe : 1;
+		u64 in_dbe : 1;
+		u64 slc_err : 1;
+		u64 uns_err : 1;
+		u64 in_err : 1;
+		u64 uns_wto : 1;
+		u64 raz	: 54;
+#endif
+	} s;
+};
+
+/**
+ * union nps_pkt_in_done_cnts - Input instruction ring counts registers
+ * @slc_int: Returns a 1 when:
+ *    NPS_PKT_SLC(i)_CNTS[CNT] > NPS_PKT_SLC(i)_INT_LEVELS[CNT], or
+ *    NPS_PKT_SLC(i)_CNTS[TIMER] > NPS_PKT_SLC(i)_INT_LEVELS[TIMET]
+ *    To clear the bit, the CNTS register must be
+ *    written to clear the underlying condition
+ * @uns_int: Returns a 1 when:
+ *    NPS_PKT_UNS(i)_CNTS[CNT] > NPS_PKT_UNS(i)_INT_LEVELS[CNT], or
+ *    NPS_PKT_UNS(i)_CNTS[TIMER] > NPS_PKT_UNS(i)_INT_LEVELS[TIMET]
+ *    To clear the bit, the CNTS register must be
+ *    written to clear the underlying condition
+ * @in_int: Returns a 1 when:
+ *    NPS_PKT_IN(i)_DONE_CNTS[CNT] > NPS_PKT_IN(i)_INT_LEVELS[CNT]
+ *    To clear the bit, the DONE_CNTS register
+ *    must be written to clear the underlying condition
+ * @mbox_int: Returns a 1 when:
+ *    NPS_PKT_MBOX_PF_VF(i)_INT[INTR] is set.
+ *    To clear the bit, write NPS_PKT_MBOX_PF_VF(i)_INT[INTR]
+ *    with 1.
+ * @resend: A write of 1 will resend an MSI-X interrupt message if any
+ *    of the following conditions are true for this ring "i".
+ *    NPS_PKT_SLC(i)_CNTS[CNT] > NPS_PKT_SLC(i)_INT_LEVELS[CNT]
+ *    NPS_PKT_SLC(i)_CNTS[TIMER] > NPS_PKT_SLC(i)_INT_LEVELS[TIMET]
+ *    NPS_PKT_UNS(i)_CNTS[CNT] > NPS_PKT_UNS(i)_INT_LEVELS[CNT]
+ *    NPS_PKT_UNS(i)_CNTS[TIMER] > NPS_PKT_UNS(i)_INT_LEVELS[TIMET]
+ *    NPS_PKT_IN(i)_DONE_CNTS[CNT] > NPS_PKT_IN(i)_INT_LEVELS[CNT]
+ *    NPS_PKT_MBOX_PF_VF(i)_INT[INTR] is set
+ * @cnt: Packet counter. Hardware adds to [CNT] as it reads
+ *    packets. On a write to this CSR, hardware subtracts the
+ *    amount written to the [CNT] field from [CNT], which will
+ *    clear PKT_IN(i)_INT_STATUS[INTR] if [CNT] becomes <=
+ *    NPS_PKT_IN(i)_INT_LEVELS[CNT]. This register should be
+ *    cleared before enabling a ring by reading the current
+ *    value and writing it back.
+ */
+union nps_pkt_in_done_cnts {
+	u64 value;
+	struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+		u64 slc_int : 1;
+		u64 uns_int : 1;
+		u64 in_int : 1;
+		u64 mbox_int : 1;
+		u64 resend : 1;
+		u64 raz : 27;
+		u64 cnt	: 32;
+#else
+		u64 cnt	: 32;
+		u64 raz	: 27;
+		u64 resend : 1;
+		u64 mbox_int : 1;
+		u64 in_int : 1;
+		u64 uns_int : 1;
+		u64 slc_int : 1;
+#endif
+	} s;
+};
+
+/**
+ * union nps_pkt_in_instr_ctl - Input Instruction Ring Control Registers.
+ * @is64b: If 1, the ring uses 64-byte instructions. If 0, the
+ *   ring uses 32-byte instructions.
+ * @enb: Enable for the input ring.
+ */
+union nps_pkt_in_instr_ctl {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz	: 62;
+		u64 is64b : 1;
+		u64 enb	: 1;
+#else
+		u64 enb	: 1;
+		u64 is64b : 1;
+		u64 raz : 62;
+#endif
+	} s;
+};
+
+/**
+ * union nps_pkt_in_instr_rsize - Input instruction ring size registers
+ * @rsize: Ring size (number of instructions)
+ */
+union nps_pkt_in_instr_rsize {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz	: 32;
+		u64 rsize : 32;
+#else
+		u64 rsize : 32;
+		u64 raz	: 32;
+#endif
+	} s;
+};
+
+/**
+ * union nps_pkt_in_instr_baoff_dbell - Input instruction ring
+ *   base address offset and doorbell registers
+ * @aoff: Address offset. The offset from the NPS_PKT_IN_INSTR_BADDR
+ *   where the next pointer is read.
+ * @dbell: Pointer list doorbell count. Write operations to this field
+ *   increment the present value here. Read operations return the
+ *   present value.
+ */
+union nps_pkt_in_instr_baoff_dbell {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 aoff : 32;
+		u64 dbell : 32;
+#else
+		u64 dbell : 32;
+		u64 aoff : 32;
+#endif
+	} s;
+};
+
+/**
+ * union nps_core_int_ena_w1s - NPS core interrupt enable set register
+ * @host_nps_wr_err: Reads or sets enable for
+ *   NPS_CORE_INT[HOST_NPS_WR_ERR].
+ * @npco_dma_malform: Reads or sets enable for
+ *   NPS_CORE_INT[NPCO_DMA_MALFORM].
+ * @exec_wr_timeout: Reads or sets enable for
+ *   NPS_CORE_INT[EXEC_WR_TIMEOUT].
+ * @host_wr_timeout: Reads or sets enable for
+ *   NPS_CORE_INT[HOST_WR_TIMEOUT].
+ * @host_wr_err: Reads or sets enable for
+ *   NPS_CORE_INT[HOST_WR_ERR]
+ */
+union nps_core_int_ena_w1s {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz4 : 55;
+		u64 host_nps_wr_err : 1;
+		u64 npco_dma_malform : 1;
+		u64 exec_wr_timeout : 1;
+		u64 host_wr_timeout : 1;
+		u64 host_wr_err : 1;
+		u64 raz3 : 1;
+		u64 raz2 : 1;
+		u64 raz1 : 1;
+		u64 raz0 : 1;
+#else
+		u64 raz0 : 1;
+		u64 raz1 : 1;
+		u64 raz2 : 1;
+		u64 raz3 : 1;
+		u64 host_wr_err	: 1;
+		u64 host_wr_timeout : 1;
+		u64 exec_wr_timeout : 1;
+		u64 npco_dma_malform : 1;
+		u64 host_nps_wr_err : 1;
+		u64 raz4 : 55;
+#endif
+	} s;
+};
+
+/**
+ * union nps_core_gbl_vfcfg - Global VF Configuration Register.
+ * @ilk_disable: When set, this bit indicates that the ILK interface has
+ *    been disabled.
+ * @obaf: BMO allocation control
+ *    0 = allocate per queue
+ *    1 = allocate per VF
+ * @ibaf: BMI allocation control
+ *    0 = allocate per queue
+ *    1 = allocate per VF
+ * @zaf: ZIP allocation control
+ *    0 = allocate per queue
+ *    1 = allocate per VF
+ * @aeaf: AE allocation control
+ *    0 = allocate per queue
+ *    1 = allocate per VF
+ * @seaf: SE allocation control
+ *    0 = allocate per queue
+ *    1 = allocate per VF
+ * @cfg: VF/PF mode.
+ */
+union nps_core_gbl_vfcfg {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64  raz :55;
+		u64  ilk_disable :1;
+		u64  obaf :1;
+		u64  ibaf :1;
+		u64  zaf :1;
+		u64  aeaf :1;
+		u64  seaf :1;
+		u64  cfg :3;
+#else
+		u64  cfg :3;
+		u64  seaf :1;
+		u64  aeaf :1;
+		u64  zaf :1;
+		u64  ibaf :1;
+		u64  obaf :1;
+		u64  ilk_disable :1;
+		u64  raz :55;
+#endif
+	} s;
+};
+
+/**
+ * union nps_core_int_active - NPS Core Interrupt Active Register
+ * @resend: Resend MSI-X interrupt if needs to handle interrupts
+ *    Software can set this bit and then exit the ISR.
+ * @ocla: Set when any OCLA(0)_INT and corresponding OCLA(0)_INT_ENA_W1C
+ *    bit are set
+ * @mbox: Set when any NPS_PKT_MBOX_INT_LO/HI and corresponding
+ *    NPS_PKT_MBOX_INT_LO_ENA_W1C/HI_ENA_W1C bits are set
+ * @emu: bit i is set in [EMU] when any EMU(i)_INT bit is set
+ * @bmo: Set when any BMO_INT bit is set
+ * @bmi: Set when any BMI_INT bit is set or when any non-RO
+ *    BMI_INT and corresponding BMI_INT_ENA_W1C bits are both set
+ * @aqm: Set when any AQM_INT bit is set
+ * @zqm: Set when any ZQM_INT bit is set
+ * @efl: Set when any EFL_INT RO bit is set or when any non-RO EFL_INT
+ *    and corresponding EFL_INT_ENA_W1C bits are both set
+ * @ilk: Set when any ILK_INT bit is set
+ * @lbc: Set when any LBC_INT RO bit is set or when any non-RO LBC_INT
+ *    and corresponding LBC_INT_ENA_W1C bits are both set
+ * @pem: Set when any PEM(0)_INT RO bit is set or when any non-RO
+ *    PEM(0)_INT and corresponding PEM(0)_INT_ENA_W1C bit are both set
+ * @ucd: Set when any UCD_INT bit is set
+ * @zctl: Set when any ZIP_INT RO bit is set or when any non-RO ZIP_INT
+ *    and corresponding ZIP_INT_ENA_W1C bits are both set
+ * @lbm: Set when any LBM_INT bit is set
+ * @nps_pkt: Set when any NPS_PKT_INT bit is set
+ * @nps_core: Set when any NPS_CORE_INT RO bit is set or when non-RO
+ *    NPS_CORE_INT and corresponding NPS_CORE_INT_ENA_W1C bits are both set
+ */
+union nps_core_int_active {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 resend : 1;
+		u64 raz	: 43;
+		u64 ocla : 1;
+		u64 mbox : 1;
+		u64 emu	: 4;
+		u64 bmo	: 1;
+		u64 bmi	: 1;
+		u64 aqm	: 1;
+		u64 zqm	: 1;
+		u64 efl	: 1;
+		u64 ilk	: 1;
+		u64 lbc	: 1;
+		u64 pem	: 1;
+		u64 pom	: 1;
+		u64 ucd	: 1;
+		u64 zctl : 1;
+		u64 lbm	: 1;
+		u64 nps_pkt : 1;
+		u64 nps_core : 1;
+#else
+		u64 nps_core : 1;
+		u64 nps_pkt : 1;
+		u64 lbm	: 1;
+		u64 zctl: 1;
+		u64 ucd	: 1;
+		u64 pom	: 1;
+		u64 pem	: 1;
+		u64 lbc	: 1;
+		u64 ilk	: 1;
+		u64 efl	: 1;
+		u64 zqm	: 1;
+		u64 aqm	: 1;
+		u64 bmi	: 1;
+		u64 bmo	: 1;
+		u64 emu	: 4;
+		u64 mbox : 1;
+		u64 ocla : 1;
+		u64 raz	: 43;
+		u64 resend : 1;
+#endif
+	} s;
+};
+
+/**
+ * union efl_core_int - EFL Interrupt Registers
+ * @epci_decode_err: EPCI decoded a transaction that was unknown
+ *    This error should only occur when there is a microcode/SE error
+ *    and should be considered fatal
+ * @ae_err: An AE uncorrectable error occurred.
+ *    See EFL_CORE(0..3)_AE_ERR_INT
+ * @se_err: An SE uncorrectable error occurred.
+ *    See EFL_CORE(0..3)_SE_ERR_INT
+ * @dbe: Double-bit error occurred in EFL
+ * @sbe: Single-bit error occurred in EFL
+ * @d_left: Asserted when new POM-Header-BMI-data is
+ *    being sent to an Exec, and that Exec has Not read all BMI
+ *    data associated with the previous POM header
+ * @len_ovr: Asserted when an Exec-Read is issued that is more than
+ *    14 greater in length than the BMI data left to be read
+ */
+union efl_core_int {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz	: 57;
+		u64 epci_decode_err : 1;
+		u64 ae_err : 1;
+		u64 se_err : 1;
+		u64 dbe	: 1;
+		u64 sbe	: 1;
+		u64 d_left : 1;
+		u64 len_ovr : 1;
+#else
+		u64 len_ovr : 1;
+		u64 d_left : 1;
+		u64 sbe	: 1;
+		u64 dbe	: 1;
+		u64 se_err : 1;
+		u64 ae_err : 1;
+		u64 epci_decode_err  : 1;
+		u64 raz	: 57;
+#endif
+	} s;
+};
+
+/**
+ * union efl_core_int_ena_w1s - EFL core interrupt enable set register
+ * @epci_decode_err: Reads or sets enable for
+ *   EFL_CORE(0..3)_INT[EPCI_DECODE_ERR].
+ * @d_left: Reads or sets enable for
+ *   EFL_CORE(0..3)_INT[D_LEFT].
+ * @len_ovr: Reads or sets enable for
+ *   EFL_CORE(0..3)_INT[LEN_OVR].
+ */
+union efl_core_int_ena_w1s {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_7_63 : 57;
+		u64 epci_decode_err : 1;
+		u64 raz_2_5 : 4;
+		u64 d_left : 1;
+		u64 len_ovr : 1;
+#else
+		u64 len_ovr : 1;
+		u64 d_left : 1;
+		u64 raz_2_5 : 4;
+		u64 epci_decode_err : 1;
+		u64 raz_7_63 : 57;
+#endif
+	} s;
+};
+
+/**
+ * union efl_rnm_ctl_status - RNM Control and Status Register
+ * @ent_sel: Select input to RNM FIFO
+ * @exp_ent: Exported entropy enable for random number generator
+ * @rng_rst: Reset to RNG. Setting this bit to 1 cancels the generation
+ *    of the current random number.
+ * @rnm_rst: Reset the RNM. Setting this bit to 1 clears all sorted numbers
+ *    in the random number memory.
+ * @rng_en: Enable the output of the RNG.
+ * @ent_en: Entropy enable for random number generator.
+ */
+union efl_rnm_ctl_status {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_9_63 : 55;
+		u64 ent_sel : 4;
+		u64 exp_ent : 1;
+		u64 rng_rst : 1;
+		u64 rnm_rst : 1;
+		u64 rng_en : 1;
+		u64 ent_en : 1;
+#else
+		u64 ent_en : 1;
+		u64 rng_en : 1;
+		u64 rnm_rst : 1;
+		u64 rng_rst : 1;
+		u64 exp_ent : 1;
+		u64 ent_sel : 4;
+		u64 raz_9_63 : 55;
+#endif
+	} s;
+};
+
+/**
+ * union bmi_ctl - BMI control register
+ * @ilk_hdrq_thrsh: Maximum number of header queue locations
+ *   that ILK packets may consume. When the threshold is
+ *   exceeded ILK_XOFF is sent to the BMI_X2P_ARB.
+ * @nps_hdrq_thrsh: Maximum number of header queue locations
+ *   that NPS packets may consume. When the threshold is
+ *   exceeded NPS_XOFF is sent to the BMI_X2P_ARB.
+ * @totl_hdrq_thrsh: Maximum number of header queue locations
+ *   that the sum of ILK and NPS packets may consume.
+ * @ilk_free_thrsh: Maximum number of buffers that ILK packet
+ *   flows may consume before ILK_XOFF is sent to the BMI_X2P_ARB.
+ * @nps_free_thrsh: Maximum number of buffers that NPS packet
+ *   flows may consume before NPS XOFF is sent to the BMI_X2P_ARB.
+ * @totl_free_thrsh: Maximum number of buffers that both ILK and NPS
+ *   packet flows may consume before both NPS_XOFF and ILK_XOFF
+ *   are asserted to the BMI_X2P_ARB.
+ * @max_pkt_len: Maximum packet length, integral number of 256B
+ *   buffers.
+ */
+union bmi_ctl {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_56_63 : 8;
+		u64 ilk_hdrq_thrsh : 8;
+		u64 nps_hdrq_thrsh : 8;
+		u64 totl_hdrq_thrsh : 8;
+		u64 ilk_free_thrsh : 8;
+		u64 nps_free_thrsh : 8;
+		u64 totl_free_thrsh : 8;
+		u64 max_pkt_len : 8;
+#else
+		u64 max_pkt_len : 8;
+		u64 totl_free_thrsh : 8;
+		u64 nps_free_thrsh : 8;
+		u64 ilk_free_thrsh : 8;
+		u64 totl_hdrq_thrsh : 8;
+		u64 nps_hdrq_thrsh : 8;
+		u64 ilk_hdrq_thrsh : 8;
+		u64 raz_56_63 : 8;
+#endif
+	} s;
+};
+
+/**
+ * union bmi_int_ena_w1s - BMI interrupt enable set register
+ * @ilk_req_oflw: Reads or sets enable for
+ *   BMI_INT[ILK_REQ_OFLW].
+ * @nps_req_oflw: Reads or sets enable for
+ *   BMI_INT[NPS_REQ_OFLW].
+ * @fpf_undrrn: Reads or sets enable for
+ *   BMI_INT[FPF_UNDRRN].
+ * @eop_err_ilk: Reads or sets enable for
+ *   BMI_INT[EOP_ERR_ILK].
+ * @eop_err_nps: Reads or sets enable for
+ *   BMI_INT[EOP_ERR_NPS].
+ * @sop_err_ilk: Reads or sets enable for
+ *   BMI_INT[SOP_ERR_ILK].
+ * @sop_err_nps: Reads or sets enable for
+ *   BMI_INT[SOP_ERR_NPS].
+ * @pkt_rcv_err_ilk: Reads or sets enable for
+ *   BMI_INT[PKT_RCV_ERR_ILK].
+ * @pkt_rcv_err_nps: Reads or sets enable for
+ *   BMI_INT[PKT_RCV_ERR_NPS].
+ * @max_len_err_ilk: Reads or sets enable for
+ *   BMI_INT[MAX_LEN_ERR_ILK].
+ * @max_len_err_nps: Reads or sets enable for
+ *   BMI_INT[MAX_LEN_ERR_NPS].
+ */
+union bmi_int_ena_w1s {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_13_63	: 51;
+		u64 ilk_req_oflw : 1;
+		u64 nps_req_oflw : 1;
+		u64 raz_10 : 1;
+		u64 raz_9 : 1;
+		u64 fpf_undrrn	: 1;
+		u64 eop_err_ilk	: 1;
+		u64 eop_err_nps	: 1;
+		u64 sop_err_ilk	: 1;
+		u64 sop_err_nps	: 1;
+		u64 pkt_rcv_err_ilk : 1;
+		u64 pkt_rcv_err_nps : 1;
+		u64 max_len_err_ilk : 1;
+		u64 max_len_err_nps : 1;
+#else
+		u64 max_len_err_nps : 1;
+		u64 max_len_err_ilk : 1;
+		u64 pkt_rcv_err_nps : 1;
+		u64 pkt_rcv_err_ilk : 1;
+		u64 sop_err_nps	: 1;
+		u64 sop_err_ilk	: 1;
+		u64 eop_err_nps	: 1;
+		u64 eop_err_ilk	: 1;
+		u64 fpf_undrrn	: 1;
+		u64 raz_9 : 1;
+		u64 raz_10 : 1;
+		u64 nps_req_oflw : 1;
+		u64 ilk_req_oflw : 1;
+		u64 raz_13_63 : 51;
+#endif
+	} s;
+};
+
+/**
+ * union bmo_ctl2 - BMO Control2 Register
+ * @arb_sel: Determines P2X Arbitration
+ * @ilk_buf_thrsh: Maximum number of buffers that the
+ *    ILK packet flows may consume before ILK XOFF is
+ *    asserted to the POM.
+ * @nps_slc_buf_thrsh: Maximum number of buffers that the
+ *    NPS_SLC packet flow may consume before NPS_SLC XOFF is
+ *    asserted to the POM.
+ * @nps_uns_buf_thrsh: Maximum number of buffers that the
+ *    NPS_UNS packet flow may consume before NPS_UNS XOFF is
+ *    asserted to the POM.
+ * @totl_buf_thrsh: Maximum number of buffers that ILK, NPS_UNS and
+ *    NPS_SLC packet flows may consume before NPS_UNS XOFF, NPS_SLC and
+ *    ILK_XOFF are all asserted to the POM.
+ */
+union bmo_ctl2 {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 arb_sel : 1;
+		u64 raz_32_62 : 31;
+		u64 ilk_buf_thrsh : 8;
+		u64 nps_slc_buf_thrsh : 8;
+		u64 nps_uns_buf_thrsh : 8;
+		u64 totl_buf_thrsh : 8;
+#else
+		u64 totl_buf_thrsh : 8;
+		u64 nps_uns_buf_thrsh : 8;
+		u64 nps_slc_buf_thrsh : 8;
+		u64 ilk_buf_thrsh : 8;
+		u64 raz_32_62 : 31;
+		u64 arb_sel : 1;
+#endif
+	} s;
+};
+
+/**
+ * union pom_int_ena_w1s - POM interrupt enable set register
+ * @illegal_intf: Reads or sets enable for POM_INT[ILLEGAL_INTF].
+ * @illegal_dport: Reads or sets enable for POM_INT[ILLEGAL_DPORT].
+ */
+union pom_int_ena_w1s {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz2 : 60;
+		u64 illegal_intf : 1;
+		u64 illegal_dport : 1;
+		u64 raz1 : 1;
+		u64 raz0 : 1;
+#else
+		u64 raz0 : 1;
+		u64 raz1 : 1;
+		u64 illegal_dport : 1;
+		u64 illegal_intf : 1;
+		u64 raz2 : 60;
+#endif
+	} s;
+};
+
+/**
+ * union lbc_inval_ctl - LBC invalidation control register
+ * @wait_timer: Wait timer for wait state. [WAIT_TIMER] must
+ *   always be written with its reset value.
+ * @cam_inval_start: Software should write [CAM_INVAL_START]=1
+ *   to initiate an LBC cache invalidation. After this, software
+ *   should read LBC_INVAL_STATUS until LBC_INVAL_STATUS[DONE] is set.
+ *   LBC hardware clears [CAM_INVAL_START] before software can
+ *   observe LBC_INVAL_STATUS[DONE] to be set
+ */
+union lbc_inval_ctl {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz2 : 48;
+		u64 wait_timer : 8;
+		u64 raz1 : 6;
+		u64 cam_inval_start : 1;
+		u64 raz0 : 1;
+#else
+		u64 raz0 : 1;
+		u64 cam_inval_start : 1;
+		u64 raz1 : 6;
+		u64 wait_timer : 8;
+		u64 raz2 : 48;
+#endif
+	} s;
+};
+
+/**
+ * union lbc_int_ena_w1s - LBC interrupt enable set register
+ * @cam_hard_err: Reads or sets enable for LBC_INT[CAM_HARD_ERR].
+ * @cam_inval_abort: Reads or sets enable for LBC_INT[CAM_INVAL_ABORT].
+ * @over_fetch_err: Reads or sets enable for LBC_INT[OVER_FETCH_ERR].
+ * @cache_line_to_err: Reads or sets enable for
+ *   LBC_INT[CACHE_LINE_TO_ERR].
+ * @cam_soft_err: Reads or sets enable for
+ *   LBC_INT[CAM_SOFT_ERR].
+ * @dma_rd_err: Reads or sets enable for
+ *   LBC_INT[DMA_RD_ERR].
+ */
+union lbc_int_ena_w1s {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_10_63 : 54;
+		u64 cam_hard_err : 1;
+		u64 cam_inval_abort : 1;
+		u64 over_fetch_err : 1;
+		u64 cache_line_to_err : 1;
+		u64 raz_2_5 : 4;
+		u64 cam_soft_err : 1;
+		u64 dma_rd_err : 1;
+#else
+		u64 dma_rd_err : 1;
+		u64 cam_soft_err : 1;
+		u64 raz_2_5 : 4;
+		u64 cache_line_to_err : 1;
+		u64 over_fetch_err : 1;
+		u64 cam_inval_abort : 1;
+		u64 cam_hard_err : 1;
+		u64 raz_10_63 : 54;
+#endif
+	} s;
+};
+
+/**
+ * union lbc_int - LBC interrupt summary register
+ * @cam_hard_err: indicates a fatal hardware error.
+ *   It requires system reset.
+ *   When [CAM_HARD_ERR] is set, LBC stops logging any new information in
+ *   LBC_POM_MISS_INFO_LOG,
+ *   LBC_POM_MISS_ADDR_LOG,
+ *   LBC_EFL_MISS_INFO_LOG, and
+ *   LBC_EFL_MISS_ADDR_LOG.
+ *   Software should sample them.
+ * @cam_inval_abort: indicates a fatal hardware error.
+ *   System reset is required.
+ * @over_fetch_err: indicates a fatal hardware error
+ *   System reset is required
+ * @cache_line_to_err: is a debug feature.
+ *   This timeout interrupt bit tells the software that
+ *   a cacheline in LBC has non-zero usage and the context
+ *   has not been used for greater than the
+ *   LBC_TO_CNT[TO_CNT] time interval.
+ * @sbe: Memory SBE error. This is recoverable via ECC.
+ *   See LBC_ECC_INT for more details.
+ * @dbe: Memory DBE error. This is fatal and requires a
+ *   system reset.
+ * @pref_dat_len_mismatch_err: Summary bit for context length
+ *   mismatch errors.
+ * @rd_dat_len_mismatch_err: Summary bit for SE read data length
+ *   greater than data prefetch length errors.
+ * @cam_soft_err: is recoverable. Software must complete a
+ *   LBC_INVAL_CTL[CAM_INVAL_START] invalidation sequence and
+ *   then clear [CAM_SOFT_ERR].
+ * @dma_rd_err: A context prefetch read of host memory returned with
+ *   a read error.
+ */
+union lbc_int {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_10_63 : 54;
+		u64 cam_hard_err : 1;
+		u64 cam_inval_abort : 1;
+		u64 over_fetch_err : 1;
+		u64 cache_line_to_err : 1;
+		u64 sbe : 1;
+		u64 dbe	: 1;
+		u64 pref_dat_len_mismatch_err : 1;
+		u64 rd_dat_len_mismatch_err : 1;
+		u64 cam_soft_err : 1;
+		u64 dma_rd_err : 1;
+#else
+		u64 dma_rd_err : 1;
+		u64 cam_soft_err : 1;
+		u64 rd_dat_len_mismatch_err : 1;
+		u64 pref_dat_len_mismatch_err : 1;
+		u64 dbe	: 1;
+		u64 sbe	: 1;
+		u64 cache_line_to_err : 1;
+		u64 over_fetch_err : 1;
+		u64 cam_inval_abort : 1;
+		u64 cam_hard_err : 1;
+		u64 raz_10_63 : 54;
+#endif
+	} s;
+};
+
+/**
+ * union lbc_inval_status - LBC Invalidation status register
+ * @cam_clean_entry_complete_cnt: The number of entries that are
+ *   cleaned up successfully.
+ * @cam_clean_entry_cnt: The number of entries that have the CAM
+ *   inval command issued.
+ * @cam_inval_state: cam invalidation FSM state
+ * @cam_inval_abort: cam invalidation abort
+ * @cam_rst_rdy: lbc_cam reset ready
+ * @done: LBC clears [DONE] when
+ *   LBC_INVAL_CTL[CAM_INVAL_START] is written with a one,
+ *   and sets [DONE] when it completes the invalidation
+ *   sequence.
+ */
+union lbc_inval_status {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz3 : 23;
+		u64 cam_clean_entry_complete_cnt : 9;
+		u64 raz2 : 7;
+		u64 cam_clean_entry_cnt : 9;
+		u64 raz1 : 5;
+		u64 cam_inval_state : 3;
+		u64 raz0 : 5;
+		u64 cam_inval_abort : 1;
+		u64 cam_rst_rdy	: 1;
+		u64 done : 1;
+#else
+		u64 done : 1;
+		u64 cam_rst_rdy : 1;
+		u64 cam_inval_abort : 1;
+		u64 raz0 : 5;
+		u64 cam_inval_state : 3;
+		u64 raz1 : 5;
+		u64 cam_clean_entry_cnt : 9;
+		u64 raz2 : 7;
+		u64 cam_clean_entry_complete_cnt : 9;
+		u64 raz3 : 23;
+#endif
+	} s;
+};
+
+#endif /* __NITROX_CSR_H */
diff --git a/drivers/crypto/cavium/nitrox/nitrox_dev.h b/drivers/crypto/cavium/nitrox/nitrox_dev.h
new file mode 100644
index 0000000..57858b0
--- /dev/null
+++ b/drivers/crypto/cavium/nitrox/nitrox_dev.h
@@ -0,0 +1,179 @@
+#ifndef __NITROX_DEV_H
+#define __NITROX_DEV_H
+
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+
+#define VERSION_LEN 32
+
+struct nitrox_cmdq {
+	/* command queue lock */
+	spinlock_t cmdq_lock;
+	/* response list lock */
+	spinlock_t response_lock;
+	/* backlog list lock */
+	spinlock_t backlog_lock;
+
+	/* request submitted to chip, in progress */
+	struct list_head response_head;
+	/* hw queue full, hold in backlog list */
+	struct list_head backlog_head;
+
+	/* doorbell address */
+	u8 __iomem *dbell_csr_addr;
+	/* base address of the queue, aligned up from head_unaligned */
+	u8 *head;
+
+	struct nitrox_device *ndev;	/* device this queue belongs to */
+	/* flush pending backlog commands */
+	struct work_struct backlog_qflush;
+
+	/* requests posted waiting for completion */
+	atomic_t pending_count;
+	/* requests in backlog queues */
+	atomic_t backlog_count;
+
+	/* command size 32B/64B */
+	u8 instr_size;
+	u8 qno;		/* index of this queue in ndev->pkt_cmdqs */
+	u32 qsize;	/* bytes allocated, alignment padding included */
+
+	/* addresses as returned by the DMA allocation, before alignment */
+	u8 *head_unaligned;
+	dma_addr_t dma_unaligned;
+	/* dma address of the base */
+	dma_addr_t dma;
+};
+
+struct nitrox_hw {
+	/* firmware version string, at most VERSION_LEN bytes */
+	char fw_name[VERSION_LEN];
+
+	u16 vendor_id;
+	u16 device_id;
+	u8 revision_id;
+
+	/* CNN55XX cores */
+	u8 se_cores;	/* SE core count; bounds the POM perf counter loop */
+	u8 ae_cores;
+	u8 zip_cores;
+};
+
+#define MAX_MSIX_VECTOR_NAME	20
+/*
+ * MSI-X vectors for the queues (64 AE, 64 SE and 64 ZIP); the error
+ * condition/mailbox vector comes on top of these.
+ */
+#define MAX_MSIX_VECTORS	192
+
+struct nitrox_msix {
+	struct msix_entry *entries;	/* nr_entries MSI-X table entries */
+	char **names;			/* nr_entries IRQ name buffers */
+	DECLARE_BITMAP(irqs, MAX_MSIX_VECTORS + 1); /* rings + core vector */
+	u32 nr_entries;			/* ring vectors plus the core vector */
+};
+
+struct bh_data {
+	/* slc port completion count address */
+	u8 __iomem *completion_cnt_csr_addr;
+
+	struct nitrox_cmdq *cmdq;	/* command queue paired with this port */
+	struct tasklet_struct resp_handler; /* scheduled by nps_pkt_slc_isr */
+};
+
+struct nitrox_bh {
+	struct bh_data *slc;	/* array of nr_queues entries, one per queue */
+};
+
+/* NITROX-5 driver state */
+#define NITROX_UCODE_LOADED	0
+#define NITROX_READY		1
+
+/* command queue size */
+#define DEFAULT_CMD_QLEN 2048
+/* command timeout in milliseconds */
+#define CMD_TIMEOUT 2000
+
+#define DEV(ndev) ((struct device *)(&(ndev)->pdev->dev))
+#define PF_MODE 0
+
+#define NITROX_CSR_ADDR(ndev, offset) \
+	((ndev)->bar_addr + (offset))
+
+/**
+ * struct nitrox_device - NITROX Device Information.
+ * @list: pointer to linked list of devices
+ * @bar_addr: iomap address; base that all CSR offsets are added to
+ * @pdev: PCI device information
+ * @status: NITROX state bits, e.g. NITROX_READY
+ * @timeout: Request timeout in jiffies
+ * @refcnt: Device usage count
+ * @idx: device index (0..N)
+ * @node: NUMA node id attached
+ * @qlen: Command queue length
+ * @nr_queues: Number of command queues
+ * @ctx_pool: DMA pool for crypto context
+ * @pkt_cmdqs: SE Command queues, an array of @nr_queues entries
+ * @msix: MSI-X information
+ * @bh: bottom-half (tasklet) data for response post processing
+ * @hw: hardware information
+ * @debugfs_dir: debugfs directory (only with CONFIG_DEBUG_FS enabled)
+ */
+struct nitrox_device {
+	struct list_head list;
+
+	u8 __iomem *bar_addr;
+	struct pci_dev *pdev;
+
+	unsigned long status;
+	unsigned long timeout;
+	refcount_t refcnt;
+
+	u8 idx;
+	int node;
+	u16 qlen;
+	u16 nr_queues;
+
+	struct dma_pool *ctx_pool;
+	struct nitrox_cmdq *pkt_cmdqs;
+
+	struct nitrox_msix msix;
+	struct nitrox_bh bh;
+
+	struct nitrox_hw hw;
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+	struct dentry *debugfs_dir;
+#endif
+};
+
+/**
+ * nitrox_read_csr - 64-bit read of a device register
+ * @ndev: NITROX device
+ * @offset: offset of the register from the mapped BAR
+ *
+ * Return: value read
+ */
+static inline u64 nitrox_read_csr(struct nitrox_device *ndev, u64 offset)
+{
+	return readq(NITROX_CSR_ADDR(ndev, offset));
+}
+
+/**
+ * nitrox_write_csr - 64-bit write to a device register
+ * @ndev: NITROX device
+ * @offset: offset of the register from the mapped BAR
+ * @value: value to write
+ */
+static inline void nitrox_write_csr(struct nitrox_device *ndev, u64 offset,
+				    u64 value)
+{
+	writeq(value, NITROX_CSR_ADDR(ndev, offset));
+}
+
+static inline int nitrox_ready(struct nitrox_device *ndev)
+{
+	return test_bit(NITROX_READY, &ndev->status); /* set => non-zero */
+}
+
+#endif /* __NITROX_DEV_H */
diff --git a/drivers/crypto/cavium/nitrox/nitrox_hal.c b/drivers/crypto/cavium/nitrox/nitrox_hal.c
new file mode 100644
index 0000000..f0655f8
--- /dev/null
+++ b/drivers/crypto/cavium/nitrox/nitrox_hal.c
@@ -0,0 +1,401 @@
+#include <linux/delay.h>
+
+#include "nitrox_dev.h"
+#include "nitrox_csr.h"
+
+/**
+ * emu_enable_cores - turn on the AE and SE cores of every EMU cluster
+ * @ndev: N5 device
+ */
+static void emu_enable_cores(struct nitrox_device *ndev)
+{
+	union emu_ae_enable ae_mask;
+	union emu_se_enable se_mask;
+	int i;
+
+	/* 20 AE cores per cluster: 20 enable bits set */
+	ae_mask.value = 0;
+	ae_mask.s.enable = 0xfffff;
+
+	/* 16 SE cores per cluster: 16 enable bits set */
+	se_mask.value = 0;
+	se_mask.s.enable = 0xffff;
+
+	/* the same masks go to every cluster, AE register first */
+	for (i = 0; i < NR_CLUSTERS; i++) {
+		nitrox_write_csr(ndev, EMU_AE_ENABLEX(i), ae_mask.value);
+		nitrox_write_csr(ndev, EMU_SE_ENABLEX(i), se_mask.value);
+	}
+}
+
+/**
+ * nitrox_config_emu_unit - enable EMU cores and their error interrupts
+ * @ndev: N5 device
+ */
+void nitrox_config_emu_unit(struct nitrox_device *ndev)
+{
+	union emu_ge_int_ena_w1s ge_ena;
+	union emu_wd_int_ena_w1s wd_ena;
+	int i;
+
+	/* cores first, then the interrupt enables */
+	emu_enable_cores(ndev);
+
+	/* watch dog interrupt enable value */
+	wd_ena.value = 0;
+	wd_ena.s.se_wd = 1;
+
+	/* general error interrupt enable value for SE and AE cores */
+	ge_ena.value = 0;
+	ge_ena.s.se_ge = 0xffff;
+	ge_ena.s.ae_ge = 0xfffff;
+
+	/* per cluster: watch dog enable is written before general error */
+	for (i = 0; i < NR_CLUSTERS; i++) {
+		nitrox_write_csr(ndev, EMU_WD_INT_ENA_W1SX(i), wd_ena.value);
+		nitrox_write_csr(ndev, EMU_GE_INT_ENA_W1SX(i), ge_ena.value);
+	}
+}
+
+/* Disable packet input ring @ring and clear its door bell/done counts. */
+static void reset_pkt_input_ring(struct nitrox_device *ndev, int ring)
+{
+	const u64 ctl_off = NPS_PKT_IN_INSTR_CTLX(ring);
+	const u64 dbell_off = NPS_PKT_IN_INSTR_BAOFF_DBELLX(ring);
+	const u64 cnts_off = NPS_PKT_IN_DONE_CNTSX(ring);
+	union nps_pkt_in_instr_baoff_dbell dbell;
+	union nps_pkt_in_done_cnts cnts;
+	union nps_pkt_in_instr_ctl ctl;
+
+	/* clear [ENB] to disable the ring, then give the hardware time */
+	ctl.value = nitrox_read_csr(ndev, ctl_off);
+	ctl.s.enb = 0;
+	nitrox_write_csr(ndev, ctl_off, ctl.value);
+	usleep_range(100, 150);
+
+	/* poll until [ENB] reads back as zero */
+	do {
+		ctl.value = nitrox_read_csr(ndev, ctl_off);
+	} while (ctl.s.enb);
+
+	/* clear off door bell counts: all-ones in the dbell field */
+	dbell.value = 0;
+	dbell.s.dbell = 0xffffffff;
+	nitrox_write_csr(ndev, dbell_off, dbell.value);
+
+	/* clear done counts: the value read is written straight back */
+	cnts.value = nitrox_read_csr(ndev, cnts_off);
+	nitrox_write_csr(ndev, cnts_off, cnts.value);
+	usleep_range(50, 100);
+}
+
+/* Enable packet input ring @ring with 64-byte instructions. */
+void enable_pkt_input_ring(struct nitrox_device *ndev, int ring)
+{
+	const u64 ctl_off = NPS_PKT_IN_INSTR_CTLX(ring);
+	union nps_pkt_in_instr_ctl ctl;
+
+	/* 64-byte instruction size and [ENB] are set in a single write */
+	ctl.value = nitrox_read_csr(ndev, ctl_off);
+	ctl.s.enb = 1;
+	ctl.s.is64b = 1;
+	nitrox_write_csr(ndev, ctl_off, ctl.value);
+
+	/* poll until [ENB] reads back as set */
+	do {
+		ctl.value = nitrox_read_csr(ndev, ctl_off);
+	} while (!ctl.s.enb);
+}
+
+/**
+ * nitrox_config_pkt_input_rings - configure Packet Input Rings
+ * @ndev: N5 device
+ *
+ * Every ring is first reset, then programmed with its base address,
+ * size and interrupt threshold, and finally enabled. Rings are
+ * handled in ascending order.
+ */
+void nitrox_config_pkt_input_rings(struct nitrox_device *ndev)
+{
+	union nps_pkt_in_instr_rsize rsize;
+	int ring;
+
+	for (ring = 0; ring < ndev->nr_queues; ring++) {
+		/* start from a disabled ring with cleared counts */
+		reset_pkt_input_ring(ndev, ring);
+
+		/* ring base address: DMA address of the command queue */
+		nitrox_write_csr(ndev, NPS_PKT_IN_INSTR_BADDRX(ring),
+				 ndev->pkt_cmdqs[ring].dma);
+
+		/* ring size: the device-wide command queue length */
+		rsize.value = 0;
+		rsize.s.rsize = ndev->qlen;
+		nitrox_write_csr(ndev, NPS_PKT_IN_INSTR_RSIZEX(ring),
+				 rsize.value);
+
+		/* set high threshold for pkt input ring interrupts */
+		nitrox_write_csr(ndev, NPS_PKT_IN_INT_LEVELSX(ring),
+				 0xffffffff);
+
+		enable_pkt_input_ring(ndev, ring);
+	}
+}
+
+/* Disable solicit port @port and clear its counters. */
+static void reset_pkt_solicit_port(struct nitrox_device *ndev, int port)
+{
+	const u64 ctl_off = NPS_PKT_SLC_CTLX(port);
+	const u64 cnts_off = NPS_PKT_SLC_CNTSX(port);
+	union nps_pkt_slc_cnts cnts;
+	union nps_pkt_slc_ctl ctl;
+
+	/* clear [ENB] to disable the port, then give the hardware time */
+	ctl.value = nitrox_read_csr(ndev, ctl_off);
+	ctl.s.enb = 0;
+	nitrox_write_csr(ndev, ctl_off, ctl.value);
+	usleep_range(100, 150);
+
+	/* poll until [ENB] reads back as zero */
+	do {
+		ctl.value = nitrox_read_csr(ndev, ctl_off);
+	} while (ctl.s.enb);
+
+	/* clear slc counters: the value read is written straight back */
+	cnts.value = nitrox_read_csr(ndev, cnts_off);
+	nitrox_write_csr(ndev, cnts_off, cnts.value);
+	usleep_range(50, 100);
+}
+
+/* Enable solicit port @port with zero padding and response header on. */
+void enable_pkt_solicit_port(struct nitrox_device *ndev, int port)
+{
+	const u64 ctl_off = NPS_PKT_SLC_CTLX(port);
+	union nps_pkt_slc_ctl ctl;
+
+	/* start from zero: the old register value is not read first */
+	ctl.value = 0;
+	/* enable response header */
+	ctl.s.rh = 1;
+	/*
+	 * 8 trailing 0x00 bytes will be added
+	 * to the end of the outgoing packet.
+	 */
+	ctl.s.z = 1;
+	ctl.s.enb = 1;
+	nitrox_write_csr(ndev, ctl_off, ctl.value);
+
+	/* poll until [ENB] reads back as set */
+	do {
+		ctl.value = nitrox_read_csr(ndev, ctl_off);
+	} while (!ctl.s.enb);
+}
+
+/* Reset @port, program its interrupt levels, then enable it. */
+static void config_single_pkt_solicit_port(struct nitrox_device *ndev,
+					   int port)
+{
+	const u64 levels_off = NPS_PKT_SLC_INT_LEVELSX(port);
+	union nps_pkt_slc_int_levels levels;
+
+	reset_pkt_solicit_port(ndev, port);
+
+	/* time interrupt threshold; every other field is left at zero */
+	levels.value = 0;
+	levels.s.timet = 0x3fffff;
+	nitrox_write_csr(ndev, levels_off, levels.value);
+
+	enable_pkt_solicit_port(ndev, port);
+}
+
+/* Configure one solicit port per command queue, in ascending order. */
+void nitrox_config_pkt_solicit_ports(struct nitrox_device *ndev)
+{
+	int port = 0;
+
+	while (port < ndev->nr_queues)
+		config_single_pkt_solicit_port(ndev, port++);
+}
+
+/**
+ * enable_nps_interrupts - enable NPS interrupts
+ * @ndev: N5 device.
+ *
+ * This includes NPS core, packet in and slc interrupts.
+ */
+static void enable_nps_interrupts(struct nitrox_device *ndev)
+{
+	union nps_core_int_ena_w1s core_ena;
+
+	/* NPS core interrupts */
+	core_ena.value = 0;
+	core_ena.s.host_nps_wr_err = 1;
+	core_ena.s.npco_dma_malform = 1;
+	core_ena.s.exec_wr_timeout = 1;
+	core_ena.s.host_wr_timeout = 1;
+	core_ena.s.host_wr_err = 1;
+	nitrox_write_csr(ndev, NPS_CORE_INT_ENA_W1S, core_ena.value);
+
+	/* NPS packet in ring interrupts */
+	nitrox_write_csr(ndev, NPS_PKT_IN_RERR_LO_ENA_W1S, ~0ULL);
+	nitrox_write_csr(ndev, NPS_PKT_IN_RERR_HI_ENA_W1S, ~0ULL);
+	nitrox_write_csr(ndev, NPS_PKT_IN_ERR_TYPE_ENA_W1S, ~0ULL);
+	/* NPS packet slc port interrupts */
+	nitrox_write_csr(ndev, NPS_PKT_SLC_RERR_HI_ENA_W1S, ~0ULL);
+	nitrox_write_csr(ndev, NPS_PKT_SLC_RERR_LO_ENA_W1S, ~0ULL);
+	nitrox_write_csr(ndev, NPS_PKT_SLC_ERR_TYPE_ENA_W1S, ~0ULL);
+}
+
+/* Configure the NPS unit: core control, rings/ports, then interrupts. */
+void nitrox_config_nps_unit(struct nitrox_device *ndev)
+{
+	union nps_core_gbl_vfcfg vfcfg;
+
+	/* endian control information */
+	nitrox_write_csr(ndev, NPS_CORE_CONTROL, 1ULL);
+
+	/* PF mode with the ILK interface disabled */
+	vfcfg.value = 0;
+	vfcfg.s.cfg = PF_MODE;
+	vfcfg.s.ilk_disable = 1;
+	nitrox_write_csr(ndev, NPS_CORE_GBL_VFCFG, vfcfg.value);
+
+	/* rings and ports are set up before interrupts are enabled */
+	nitrox_config_pkt_input_rings(ndev);
+	nitrox_config_pkt_solicit_ports(ndev);
+	enable_nps_interrupts(ndev);
+}
+
+void nitrox_config_pom_unit(struct nitrox_device *ndev)
+{
+	union pom_int_ena_w1s int_ena;
+	int core;
+
+	/* enable pom interrupts: only illegal_dport is switched on */
+	int_ena.value = 0;
+	int_ena.s.illegal_dport = 1;
+	nitrox_write_csr(ndev, POM_INT_ENA_W1S, int_ena.value);
+
+	/* enable perf counters: one single-bit write per SE core */
+	for (core = 0; core < ndev->hw.se_cores; core++)
+		nitrox_write_csr(ndev, POM_PERF_CTL, BIT_ULL(core));
+}
+
+/**
+ * nitrox_config_rand_unit - enable N5 random number unit
+ * @ndev: N5 device
+ *
+ * Sets ent_en and rng_en in EFL_RNM_CTL_STATUS; other bits are kept.
+ */
+void nitrox_config_rand_unit(struct nitrox_device *ndev)
+{
+	union efl_rnm_ctl_status rnm;
+
+	rnm.value = nitrox_read_csr(ndev, EFL_RNM_CTL_STATUS);
+	rnm.s.rng_en = 1;
+	rnm.s.ent_en = 1;
+	nitrox_write_csr(ndev, EFL_RNM_CTL_STATUS, rnm.value);
+}
+
+void nitrox_config_efl_unit(struct nitrox_device *ndev)
+{
+	int i;
+
+	for (i = 0; i < NR_CLUSTERS; i++) {
+		union efl_core_int_ena_w1s efl_core_int;
+		u64 offset;
+
+		/* EFL core interrupts */
+		offset = EFL_CORE_INT_ENA_W1SX(i);
+		efl_core_int.value = 0;
+		efl_core_int.s.len_ovr = 1;
+		efl_core_int.s.d_left = 1;
+		efl_core_int.s.epci_decode_err = 1;
+		nitrox_write_csr(ndev, offset, efl_core_int.value);
+
+		offset = EFL_CORE_VF_ERR_INT0_ENA_W1SX(i);
+		nitrox_write_csr(ndev, offset, (~0ULL));
+		offset = EFL_CORE_VF_ERR_INT1_ENA_W1SX(i);
+		nitrox_write_csr(ndev, offset, (~0ULL));
+	}
+}
+
+/*
+ * Program the BMI thresholds, then enable the BMI error interrupts.
+ */
+void nitrox_config_bmi_unit(struct nitrox_device *ndev)
+{
+	union bmi_int_ena_w1s int_ena;
+	union bmi_ctl ctl;
+
+	/* no threshold limits for PCIe */
+	ctl.value = nitrox_read_csr(ndev, BMI_CTL);
+	ctl.s.nps_hdrq_thrsh = 0x7a;
+	ctl.s.nps_free_thrsh = 0xff;
+	ctl.s.max_pkt_len = 0xff;
+	nitrox_write_csr(ndev, BMI_CTL, ctl.value);
+
+	/* enable interrupts; the enable value is built up from zero */
+	int_ena.value = 0;
+	int_ena.s.fpf_undrrn = 1;
+	int_ena.s.pkt_rcv_err_nps = 1;
+	int_ena.s.max_len_err_nps = 1;
+	nitrox_write_csr(ndev, BMI_INT_ENA_W1S, int_ena.value);
+}
+
+/* Set nps_slc_buf_thrsh in BMO_CTL2 to 0xff, keeping its other bits. */
+void nitrox_config_bmo_unit(struct nitrox_device *ndev)
+{
+	const u64 ctl2_off = BMO_CTL2;
+	union bmo_ctl2 ctl2;
+
+	/* no threshold limits for PCIe */
+	ctl2.value = nitrox_read_csr(ndev, ctl2_off);
+	ctl2.s.nps_slc_buf_thrsh = 0xff;
+	nitrox_write_csr(ndev, ctl2_off, ctl2.value);
+}
+
+/* Start a CAM invalidation and busy-wait until LBC reports [DONE]. */
+void invalidate_lbc(struct nitrox_device *ndev)
+{
+	const u64 ctl_off = LBC_INVAL_CTL;
+	const u64 status_off = LBC_INVAL_STATUS;
+	union lbc_inval_status status;
+	union lbc_inval_ctl ctl;
+
+	/* invalidate LBC */
+	ctl.value = nitrox_read_csr(ndev, ctl_off);
+	ctl.s.cam_inval_start = 1;
+	nitrox_write_csr(ndev, ctl_off, ctl.value);
+
+	/* no timeout: spins for as long as [DONE] reads back clear */
+	do {
+		status.value = nitrox_read_csr(ndev, status_off);
+	} while (!status.s.done);
+}
+
+/*
+ * Invalidate the LBC, then enable its error interrupts, including the
+ * PLM and ELM interrupts for both VF ranges.
+ */
+void nitrox_config_lbc_unit(struct nitrox_device *ndev)
+{
+	union lbc_int_ena_w1s int_ena;
+
+	invalidate_lbc(ndev);
+
+	/* enable interrupts */
+	int_ena.value = 0;
+	int_ena.s.cam_hard_err = 1;
+	int_ena.s.cam_inval_abort = 1;
+	int_ena.s.over_fetch_err = 1;
+	int_ena.s.dma_rd_err = 1;
+	nitrox_write_csr(ndev, LBC_INT_ENA_W1S, int_ena.value);
+
+	/* PLM interrupts, VF 1-64 then VF 65-128 */
+	nitrox_write_csr(ndev, LBC_PLM_VF1_64_INT_ENA_W1S, ~0ULL);
+	nitrox_write_csr(ndev, LBC_PLM_VF65_128_INT_ENA_W1S, ~0ULL);
+
+	/* ELM interrupts, VF 1-64 then VF 65-128 */
+	nitrox_write_csr(ndev, LBC_ELM_VF1_64_INT_ENA_W1S, ~0ULL);
+	nitrox_write_csr(ndev, LBC_ELM_VF65_128_INT_ENA_W1S, ~0ULL);
+}
diff --git a/drivers/crypto/cavium/nitrox/nitrox_isr.c b/drivers/crypto/cavium/nitrox/nitrox_isr.c
new file mode 100644
index 0000000..71f9348
--- /dev/null
+++ b/drivers/crypto/cavium/nitrox/nitrox_isr.c
@@ -0,0 +1,467 @@
+#include <linux/pci.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+
+#include "nitrox_dev.h"
+#include "nitrox_csr.h"
+#include "nitrox_common.h"
+
+#define NR_RING_VECTORS 3
+#define NPS_CORE_INT_ACTIVE_ENTRY 192
+
+/**
+ * nps_pkt_slc_isr - IRQ handler for NPS solicit port
+ * @irq: irq number
+ * @data: the struct bh_data of the port that raised the interrupt
+ */
+static irqreturn_t nps_pkt_slc_isr(int irq, void *data)
+{
+	struct bh_data *bh = data;
+	union nps_pkt_slc_cnts cnts;
+
+	cnts.value = readq(bh->completion_cnt_csr_addr);
+	if (!cnts.s.slc_int)
+		return IRQ_HANDLED;
+	/* New packet on SLC output port */
+	tasklet_hi_schedule(&bh->resp_handler);
+	return IRQ_HANDLED;
+}
+
+/* Acknowledge the pending NPS_CORE_INT bits and log what was pending. */
+static void clear_nps_core_err_intr(struct nitrox_device *ndev)
+{
+	u64 value;
+
+	/* Write 1 to clear */
+	value = nitrox_read_csr(ndev, NPS_CORE_INT);
+	nitrox_write_csr(ndev, NPS_CORE_INT, value);
+	dev_err_ratelimited(DEV(ndev), "NPS_CORE_INT  0x%016llx\n", value);
+}
+
+static void clear_nps_pkt_err_intr(struct nitrox_device *ndev)
+{
+	union nps_pkt_int pkt_int;
+	unsigned long value, offset;
+	int i;
+
+	pkt_int.value = nitrox_read_csr(ndev, NPS_PKT_INT);
+	dev_err_ratelimited(DEV(ndev), "NPS_PKT_INT  0x%016llx\n",
+			    pkt_int.value);
+
+	if (pkt_int.s.slc_err) {	/* solicit port errors */
+		offset = NPS_PKT_SLC_ERR_TYPE;
+		value = nitrox_read_csr(ndev, offset);
+		nitrox_write_csr(ndev, offset, value);
+		dev_err_ratelimited(DEV(ndev),
+				    "NPS_PKT_SLC_ERR_TYPE  0x%016lx\n", value);
+
+		offset = NPS_PKT_SLC_RERR_LO;
+		value = nitrox_read_csr(ndev, offset);
+		nitrox_write_csr(ndev, offset, value);
+		/* re-enable each solicit port whose bit is set in RERR_LO */
+		for_each_set_bit(i, &value, BITS_PER_LONG)
+			enable_pkt_solicit_port(ndev, i);
+
+		dev_err_ratelimited(DEV(ndev),
+				    "NPS_PKT_SLC_RERR_LO  0x%016lx\n", value);
+
+		offset = NPS_PKT_SLC_RERR_HI;
+		value = nitrox_read_csr(ndev, offset);
+		nitrox_write_csr(ndev, offset, value);
+		dev_err_ratelimited(DEV(ndev),
+				    "NPS_PKT_SLC_RERR_HI  0x%016lx\n", value);
+	}
+
+	if (pkt_int.s.in_err) {		/* input ring errors */
+		offset = NPS_PKT_IN_ERR_TYPE;
+		value = nitrox_read_csr(ndev, offset);
+		nitrox_write_csr(ndev, offset, value);
+		dev_err_ratelimited(DEV(ndev),
+				    "NPS_PKT_IN_ERR_TYPE  0x%016lx\n", value);
+		offset = NPS_PKT_IN_RERR_LO;
+		value = nitrox_read_csr(ndev, offset);
+		nitrox_write_csr(ndev, offset, value);
+		/* re-enable each input ring whose bit is set in RERR_LO */
+		for_each_set_bit(i, &value, BITS_PER_LONG)
+			enable_pkt_input_ring(ndev, i);
+
+		dev_err_ratelimited(DEV(ndev),
+				    "NPS_PKT_IN_RERR_LO  0x%016lx\n", value);
+
+		offset = NPS_PKT_IN_RERR_HI;
+		value = nitrox_read_csr(ndev, offset);
+		nitrox_write_csr(ndev, offset, value);
+		dev_err_ratelimited(DEV(ndev),
+				    "NPS_PKT_IN_RERR_HI  0x%016lx\n", value);
+	}
+}
+
+/* Write the pending POM_INT bits back to the register and log them. */
+static void clear_pom_err_intr(struct nitrox_device *ndev)
+{
+	const u64 pending = nitrox_read_csr(ndev, POM_INT);
+
+	nitrox_write_csr(ndev, POM_INT, pending);
+	dev_err_ratelimited(DEV(ndev), "POM_INT  0x%016llx\n", pending);
+}
+
+/* Write the pending PEM0_INT bits back to the register and log them. */
+static void clear_pem_err_intr(struct nitrox_device *ndev)
+{
+	const u64 pending = nitrox_read_csr(ndev, PEM0_INT);
+
+	nitrox_write_csr(ndev, PEM0_INT, pending);
+	dev_err_ratelimited(DEV(ndev), "PEM(0)_INT  0x%016llx\n", pending);
+}
+
+/* Read a CSR and write the same value back to it. */
+static void lbc_write_back_csr(struct nitrox_device *ndev, u64 offset)
+{
+	nitrox_write_csr(ndev, offset, nitrox_read_csr(ndev, offset));
+}
+
+/*
+ * Log LBC_INT, service each error source it reports, then write the
+ * value that was read back to LBC_INT.
+ */
+static void clear_lbc_err_intr(struct nitrox_device *ndev)
+{
+	union lbc_int lbc_int;
+	int i;
+
+	lbc_int.value = nitrox_read_csr(ndev, LBC_INT);
+	dev_err_ratelimited(DEV(ndev), "LBC_INT  0x%016llx\n", lbc_int.value);
+
+	if (lbc_int.s.dma_rd_err) {
+		/* VF error interrupts of every cluster, INT0 before INT1 */
+		for (i = 0; i < NR_CLUSTERS; i++) {
+			lbc_write_back_csr(ndev, EFL_CORE_VF_ERR_INT0X(i));
+			lbc_write_back_csr(ndev, EFL_CORE_VF_ERR_INT1X(i));
+		}
+	}
+
+	if (lbc_int.s.cam_soft_err) {
+		dev_err_ratelimited(DEV(ndev), "CAM_SOFT_ERR, invalidating LBC\n");
+		invalidate_lbc(ndev);
+	}
+
+	if (lbc_int.s.pref_dat_len_mismatch_err) {
+		lbc_write_back_csr(ndev, LBC_PLM_VF1_64_INT);
+		lbc_write_back_csr(ndev, LBC_PLM_VF65_128_INT);
+	}
+
+	if (lbc_int.s.rd_dat_len_mismatch_err) {
+		lbc_write_back_csr(ndev, LBC_ELM_VF1_64_INT);
+		lbc_write_back_csr(ndev, LBC_ELM_VF65_128_INT);
+	}
+
+	/* last of all, LBC_INT itself gets the value read at the top */
+	nitrox_write_csr(ndev, LBC_INT, lbc_int.value);
+}
+
+/* Log and write back the EFL core interrupts, cluster by cluster. */
+static void clear_efl_err_intr(struct nitrox_device *ndev)
+{
+	int i;
+
+	for (i = 0; i < NR_CLUSTERS; i++) {
+		union efl_core_int core_int;
+		u64 offset = EFL_CORE_INTX(i);
+
+		core_int.value = nitrox_read_csr(ndev, offset);
+		nitrox_write_csr(ndev, offset, core_int.value);
+		dev_err_ratelimited(DEV(ndev), "EFL_CORE(%d)_INT  0x%016llx\n",
+				    i, core_int.value);
+		if (!core_int.s.se_err)
+			continue;
+		/* SE error reported: write back its own register as well */
+		offset = EFL_CORE_SE_ERR_INTX(i);
+		nitrox_write_csr(ndev, offset, nitrox_read_csr(ndev, offset));
+	}
+}
+
+/* Write the pending BMI_INT bits back to the register and log them. */
+static void clear_bmi_err_intr(struct nitrox_device *ndev)
+{
+	const u64 pending = nitrox_read_csr(ndev, BMI_INT);
+
+	nitrox_write_csr(ndev, BMI_INT, pending);
+	dev_err_ratelimited(DEV(ndev), "BMI_INT  0x%016llx\n", pending);
+}
+
+/**
+ * clear_nps_core_int_active - clear NPS_CORE_INT_ACTIVE interrupts
+ * @ndev: NITROX device
+ *
+ * Reads NPS_CORE_INT_ACTIVE once and runs the clear routine of every
+ * unit whose bit is set in that snapshot. The snapshot is then written
+ * back with [RESEND] set, so that the ISR is called again if there is
+ * more work.
+ */
+static void clear_nps_core_int_active(struct nitrox_device *ndev)
+{
+	union nps_core_int_active active;
+
+	active.value = nitrox_read_csr(ndev, NPS_CORE_INT_ACTIVE);
+
+	/* units are always serviced in this fixed order */
+	if (active.s.nps_core)
+		clear_nps_core_err_intr(ndev);
+	if (active.s.nps_pkt)
+		clear_nps_pkt_err_intr(ndev);
+	if (active.s.pom)
+		clear_pom_err_intr(ndev);
+	if (active.s.pem)
+		clear_pem_err_intr(ndev);
+	if (active.s.lbc)
+		clear_lbc_err_intr(ndev);
+	if (active.s.efl)
+		clear_efl_err_intr(ndev);
+	if (active.s.bmi)
+		clear_bmi_err_intr(ndev);
+
+	/* If more work callback the ISR, set resend */
+	active.s.resend = 1;
+	nitrox_write_csr(ndev, NPS_CORE_INT_ACTIVE, active.value);
+}
+
+/* IRQ handler for the NPS_CORE_INT_ACTIVE vector; @data is the device. */
+static irqreturn_t nps_core_int_isr(int irq, void *data)
+{
+	struct nitrox_device *nitrox = data;
+
+	clear_nps_core_int_active(nitrox);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Allocate MSI-X tables and enable MSI-X; ndev->msix is set only on success.
+ */
+static int nitrox_enable_msix(struct nitrox_device *ndev)
+{
+	struct msix_entry *entries;
+	char **names;
+	int i, nr_entries, ret;
+
+	/*
+	 * PF MSI-X vectors
+	 *
+	 * Entry 0: NPS PKT ring 0
+	 * Entry 1: AQMQ ring 0
+	 * Entry 2: ZQM ring 0
+	 * Entry 3: NPS PKT ring 1
+	 * ....
+	 * Entry 192: NPS_CORE_INT_ACTIVE
+	 */
+	nr_entries = (ndev->nr_queues * NR_RING_VECTORS) + 1;
+	entries = kzalloc_node(nr_entries * sizeof(struct msix_entry),
+			       GFP_KERNEL, ndev->node);
+	if (!entries)
+		return -ENOMEM;
+
+	names = kcalloc(nr_entries, sizeof(char *), GFP_KERNEL);
+	if (!names) {
+		kfree(entries);
+		return -ENOMEM;
+	}
+
+	/* ring vectors map 1:1; the last slot is the core interrupt */
+	for (i = 0; i < (nr_entries - 1); i++)
+		entries[i].entry = i;
+	entries[i].entry = NPS_CORE_INT_ACTIVE_ENTRY;
+
+	/* names[] starts zeroed, so the error path may kfree() every slot */
+	for (i = 0; i < nr_entries; i++) {
+		names[i] = kzalloc(MAX_MSIX_VECTOR_NAME, GFP_KERNEL);
+		if (!names[i]) {
+			ret = -ENOMEM;
+			goto msix_fail;
+		}
+	}
+
+	ret = pci_enable_msix_exact(ndev->pdev, entries, nr_entries);
+	if (ret) {
+		dev_err(&ndev->pdev->dev, "Failed to enable MSI-X IRQ(s) %d\n",
+			ret);
+		goto msix_fail;
+	}
+
+	ndev->msix.entries = entries;
+	ndev->msix.names = names;
+	ndev->msix.nr_entries = nr_entries;
+	return 0;
+
+msix_fail:
+	for (i = 0; i < nr_entries; i++)
+		kfree(names[i]);
+	kfree(entries);
+	kfree(names);
+	return ret;
+}
+
+/* Kill the per-port response tasklets and free the bh_data array. */
+static void nitrox_cleanup_pkt_slc_bh(struct nitrox_device *ndev)
+{
+	int i;
+
+	if (!ndev->bh.slc)
+		return;
+
+	/* no tasklet_disable(): it can make tasklet_kill() wait forever */
+	for (i = 0; i < ndev->nr_queues; i++)
+		tasklet_kill(&ndev->bh.slc[i].resp_handler);
+
+	/* NULL the pointer so that a second call returns early */
+	kfree(ndev->bh.slc);
+	ndev->bh.slc = NULL;
+}
+
+/* Allocate one bh_data per queue and initialise its response tasklet. */
+static int nitrox_setup_pkt_slc_bh(struct nitrox_device *ndev)
+{
+	struct bh_data *bh;
+	int i;
+
+	ndev->bh.slc = kcalloc(ndev->nr_queues, sizeof(*ndev->bh.slc),
+			       GFP_KERNEL);
+	if (!ndev->bh.slc)
+		return -ENOMEM;
+
+	for (i = 0; i < ndev->nr_queues; i++) {
+		bh = &ndev->bh.slc[i];
+
+		/* pre calculate completion count address */
+		bh->completion_cnt_csr_addr =
+			NITROX_CSR_ADDR(ndev, NPS_PKT_SLC_CNTSX(i));
+		bh->cmdq = &ndev->pkt_cmdqs[i];
+
+		tasklet_init(&bh->resp_handler, pkt_slc_resp_handler,
+			     (unsigned long)bh);
+	}
+
+	return 0;
+}
+
+/* Request one IRQ per packet ring plus the NPS_CORE_INT_ACTIVE IRQ. */
+static int nitrox_request_irqs(struct nitrox_device *ndev)
+{
+	struct msix_entry *msix_ent = ndev->msix.entries;
+	char **names = ndev->msix.names;
+	struct device *dev = &ndev->pdev->dev;
+	int nr_ring_vectors, i, ring, ret;
+
+	/*
+	 * PF MSI-X vectors
+	 *
+	 * Entry 0: NPS PKT ring 0
+	 * Entry 1: AQMQ ring 0
+	 * Entry 2: ZQM ring 0
+	 * Entry 3: NPS PKT ring 1
+	 * ....
+	 * Entry 192: NPS_CORE_INT_ACTIVE
+	 */
+	nr_ring_vectors = ndev->nr_queues * NR_RING_VECTORS;
+
+	/* only the first (NPS PKT) vector of each ring gets an IRQ */
+	for (i = 0; i < nr_ring_vectors; i += NR_RING_VECTORS) {
+		ring = i / NR_RING_VECTORS;
+		snprintf(names[i], MAX_MSIX_VECTOR_NAME, "n5(%d)-slc-ring%d",
+			 ndev->idx, ring);
+
+		ret = request_irq(msix_ent[i].vector, nps_pkt_slc_isr, 0,
+				  names[i], &ndev->bh.slc[ring]);
+		if (ret) {
+			dev_err(dev, "failed to get irq %d for %s\n",
+				msix_ent[i].vector, names[i]);
+			return ret;
+		}
+		/* affinity hint by ring number; then record the IRQ */
+		irq_set_affinity_hint(msix_ent[i].vector,
+				      get_cpu_mask(ring % num_online_cpus()));
+		set_bit(i, ndev->msix.irqs);
+	}
+
+	/* Request IRQ for NPS_CORE_INT_ACTIVE */
+	snprintf(names[i], MAX_MSIX_VECTOR_NAME, "n5(%d)-nps-core-int",
+		 ndev->idx);
+	ret = request_irq(msix_ent[i].vector, nps_core_int_isr, 0, names[i],
+			  ndev);
+	if (ret) {
+		dev_err(dev, "failed to get irq %d for %s\n",
+			msix_ent[i].vector, names[i]);
+		return ret;
+	}
+	set_bit(i, ndev->msix.irqs);
+
+	return 0;
+}
+
+/* Free the IRQs recorded in msix.irqs, the MSI-X tables, and MSI-X. */
+static void nitrox_disable_msix(struct nitrox_device *ndev)
+{
+	struct msix_entry *msix_ent = ndev->msix.entries;
+	char **names = ndev->msix.names;
+	int i, ring, nr_ring_vectors = ndev->msix.nr_entries - 1;
+
+	/* clear pkt ring irqs */
+	for (i = 0; i < nr_ring_vectors; i += NR_RING_VECTORS) {
+		if (test_and_clear_bit(i, ndev->msix.irqs)) {
+			ring = (i / NR_RING_VECTORS);
+			irq_set_affinity_hint(msix_ent[i].vector, NULL);
+			free_irq(msix_ent[i].vector, &ndev->bh.slc[ring]);
+		}
+	}
+	/* the core irq may never have been requested (failed init path) */
+	if (test_and_clear_bit(i, ndev->msix.irqs)) {
+		irq_set_affinity_hint(msix_ent[i].vector, NULL);
+		free_irq(msix_ent[i].vector, ndev);
+	}
+
+	kfree(ndev->msix.entries);
+	for (i = 0; i < ndev->msix.nr_entries; i++)
+		kfree(*(names + i));
+
+	kfree(names);
+	pci_disable_msix(ndev->pdev);
+}
+
+/**
+ * nitrox_pf_cleanup_isr - Cleanup PF MSI-X and IRQ
+ * @ndev: NITROX device
+ */
+void nitrox_pf_cleanup_isr(struct nitrox_device *ndev)
+{
+	nitrox_disable_msix(ndev);	/* frees the IRQs and MSI-X tables */
+	nitrox_cleanup_pkt_slc_bh(ndev);	/* then the response tasklets */
+}
+
+/**
+ * nitrox_pf_init_isr - Initialize PF MSI-X vectors and IRQ
+ * @ndev: NITROX device
+ *
+ * Return: 0 on success, a negative value on failure.
+ */
+int nitrox_pf_init_isr(struct nitrox_device *ndev)
+{
+	int err;
+
+	err = nitrox_setup_pkt_slc_bh(ndev);
+	if (err)
+		return err;
+
+	err = nitrox_enable_msix(ndev);
+	if (err)
+		goto msix_fail;
+
+	err = nitrox_request_irqs(ndev);
+	if (err)
+		goto irq_fail;
+
+	return 0;
+
+irq_fail:
+	nitrox_disable_msix(ndev);
+msix_fail:
+	nitrox_cleanup_pkt_slc_bh(ndev);
+	return err;
+}
diff --git a/drivers/crypto/cavium/nitrox/nitrox_lib.c b/drivers/crypto/cavium/nitrox/nitrox_lib.c
new file mode 100644
index 0000000..b4a391a
--- /dev/null
+++ b/drivers/crypto/cavium/nitrox/nitrox_lib.c
@@ -0,0 +1,210 @@
+#include <linux/cpumask.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/delay.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci_regs.h>
+#include <linux/vmalloc.h>
+#include <linux/pci.h>
+
+#include "nitrox_dev.h"
+#include "nitrox_common.h"
+#include "nitrox_req.h"
+#include "nitrox_csr.h"
+
+#define CRYPTO_CTX_SIZE	256
+
+/* command queue alignments */
+#define PKT_IN_ALIGN	16
+
+/* Allocate the queue's DMA memory and initialise its locks and lists. */
+static int cmdq_common_init(struct nitrox_cmdq *cmdq)
+{
+	struct nitrox_device *ndev = cmdq->ndev;
+	u32 qsize = (ndev->qlen * cmdq->instr_size) + PKT_IN_ALIGN;
+
+	cmdq->head_unaligned = dma_zalloc_coherent(DEV(ndev), qsize,
+				&cmdq->dma_unaligned, GFP_KERNEL);
+	if (!cmdq->head_unaligned)
+		return -ENOMEM;
+
+	/*
+	 * ALIGN() keeps the dma_addr_t width; PTR_ALIGN() casts through
+	 * unsigned long. head moves by the same offset as dma.
+	 */
+	cmdq->dma = ALIGN(cmdq->dma_unaligned, PKT_IN_ALIGN);
+	cmdq->head = cmdq->head_unaligned + (cmdq->dma - cmdq->dma_unaligned);
+	cmdq->qsize = qsize;
+
+	spin_lock_init(&cmdq->response_lock);
+	spin_lock_init(&cmdq->cmdq_lock);
+	spin_lock_init(&cmdq->backlog_lock);
+	INIT_LIST_HEAD(&cmdq->response_head);
+	INIT_LIST_HEAD(&cmdq->backlog_head);
+	INIT_WORK(&cmdq->backlog_qflush, backlog_qflush_work);
+	atomic_set(&cmdq->pending_count, 0);
+	atomic_set(&cmdq->backlog_count, 0);
+	return 0;
+}
+
+/* Release one command queue; a no-op for queues that were never set up. */
+static void cmdq_common_cleanup(struct nitrox_cmdq *cmdq)
+{
+	/* no DMA memory: ndev may be NULL and the work item uninitialised */
+	if (!cmdq->head_unaligned)
+		return;
+	cancel_work_sync(&cmdq->backlog_qflush);
+	dma_free_coherent(DEV(cmdq->ndev), cmdq->qsize,
+			  cmdq->head_unaligned, cmdq->dma_unaligned);
+	atomic_set(&cmdq->pending_count, 0);
+	atomic_set(&cmdq->backlog_count, 0);
+	cmdq->dbell_csr_addr = NULL;
+	cmdq->head_unaligned = NULL;
+	cmdq->head = NULL;
+	cmdq->dma = 0;
+	cmdq->qsize = 0;
+	cmdq->instr_size = 0;
+}
+
+/* Clean up every packet command queue, then free the queue array. */
+static void nitrox_cleanup_pkt_cmdqs(struct nitrox_device *ndev)
+{
+	int qno;
+
+	for (qno = 0; qno < ndev->nr_queues; qno++)
+		cmdq_common_cleanup(&ndev->pkt_cmdqs[qno]);
+
+	/* the array itself goes last */
+	kfree(ndev->pkt_cmdqs);
+	ndev->pkt_cmdqs = NULL;
+}
+
+/* Allocate the packet command queue array and set up each queue. */
+static int nitrox_init_pkt_cmdqs(struct nitrox_device *ndev)
+{
+	struct nitrox_cmdq *cmdq;
+	int i, err;
+
+	ndev->pkt_cmdqs = kcalloc(ndev->nr_queues, sizeof(*cmdq), GFP_KERNEL);
+	if (!ndev->pkt_cmdqs)
+		return -ENOMEM;
+
+	for (i = 0; i < ndev->nr_queues; i++) {
+		cmdq = &ndev->pkt_cmdqs[i];
+
+		/* cmdq_common_init() below reads ndev and instr_size */
+		cmdq->ndev = ndev;
+		cmdq->qno = i;
+		cmdq->instr_size = sizeof(struct nps_pkt_instr);
+
+		/* SE ring doorbell address for this queue */
+		cmdq->dbell_csr_addr =
+			NITROX_CSR_ADDR(ndev, NPS_PKT_IN_INSTR_BAOFF_DBELLX(i));
+
+		err = cmdq_common_init(cmdq);
+		if (err) {
+			/* runs the cleanup on all queues, frees the array */
+			nitrox_cleanup_pkt_cmdqs(ndev);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+/* Create the per-device DMA pool that crypto contexts are carved from. */
+static int create_crypto_dma_pool(struct nitrox_device *ndev)
+{
+	/* each element holds a ctx_hdr followed by the crypto context */
+	const size_t elem_size = sizeof(struct ctx_hdr) + CRYPTO_CTX_SIZE;
+
+	/* Crypto context pool, 16 byte aligned */
+	ndev->ctx_pool = dma_pool_create("crypto-context", DEV(ndev),
+					 elem_size, 16, 0);
+	if (!ndev->ctx_pool)
+		return -ENOMEM;
+	return 0;
+}
+
+/* Destroy the crypto context pool, if one exists, and forget it. */
+static void destroy_crypto_dma_pool(struct nitrox_device *ndev)
+{
+	if (ndev->ctx_pool) {
+		dma_pool_destroy(ndev->ctx_pool);
+		ndev->ctx_pool = NULL;
+	}
+}
+
+/**
+ * crypto_alloc_context - Allocate crypto context from pool
+ * @ndev: NITROX Device
+ *
+ * Return: the context area that follows the element's ctx_hdr, or NULL.
+ */
+void *crypto_alloc_context(struct nitrox_device *ndev)
+{
+	struct ctx_hdr *hdr;
+	dma_addr_t handle;
+
+	hdr = dma_pool_alloc(ndev->ctx_pool, GFP_ATOMIC | __GFP_ZERO, &handle);
+	if (!hdr)
+		return NULL;
+
+	/* fill meta data */
+	hdr->pool = ndev->ctx_pool;
+	hdr->dma = handle;
+	hdr->ctx_dma = handle + sizeof(struct ctx_hdr);
+
+	return (u8 *)hdr + sizeof(struct ctx_hdr);
+}
+
+/**
+ * crypto_free_context - Free crypto context to pool
+ * @ctx: context from crypto_alloc_context(); NULL is ignored
+ */
+void crypto_free_context(void *ctx)
+{
+	struct ctx_hdr *hdr;
+
+	if (!ctx)
+		return;
+
+	hdr = (struct ctx_hdr *)((u8 *)ctx - sizeof(*hdr));
+	dma_pool_free(hdr->pool, hdr, hdr->dma);
+}
+
+/**
+ * nitrox_common_sw_init - allocate software resources.
+ * @ndev: NITROX device
+ *
+ * Creates the crypto context pool first and the command queues second;
+ * the pool is destroyed again if the queues cannot be set up.
+ *
+ * Return: 0 on success, or a negative error code on error.
+ */
+int nitrox_common_sw_init(struct nitrox_device *ndev)
+{
+	int err;
+
+	/* per device crypto context pool */
+	err = create_crypto_dma_pool(ndev);
+	if (err)
+		return err;
+
+	err = nitrox_init_pkt_cmdqs(ndev);
+	if (!err)
+		return 0;
+	destroy_crypto_dma_pool(ndev);
+	return err;
+}
+
+/**
+ * nitrox_common_sw_cleanup - free command queues and the context pool.
+ * @ndev: NITROX device
+ */
+void nitrox_common_sw_cleanup(struct nitrox_device *ndev)
+{
+	nitrox_cleanup_pkt_cmdqs(ndev);	/* reverse of the init order */
+	destroy_crypto_dma_pool(ndev);
+}
diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c
new file mode 100644
index 0000000..ae44a46
--- /dev/null
+++ b/drivers/crypto/cavium/nitrox/nitrox_main.c
@@ -0,0 +1,640 @@
+#include <linux/aer.h>
+#include <linux/delay.h>
+#include <linux/debugfs.h>
+#include <linux/firmware.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+
+#include "nitrox_dev.h"
+#include "nitrox_common.h"
+#include "nitrox_csr.h"
+
+#define CNN55XX_DEV_ID	0x12
+#define MAX_PF_QUEUES	64
+#define UCODE_HLEN 48
+#define SE_GROUP 0
+
+#define DRIVER_VERSION "1.0"
+/* SE microcode */
+#define SE_FW	"cnn55xx_se.fw"
+
+static const char nitrox_driver_name[] = "CNN55XX";
+
+static LIST_HEAD(ndevlist);
+static DEFINE_MUTEX(devlist_lock);
+static unsigned int num_devices;
+
+/**
+ * nitrox_pci_tbl - PCI Device ID Table
+ */
+static const struct pci_device_id nitrox_pci_tbl[] = {
+	{PCI_VDEVICE(CAVIUM, CNN55XX_DEV_ID), 0},
+	/* required last entry */
+	{0, }
+};
+MODULE_DEVICE_TABLE(pci, nitrox_pci_tbl);
+
+static unsigned int qlen = DEFAULT_CMD_QLEN;
+module_param(qlen, uint, 0644);
+MODULE_PARM_DESC(qlen, "Command queue length - default 2048");
+
+/**
+ * struct ucode - Firmware Header
+ * @id: microcode ID
+ * @version: firmware version
+ * @code_size: code section size (big-endian)
+ * @raz: alignment
+ * @code: code section, a flexible array of 64-bit words after the header
+ */
+struct ucode {
+	u8 id;
+	char version[VERSION_LEN - 1];
+	__be32 code_size;
+	u8 raz[12];
+	u64 code[];
+};
+
+/**
+ * write_to_ucd_unit - Write Firmware to NITROX UCD unit
+ */
+static void write_to_ucd_unit(struct nitrox_device *ndev,
+			      struct ucode *ucode)
+{
+	u32 code_size = be32_to_cpu(ucode->code_size) * 2;
+	u64 offset, data;
+	int i = 0;
+
+	/*
+	 * UCD structure
+	 *
+	 *  -------------
+	 *  |    BLK 7  |
+	 *  -------------
+	 *  |    BLK 6  |
+	 *  -------------
+	 *  |    ...    |
+	 *  -------------
+	 *  |    BLK 0  |
+	 *  -------------
+	 *  Total of 8 blocks, each size 32KB
+	 */
+
+	/* set the block number */
+	offset = UCD_UCODE_LOAD_BLOCK_NUM;
+	nitrox_write_csr(ndev, offset, 0);
+
+	code_size = roundup(code_size, 8);
+	while (code_size) {
+		data = ucode->code[i];
+		/* write 8 bytes at a time */
+		offset = UCD_UCODE_LOAD_IDX_DATAX(i);
+		nitrox_write_csr(ndev, offset, data);
+		code_size -= 8;
+		i++;
+	}
+
+	/* put all SE cores in group 0 */
+	offset = POM_GRP_EXECMASKX(SE_GROUP);
+	nitrox_write_csr(ndev, offset, (~0ULL));
+
+	for (i = 0; i < ndev->hw.se_cores; i++) {
+		/*
+		 * write block number and firmware length
+		 * bit:<2:0> block number
+		 * bit:3 is set SE uses 32KB microcode
+		 * bit:3 is clear SE uses 64KB microcode
+		 */
+		offset = UCD_SE_EID_UCODE_BLOCK_NUMX(i);
+		nitrox_write_csr(ndev, offset, 0x8);
+	}
+	usleep_range(300, 400);
+}
+
+static int nitrox_load_fw(struct nitrox_device *ndev, const char *fw_name)
+{
+	const struct firmware *fw;
+	struct ucode *ucode;
+	int ret;
+
+	dev_info(DEV(ndev), "Loading firmware \"%s\"\n", fw_name);
+
+	ret = request_firmware(&fw, fw_name, DEV(ndev));
+	if (ret < 0) {
+		dev_err(DEV(ndev), "failed to get firmware %s\n", fw_name);
+		return ret;
+	}
+
+	ucode = (struct ucode *)fw->data;
+	/* copy the version; NOTE(review): fw->size is never validated */
+	memcpy(ndev->hw.fw_name, ucode->version, (VERSION_LEN - 2));
+	ndev->hw.fw_name[VERSION_LEN - 1] = '\0';
+
+	write_to_ucd_unit(ndev, ucode);
+	release_firmware(fw);
+
+	set_bit(NITROX_UCODE_LOADED, &ndev->status);
+	/* barrier to sync with other cpus */
+	smp_mb__after_atomic();
+	return 0;
+}
+
+/**
+ * nitrox_add_to_devlist - add NITROX device to global device list
+ * @ndev: NITROX device
+ */
+static int nitrox_add_to_devlist(struct nitrox_device *ndev)
+{
+	struct nitrox_device *cur;
+	int ret = -EEXIST;
+
+	INIT_LIST_HEAD(&ndev->list);
+	refcount_set(&ndev->refcnt, 1);
+
+	mutex_lock(&devlist_lock);
+	/* refuse a device that is already on the list */
+	list_for_each_entry(cur, &ndevlist, list) {
+		if (cur == ndev)
+			goto unlock;
+	}
+	ndev->idx = num_devices++;
+	list_add_tail(&ndev->list, &ndevlist);
+	ret = 0;
+unlock:
+	mutex_unlock(&devlist_lock);
+	return ret;
+}
+
+/**
+ * nitrox_remove_from_devlist - remove NITROX device from
+ *   global device list
+ * @ndev: NITROX device
+ */
+static void nitrox_remove_from_devlist(struct nitrox_device *ndev)
+{
+	mutex_lock(&devlist_lock);
+	list_del(&ndev->list);
+	num_devices--;
+	mutex_unlock(&devlist_lock);
+}
+
+struct nitrox_device *nitrox_get_first_device(void)
+{
+	struct nitrox_device *ndev;
+
+	mutex_lock(&devlist_lock);
+	list_for_each_entry(ndev, &ndevlist, list) {
+		if (nitrox_ready(ndev))
+			break;
+	}
+	mutex_unlock(&devlist_lock);
+	/* a full traversal leaves the cursor on the list head, never NULL */
+	if (&ndev->list == &ndevlist)
+		return NULL;
+	refcount_inc(&ndev->refcnt);
+	/* barrier to sync with other cpus */
+	smp_mb__after_atomic();
+	return ndev;
+}
+
+void nitrox_put_device(struct nitrox_device *ndev)
+{
+	if (!ndev)
+		return;
+
+	refcount_dec(&ndev->refcnt);
+	/* barrier to sync with other cpus */
+	smp_mb__after_atomic();
+}
+
+static int nitrox_reset_device(struct pci_dev *pdev)
+{
+	int pos = 0;
+
+	pos = pci_save_state(pdev);
+	if (pos) {
+		dev_err(&pdev->dev, "Failed to save pci state\n");
+		return -ENOMEM;
+	}
+
+	pos = pci_pcie_cap(pdev);
+	if (!pos)
+		return -ENOTTY;
+
+	if (!pci_wait_for_pending_transaction(pdev))
+		dev_err(&pdev->dev, "waiting for pending transaction\n");
+
+	pcie_capability_set_word(pdev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR);
+	msleep(100);
+	pci_restore_state(pdev);
+
+	return 0;
+}
+
+static int nitrox_pf_sw_init(struct nitrox_device *ndev)
+{
+	int err;
+
+	err = nitrox_common_sw_init(ndev);
+	if (err)
+		return err;
+
+	err = nitrox_pf_init_isr(ndev);
+	if (err)
+		nitrox_common_sw_cleanup(ndev);
+
+	return err;
+}
+
+static void nitrox_pf_sw_cleanup(struct nitrox_device *ndev)
+{
+	nitrox_pf_cleanup_isr(ndev);
+	nitrox_common_sw_cleanup(ndev);
+}
+
+/**
+ * nitrox_bist_check - Check NITROX BIST registers status
+ * @ndev: NITROX device
+ * Return: 0 if every BIST register reads zero, -EIO otherwise.
+ */
+static int nitrox_bist_check(struct nitrox_device *ndev)
+{
+	u64 value = 0;
+	int i;
+
+	/* OR, not add: a sum of non-zero statuses could wrap around to zero */
+	for (i = 0; i < NR_CLUSTERS; i++) {
+		value |= nitrox_read_csr(ndev, EMU_BIST_STATUSX(i));
+		value |= nitrox_read_csr(ndev, EFL_CORE_BIST_REGX(i));
+	}
+	value |= nitrox_read_csr(ndev, UCD_BIST_STATUS);
+	value |= nitrox_read_csr(ndev, NPS_CORE_BIST_REG);
+	value |= nitrox_read_csr(ndev, NPS_CORE_NPC_BIST_REG);
+	value |= nitrox_read_csr(ndev, NPS_PKT_SLC_BIST_REG);
+	value |= nitrox_read_csr(ndev, NPS_PKT_IN_BIST_REG);
+	value |= nitrox_read_csr(ndev, POM_BIST_REG);
+	value |= nitrox_read_csr(ndev, BMI_BIST_REG);
+	value |= nitrox_read_csr(ndev, EFL_TOP_BIST_STAT);
+	value |= nitrox_read_csr(ndev, BMO_BIST_REG);
+	value |= nitrox_read_csr(ndev, LBC_BIST_STATUS);
+	value |= nitrox_read_csr(ndev, PEM_BIST_STATUSX(0));
+	return value ? -EIO : 0;
+}
+
+static void nitrox_get_hwinfo(struct nitrox_device *ndev)
+{
+	union emu_fuse_map emu_fuse;
+	u64 offset;
+	int i;
+
+	for (i = 0; i < NR_CLUSTERS; i++) {
+		u8 dead_cores;
+
+		offset = EMU_FUSE_MAPX(i);
+		emu_fuse.value = nitrox_read_csr(ndev, offset);
+		if (emu_fuse.s.valid) {
+			dead_cores = hweight32(emu_fuse.s.ae_fuse);
+			ndev->hw.ae_cores += AE_CORES_PER_CLUSTER - dead_cores;
+			dead_cores = hweight16(emu_fuse.s.se_fuse);
+			ndev->hw.se_cores += SE_CORES_PER_CLUSTER - dead_cores;
+		}
+	}
+}
+
+static int nitrox_pf_hw_init(struct nitrox_device *ndev)
+{
+	int err;
+
+	err = nitrox_bist_check(ndev);
+	if (err) {
+		dev_err(&ndev->pdev->dev, "BIST check failed\n");
+		return err;
+	}
+	/* get cores information */
+	nitrox_get_hwinfo(ndev);
+
+	nitrox_config_nps_unit(ndev);
+	nitrox_config_pom_unit(ndev);
+	nitrox_config_efl_unit(ndev);
+	/* configure IO units */
+	nitrox_config_bmi_unit(ndev);
+	nitrox_config_bmo_unit(ndev);
+	/* configure Local Buffer Cache */
+	nitrox_config_lbc_unit(ndev);
+	nitrox_config_rand_unit(ndev);
+
+	/* load firmware on SE cores */
+	err = nitrox_load_fw(ndev, SE_FW);
+	if (err)
+		return err;
+
+	nitrox_config_emu_unit(ndev);
+
+	return 0;
+}
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+static int registers_show(struct seq_file *s, void *v)
+{
+	struct nitrox_device *ndev = s->private;
+	u64 offset;
+
+	/* NPS DMA stats */
+	offset = NPS_STATS_PKT_DMA_RD_CNT;
+	seq_printf(s, "NPS_STATS_PKT_DMA_RD_CNT  0x%016llx\n",
+		   nitrox_read_csr(ndev, offset));
+	offset = NPS_STATS_PKT_DMA_WR_CNT;
+	seq_printf(s, "NPS_STATS_PKT_DMA_WR_CNT  0x%016llx\n",
+		   nitrox_read_csr(ndev, offset));
+
+	/* BMI/BMO stats */
+	offset = BMI_NPS_PKT_CNT;
+	seq_printf(s, "BMI_NPS_PKT_CNT  0x%016llx\n",
+		   nitrox_read_csr(ndev, offset));
+	offset = BMO_NPS_SLC_PKT_CNT;
+	seq_printf(s, "BMO_NPS_PKT_CNT  0x%016llx\n",
+		   nitrox_read_csr(ndev, offset));
+
+	return 0;
+}
+
+static int registers_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, registers_show, inode->i_private);
+}
+
+static const struct file_operations register_fops = {
+	.owner = THIS_MODULE,
+	.open = registers_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int firmware_show(struct seq_file *s, void *v)
+{
+	struct nitrox_device *ndev = s->private;
+
+	seq_printf(s, "Version: %s\n", ndev->hw.fw_name);
+	return 0;
+}
+
+static int firmware_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, firmware_show, inode->i_private);
+}
+
+static const struct file_operations firmware_fops = {
+	.owner = THIS_MODULE,
+	.open = firmware_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int nitrox_show(struct seq_file *s, void *v)
+{
+	struct nitrox_device *ndev = s->private;
+
+	seq_printf(s, "NITROX-5 [idx: %d]\n", ndev->idx);
+	seq_printf(s, "  Revision ID: 0x%0x\n", ndev->hw.revision_id);
+	seq_printf(s, "  Cores [AE: %u  SE: %u]\n",
+		   ndev->hw.ae_cores, ndev->hw.se_cores);
+	seq_printf(s, "  Number of Queues: %u\n", ndev->nr_queues);
+	seq_printf(s, "  Queue length: %u\n", ndev->qlen);
+	seq_printf(s, "  Node: %u\n", ndev->node);
+
+	return 0;
+}
+
+static int nitrox_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, nitrox_show, inode->i_private);
+}
+
+static const struct file_operations nitrox_fops = {
+	.owner = THIS_MODULE,
+	.open = nitrox_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static void nitrox_debugfs_exit(struct nitrox_device *ndev)
+{
+	debugfs_remove_recursive(ndev->debugfs_dir);
+	ndev->debugfs_dir = NULL;
+}
+
+static int nitrox_debugfs_init(struct nitrox_device *ndev)
+{
+	struct dentry *dir, *f;
+
+	dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	if (!dir)
+		return -ENOMEM;
+
+	ndev->debugfs_dir = dir;
+	f = debugfs_create_file("counters", 0400, dir, ndev, &register_fops);
+	if (!f)
+		goto err;
+	f = debugfs_create_file("firmware", 0400, dir, ndev, &firmware_fops);
+	if (!f)
+		goto err;
+	f = debugfs_create_file("nitrox", 0400, dir, ndev, &nitrox_fops);
+	if (!f)
+		goto err;
+
+	return 0;
+
+err:
+	nitrox_debugfs_exit(ndev);
+	return -ENODEV;
+}
+#else
+static int nitrox_debugfs_init(struct nitrox_device *ndev)
+{
+	return 0;
+}
+
+static void nitrox_debugfs_exit(struct nitrox_device *ndev)
+{
+}
+#endif
+
+/**
+ * nitrox_probe - NITROX Initialization function.
+ * @pdev: PCI device information struct
+ * @id: entry in nitrox_pci_tbl
+ *
+ * Return: 0, if the driver is bound to the device, or
+ *         a negative error if there is failure.
+ */
+static int nitrox_probe(struct pci_dev *pdev,
+			const struct pci_device_id *id)
+{
+	struct nitrox_device *ndev;
+	int err;
+
+	dev_info_once(&pdev->dev, "%s driver version %s\n",
+		      nitrox_driver_name, DRIVER_VERSION);
+
+	err = pci_enable_device_mem(pdev);
+	if (err)
+		return err;
+
+	/* do FLR */
+	err = nitrox_reset_device(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "FLR failed\n");
+		goto disable_dev;
+	}
+
+	if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
+		dev_dbg(&pdev->dev, "DMA to 64-BIT address\n");
+	} else {
+		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+		if (err) {
+			dev_err(&pdev->dev, "DMA configuration failed\n");
+			goto disable_dev;
+		}
+	}
+
+	err = pci_request_mem_regions(pdev, nitrox_driver_name);
+	if (err)
+		goto disable_dev;
+	pci_set_master(pdev);
+
+	ndev = kzalloc(sizeof(*ndev), GFP_KERNEL);
+	if (!ndev) {
+		err = -ENOMEM;
+		goto ndev_fail;
+	}
+
+	pci_set_drvdata(pdev, ndev);
+	ndev->pdev = pdev;
+	/* add to device list */
+	nitrox_add_to_devlist(ndev);
+
+	ndev->hw.vendor_id = pdev->vendor;
+	ndev->hw.device_id = pdev->device;
+	ndev->hw.revision_id = pdev->revision;
+	/* command timeout in jiffies */
+	ndev->timeout = msecs_to_jiffies(CMD_TIMEOUT);
+	ndev->node = dev_to_node(&pdev->dev);
+	if (ndev->node == NUMA_NO_NODE)
+		ndev->node = 0;
+
+	ndev->bar_addr = ioremap(pci_resource_start(pdev, 0),
+				 pci_resource_len(pdev, 0));
+	if (!ndev->bar_addr) {
+		err = -EIO;
+		goto ioremap_err;
+	}
+	/* allocate command queues based on cpus, max queues are 64 */
+	ndev->nr_queues = min_t(u32, MAX_PF_QUEUES, num_online_cpus());
+	ndev->qlen = qlen;
+
+	err = nitrox_pf_sw_init(ndev);
+	if (err)
+		goto pf_sw_fail;
+
+	err = nitrox_pf_hw_init(ndev);
+	if (err)
+		goto pf_hw_fail;
+
+	err = nitrox_debugfs_init(ndev);
+	if (err)
+		goto pf_hw_fail;
+
+	set_bit(NITROX_READY, &ndev->status);
+	/* barrier to sync with other cpus */
+	smp_mb__after_atomic();
+
+	err = nitrox_crypto_register();
+	if (err)
+		goto crypto_fail;
+
+	return 0;
+
+crypto_fail:
+	nitrox_debugfs_exit(ndev);
+	clear_bit(NITROX_READY, &ndev->status);
+	/* barrier to sync with other cpus */
+	smp_mb__after_atomic();
+pf_hw_fail:
+	nitrox_pf_sw_cleanup(ndev);
+pf_sw_fail:
+	iounmap(ndev->bar_addr);
+ioremap_err:
+	nitrox_remove_from_devlist(ndev);
+	kfree(ndev);
+	pci_set_drvdata(pdev, NULL);
+ndev_fail:
+	pci_release_mem_regions(pdev);
+disable_dev:
+	pci_disable_device(pdev);
+	return err;
+}
+
+/**
+ * nitrox_remove - Unbind the driver from the device.
+ * @pdev: PCI device information struct
+ */
+static void nitrox_remove(struct pci_dev *pdev)
+{
+	struct nitrox_device *ndev = pci_get_drvdata(pdev);
+
+	if (!ndev)
+		return;
+
+	if (!refcount_dec_and_test(&ndev->refcnt)) {
+		dev_err(DEV(ndev), "Device refcnt not zero (%d)\n",
+			refcount_read(&ndev->refcnt));
+		return;
+	}
+
+	dev_info(DEV(ndev), "Removing Device %x:%x\n",
+		 ndev->hw.vendor_id, ndev->hw.device_id);
+
+	clear_bit(NITROX_READY, &ndev->status);
+	/* barrier to sync with other cpus */
+	smp_mb__after_atomic();
+
+	nitrox_remove_from_devlist(ndev);
+	nitrox_crypto_unregister();
+	nitrox_debugfs_exit(ndev);
+	nitrox_pf_sw_cleanup(ndev);
+
+	iounmap(ndev->bar_addr);
+	kfree(ndev);
+
+	pci_set_drvdata(pdev, NULL);
+	pci_release_mem_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+static void nitrox_shutdown(struct pci_dev *pdev)
+{
+	pci_set_drvdata(pdev, NULL);
+	pci_release_mem_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+static struct pci_driver nitrox_driver = {
+	.name = nitrox_driver_name,
+	.id_table = nitrox_pci_tbl,
+	.probe = nitrox_probe,
+	.remove	= nitrox_remove,
+	.shutdown = nitrox_shutdown,
+};
+
+module_pci_driver(nitrox_driver);
+
+MODULE_AUTHOR("Srikanth Jampala <Jampala.Srikanth@cavium.com>");
+MODULE_DESCRIPTION("Cavium CNN55XX PF Driver" DRIVER_VERSION " ");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_FIRMWARE(SE_FW);
diff --git a/drivers/crypto/cavium/nitrox/nitrox_req.h b/drivers/crypto/cavium/nitrox/nitrox_req.h
new file mode 100644
index 0000000..74f4c20
--- /dev/null
+++ b/drivers/crypto/cavium/nitrox/nitrox_req.h
@@ -0,0 +1,445 @@
+#ifndef __NITROX_REQ_H
+#define __NITROX_REQ_H
+
+#include <linux/dma-mapping.h>
+#include <crypto/aes.h>
+
+#include "nitrox_dev.h"
+
+/**
+ * struct gphdr - General purpose Header
+ * @param0: first parameter.
+ * @param1: second parameter.
+ * @param2: third parameter.
+ * @param3: fourth parameter.
+ *
+ * Params tell the iv and enc/dec data offsets.
+ */
+struct gphdr {
+	__be16 param0;
+	__be16 param1;
+	__be16 param2;
+	__be16 param3;
+};
+
+/**
+ * union se_req_ctrl - SE request information.
+ * @arg: Minor number of the opcode
+ * @ctxc: Context control.
+ * @unca: Uncertainty enabled.
+ * @info: Additional information for SE cores.
+ * @ctxl: Context length in bytes.
+ * @uddl: User defined data length
+ */
+union se_req_ctrl {
+	u64 value;
+	struct {
+		u64 raz	: 22;
+		u64 arg	: 8;
+		u64 ctxc : 2;
+		u64 unca : 1;
+		u64 info : 3;
+		u64 unc : 8;
+		u64 ctxl : 12;
+		u64 uddl : 8;
+	} s;
+};
+
+struct nitrox_sglist {
+	u16 len;
+	u16 raz0;
+	u32 raz1;
+	dma_addr_t dma;
+};
+
+#define MAX_IV_LEN 16
+
+/**
+ * struct se_crypto_request - SE crypto request structure.
+ * @opcode: Request opcode (enc/dec)
+ * @flags: flags from crypto subsystem
+ * @ctx_handle: Crypto context handle.
+ * @gph: GP Header
+ * @ctrl: Request Information.
+ * @src: Input scatterlist
+ * @dst: Output scatterlist
+ */
+struct se_crypto_request {
+	u8 opcode;
+	gfp_t gfp;
+	u32 flags;
+	u64 ctx_handle;
+
+	struct gphdr gph;
+	union se_req_ctrl ctrl;
+
+	u8 iv[MAX_IV_LEN];
+	u16 ivsize;
+
+	struct scatterlist *src;
+	struct scatterlist *dst;
+};
+
+/* Crypto opcodes */
+#define FLEXI_CRYPTO_ENCRYPT_HMAC	0x33
+#define ENCRYPT	0
+#define DECRYPT 1
+
+/* IV from context */
+#define IV_FROM_CTX	0
+/* IV from Input data */
+#define IV_FROM_DPTR	1
+
+/**
+ * cipher opcodes for firmware
+ */
+enum flexi_cipher {
+	CIPHER_NULL = 0,
+	CIPHER_3DES_CBC,
+	CIPHER_3DES_ECB,
+	CIPHER_AES_CBC,
+	CIPHER_AES_ECB,
+	CIPHER_AES_CFB,
+	CIPHER_AES_CTR,
+	CIPHER_AES_GCM,
+	CIPHER_AES_XTS,
+	CIPHER_AES_CCM,
+	CIPHER_AES_CBC_CTS,
+	CIPHER_AES_ECB_CTS,
+	CIPHER_INVALID
+};
+
+/**
+ * struct crypto_keys - Crypto keys
+ * @key: Encryption key or KEY1 for AES-XTS
+ * @iv: Encryption IV or Tweak for AES-XTS
+ */
+struct crypto_keys {
+	union {
+		u8 key[AES_MAX_KEY_SIZE];
+		u8 key1[AES_MAX_KEY_SIZE];
+	} u;
+	u8 iv[AES_BLOCK_SIZE];
+};
+
+/**
+ * struct auth_keys - Authentication keys
+ * @ipad: IPAD or KEY2 for AES-XTS
+ * @opad: OPAD or AUTH KEY if auth_input_type = 1
+ */
+struct auth_keys {
+	union {
+		u8 ipad[64];
+		u8 key2[64];
+	} u;
+	u8 opad[64];
+};
+
+/**
+ * struct flexi_crypto_context - Crypto context
+ * @cipher_type: Encryption cipher type
+ * @aes_keylen: AES key length
+ * @iv_source: Encryption IV source
+ * @hash_type: Authentication type
+ * @auth_input_type: Authentication input type
+ *   1 - Authentication IV and KEY, microcode calculates OPAD/IPAD
+ *   0 - Authentication OPAD/IPAD
+ * @mac_len: mac length
+ * @crypto: Crypto keys
+ * @auth: Authentication keys
+ */
+struct flexi_crypto_context {
+	union {
+		__be64 flags;
+		struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+			u64 cipher_type	: 4;
+			u64 reserved_59	: 1;
+			u64 aes_keylen : 2;
+			u64 iv_source : 1;
+			u64 hash_type : 4;
+			u64 reserved_49_51 : 3;
+			u64 auth_input_type: 1;
+			u64 mac_len : 8;
+			u64 reserved_0_39 : 40;
+#else
+			u64 reserved_0_39 : 40;
+			u64 mac_len : 8;
+			u64 auth_input_type: 1;
+			u64 reserved_49_51 : 3;
+			u64 hash_type : 4;
+			u64 iv_source : 1;
+			u64 aes_keylen : 2;
+			u64 reserved_59	: 1;
+			u64 cipher_type	: 4;
+#endif
+		} w0;
+	};
+
+	struct crypto_keys crypto;
+	struct auth_keys auth;
+};
+
+struct nitrox_crypto_ctx {
+	struct nitrox_device *ndev;
+	union {
+		u64 ctx_handle;
+		struct flexi_crypto_context *fctx;
+	} u;
+};
+
+struct nitrox_kcrypt_request {
+	struct se_crypto_request creq;
+	struct nitrox_crypto_ctx *nctx;
+	struct skcipher_request *skreq;
+};
+
+/**
+ * union pkt_instr_hdr - Packet Instruction Header
+ * @g: Gather used
+ *   When [G] is set and [GSZ] != 0, the instruction is
+ *   indirect gather instruction.
+ *   When [G] is set and [GSZ] = 0, the instruction is
+ *   direct gather instruction.
+ * @gsz: Number of pointers in the indirect gather list
+ * @ihi: When set hardware duplicates the 1st 8 bytes of pkt_instr_hdr
+ *   and adds them to the packet after the pkt_instr_hdr but before any UDD
+ * @ssz: Not used by the input hardware. But can become slc_store_int[SSZ]
+ *   when [IHI] is set.
+ * @fsz: The number of front data bytes directly included in the
+ *   PCIe instruction.
+ * @tlen: The length of the input packet in bytes, include:
+ *   - 16B pkt_hdr
+ *   - Inline context bytes if any,
+ *   - UDD if any,
+ *   - packet payload bytes
+ */
+union pkt_instr_hdr {
+	u64 value;
+	struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+		u64 raz_48_63 : 16;
+		u64 g : 1;
+		u64 gsz	: 7;
+		u64 ihi	: 1;
+		u64 ssz	: 7;
+		u64 raz_30_31 : 2;
+		u64 fsz	: 6;
+		u64 raz_16_23 : 8;
+		u64 tlen : 16;
+#else
+		u64 tlen : 16;
+		u64 raz_16_23 : 8;
+		u64 fsz	: 6;
+		u64 raz_30_31 : 2;
+		u64 ssz	: 7;
+		u64 ihi	: 1;
+		u64 gsz	: 7;
+		u64 g : 1;
+		u64 raz_48_63 : 16;
+#endif
+	} s;
+};
+
+/**
+ * union pkt_hdr - Packet Input Header
+ * @opcode: Request opcode (Major)
+ * @arg: Request opcode (Minor)
+ * @ctxc: Context control.
+ * @unca: When set [UNC] is the uncertainty count for an input packet.
+ *        The hardware uses uncertainty counts to predict
+ *        output buffer use and avoid deadlock.
+ * @info: Not used by input hardware. Available for use
+ *        during SE processing.
+ * @destport: The expected destination port/ring/channel for the packet.
+ * @unc: Uncertainty count for an input packet.
+ * @grp: SE group that will process the input packet.
+ * @ctxl: Context Length in 64-bit words.
+ * @uddl: User-defined data (UDD) length in bytes.
+ * @ctxp: Context pointer. CTXP<63,2:0> must be zero in all cases.
+ */
+union pkt_hdr {
+	u64 value[2];
+	struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+		u64 opcode : 8;
+		u64 arg	: 8;
+		u64 ctxc : 2;
+		u64 unca : 1;
+		u64 raz_44 : 1;
+		u64 info : 3;
+		u64 destport : 9;
+		u64 unc	: 8;
+		u64 raz_19_23 : 5;
+		u64 grp	: 3;
+		u64 raz_15 : 1;
+		u64 ctxl : 7;
+		u64 uddl : 8;
+#else
+		u64 uddl : 8;
+		u64 ctxl : 7;
+		u64 raz_15 : 1;
+		u64 grp	: 3;
+		u64 raz_19_23 : 5;
+		u64 unc	: 8;
+		u64 destport : 9;
+		u64 info : 3;
+		u64 raz_44 : 1;
+		u64 unca : 1;
+		u64 ctxc : 2;
+		u64 arg	: 8;
+		u64 opcode : 8;
+#endif
+		__be64 ctxp;
+	} s;
+};
+
+/**
+ * union slc_store_info - Solicited Packet Output Store Information.
+ * @ssz: The number of scatterlist pointers for the solicited output port
+ *       packet.
+ * @rptr: The result pointer for the solicited output port packet.
+ *        If [SSZ]=0, [RPTR] must point directly to a buffer on the remote
+ *        host that is large enough to hold the entire output packet.
+ *        If [SSZ]!=0, [RPTR] must point to an array of ([SSZ]+3)/4
+ *        sglist components at [RPTR] on the remote host.
+ */
+union slc_store_info {
+	u64 value[2];
+	struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+		u64 raz_39_63 : 25;
+		u64 ssz	: 7;
+		u64 raz_0_31 : 32;
+#else
+		u64 raz_0_31 : 32;
+		u64 ssz	: 7;
+		u64 raz_39_63 : 25;
+#endif
+		__be64 rptr;
+	} s;
+};
+
+/**
+ * struct nps_pkt_instr - NPS Packet Instruction of SE cores.
+ * @dptr0 : Input pointer points to buffer in remote host.
+ * @ih: Packet Instruction Header (8 bytes)
+ * @irh: Packet Input Header (16 bytes)
+ * @slc: Solicited Packet Output Store Information (16 bytes)
+ * @fdata: Front data
+ *
+ * 64-Byte Instruction Format
+ */
+struct nps_pkt_instr {
+	__be64 dptr0;
+	union pkt_instr_hdr ih;
+	union pkt_hdr irh;
+	union slc_store_info slc;
+	u64 fdata[2];
+};
+
+/**
+ * struct ctx_hdr - Book keeping data about the crypto context
+ * @pool: Pool used to allocate crypto context
+ * @dma: Base DMA address of the crypto context
+ * @ctx_dma: Actual usable crypto context for NITROX
+ */
+struct ctx_hdr {
+	struct dma_pool *pool;
+	dma_addr_t dma;
+	dma_addr_t ctx_dma;
+};
+
+/*
+ * struct nitrox_sgcomp - SG list component format
+ * @len0: The number of bytes at [PTR0] on the remote host.
+ * @len1: The number of bytes at [PTR1] on the remote host.
+ * @len2: The number of bytes at [PTR2] on the remote host.
+ * @len3: The number of bytes at [PTR3] on the remote host.
+ * @dma0: First pointer point to buffer in remote host.
+ * @dma1: Second pointer point to buffer in remote host.
+ * @dma2: Third pointer point to buffer in remote host.
+ * @dma3: Fourth pointer point to buffer in remote host.
+ */
+struct nitrox_sgcomp {
+	__be16 len[4];
+	__be64 dma[4];
+};
+
+/*
+ * struct nitrox_sgtable - SG list information
+ * @map_bufs_cnt: Number of buffers mapped
+ * @nr_sgcomp: Number of sglist components
+ * @total_bytes: Total bytes in sglist.
+ * @len: Total sglist components length.
+ * @dma: DMA address of sglist component.
+ * @dir: DMA direction.
+ * @buf: crypto request buffer.
+ * @sglist: SG list of input/output buffers.
+ * @sgcomp: sglist component for NITROX.
+ */
+struct nitrox_sgtable {
+	u8 map_bufs_cnt;
+	u8 nr_sgcomp;
+	u16 total_bytes;
+	u32 len;
+	dma_addr_t dma;
+	enum dma_data_direction dir;
+
+	struct scatterlist *buf;
+	struct nitrox_sglist *sglist;
+	struct nitrox_sgcomp *sgcomp;
+};
+
+/* Response Header Length */
+#define ORH_HLEN	8
+/* Completion bytes Length */
+#define COMP_HLEN	8
+
+struct resp_hdr {
+	u64 orh;
+	dma_addr_t orh_dma;
+	u64 completion;
+	dma_addr_t completion_dma;
+};
+
+typedef void (*completion_t)(struct skcipher_request *skreq, int err);
+
+/**
+ * struct nitrox_softreq - Represents the NITROX Request.
+ * @response: response list entry
+ * @backlog: Backlog list entry
+ * @ndev: Device used to submit the request
+ * @cmdq: Command queue for submission
+ * @resp: Response headers
+ * @instr: 64B instruction
+ * @in: SG table for input
+ * @out: SG table for output
+ * @tstamp: Request submitted time in jiffies
+ * @callback: callback after request completion/timeout
+ * @skreq: skcipher request (presumably the @callback argument; confirm)
+ */
+struct nitrox_softreq {
+	struct list_head response;
+	struct list_head backlog;
+
+	u32 flags;
+	gfp_t gfp;
+	atomic_t status;
+	bool inplace;
+
+	struct nitrox_device *ndev;
+	struct nitrox_cmdq *cmdq;
+
+	struct nps_pkt_instr instr;
+	struct resp_hdr resp;
+	struct nitrox_sgtable in;
+	struct nitrox_sgtable out;
+
+	unsigned long tstamp;
+
+	completion_t callback;
+	struct skcipher_request *skreq;
+};
+
+#endif /* __NITROX_REQ_H */
diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
new file mode 100644
index 0000000..4bb4377
--- /dev/null
+++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
@@ -0,0 +1,735 @@
+#include <linux/gfp.h>
+#include <linux/workqueue.h>
+#include <crypto/internal/skcipher.h>
+
+#include "nitrox_dev.h"
+#include "nitrox_req.h"
+#include "nitrox_csr.h"
+#include "nitrox_req.h"
+
+/* SLC_STORE_INFO */
+#define MIN_UDD_LEN 16
+/* PKT_IN_HDR + SLC_STORE_INFO */
+#define FDATA_SIZE 32
+/* Base destination port for the solicited requests */
+#define SOLICIT_BASE_DPORT 256
+#define PENDING_SIG	0xFFFFFFFFFFFFFFFFUL
+
+#define REQ_NOT_POSTED 1
+#define REQ_BACKLOG    2
+#define REQ_POSTED     3
+
+/**
+ * Response codes from SE microcode
+ * 0x00 - Success
+ *   Completion with no error
+ * 0x43 - ERR_GC_DATA_LEN_INVALID
+ *   Invalid Data length if Encryption Data length is
+ *   less than 16 bytes for AES-XTS and AES-CTS.
+ * 0x45 - ERR_GC_CTX_LEN_INVALID
+ *   Invalid context length: CTXL != 23 words.
+ * 0x4F - ERR_GC_DOCSIS_CIPHER_INVALID
+ *   DOCSIS support is enabled with other than
+ *   AES/DES-CBC mode encryption.
+ * 0x50 - ERR_GC_DOCSIS_OFFSET_INVALID
+ *   Authentication offset is other than 0 with
+ *   Encryption IV source = 0.
+ *   Authentication offset is other than 8 (DES)/16 (AES)
+ *   with Encryption IV source = 1
+ * 0x51 - ERR_GC_CRC32_INVALID_SELECTION
+ *   CRC32 is enabled for other than DOCSIS encryption.
+ * 0x52 - ERR_GC_AES_CCM_FLAG_INVALID
+ *   Invalid flag options in AES-CCM IV.
+ */
+
+/**
+ * softreq_unmap_sgbufs - unmap and free the sg lists.
+ * @sr: Request structure whose in and out SG tables are
+ *      unmapped and freed
+ */
+static void softreq_unmap_sgbufs(struct nitrox_softreq *sr)
+{
+	struct nitrox_device *ndev = sr->ndev;
+	struct device *dev = DEV(ndev);
+	struct nitrox_sglist *sglist;
+
+	/* unmap in sgbuf */
+	sglist = sr->in.sglist;
+	if (!sglist)
+		goto out_unmap;
+
+	/* unmap iv */
+	dma_unmap_single(dev, sglist->dma, sglist->len, DMA_BIDIRECTIONAL);
+	/* unmap src sglist, using the nents originally given to dma_map_sg */
+	dma_unmap_sg(dev, sr->in.buf, sg_nents(sr->in.buf), sr->in.dir);
+	/* unmap gather component */
+	dma_unmap_single(dev, sr->in.dma, sr->in.len, DMA_TO_DEVICE);
+	kfree(sr->in.sglist);
+	kfree(sr->in.sgcomp);
+	sr->in.sglist = NULL;
+	sr->in.buf = NULL;
+	sr->in.map_bufs_cnt = 0;
+
+out_unmap:
+	/* unmap out sgbuf */
+	sglist = sr->out.sglist;
+	if (!sglist)
+		return;
+
+	/* unmap orh */
+	dma_unmap_single(dev, sr->resp.orh_dma, ORH_HLEN, sr->out.dir);
+
+	/* unmap dst sglist */
+	if (!sr->inplace) {
+		dma_unmap_sg(dev, sr->out.buf, sg_nents(sr->out.buf),
+			     sr->out.dir);
+	}
+	/* unmap completion */
+	dma_unmap_single(dev, sr->resp.completion_dma, COMP_HLEN, sr->out.dir);
+
+	/* unmap scatter component */
+	dma_unmap_single(dev, sr->out.dma, sr->out.len, DMA_TO_DEVICE);
+	kfree(sr->out.sglist);
+	kfree(sr->out.sgcomp);
+	sr->out.sglist = NULL;
+	sr->out.buf = NULL;
+	sr->out.map_bufs_cnt = 0;
+}
+
+static void softreq_destroy(struct nitrox_softreq *sr)
+{
+	softreq_unmap_sgbufs(sr);
+	kfree(sr);
+}
+
+/**
+ * create_sg_component - create SG components for N5 device.
+ * @sr: Request structure
+ * @sgtbl: SG table
+ * @map_nents: number of sglist entries to pack, four per component
+ *
+ * Component structure
+ *
+ *   63     48 47     32 31    16 15      0
+ *   --------------------------------------
+ *   |   LEN0  |  LEN1  |  LEN2  |  LEN3  |
+ *   |-------------------------------------
+ *   |               PTR0                 |
+ *   --------------------------------------
+ *   |               PTR1                 |
+ *   --------------------------------------
+ *   |               PTR2                 |
+ *   --------------------------------------
+ *   |               PTR3                 |
+ *   --------------------------------------
+ *
+ *   Returns 0 if success or a negative errno code on error.
+ */
+static int create_sg_component(struct nitrox_softreq *sr,
+			       struct nitrox_sgtable *sgtbl, int map_nents)
+{
+	struct nitrox_device *ndev = sr->ndev;
+	struct nitrox_sgcomp *sgcomp;
+	struct nitrox_sglist *sglist;
+	dma_addr_t dma;
+	size_t sz_comp;
+	int i, j, nr_sgcomp;
+
+	nr_sgcomp = roundup(map_nents, 4) / 4;
+
+	/* each component holds 4 dma pointers */
+	sz_comp = nr_sgcomp * sizeof(*sgcomp);
+	sgcomp = kzalloc(sz_comp, sr->gfp);
+	if (!sgcomp)
+		return -ENOMEM;
+
+	sgtbl->sgcomp = sgcomp;
+	sgtbl->nr_sgcomp = nr_sgcomp;
+
+	sglist = sgtbl->sglist;
+	/* fill all 4 slots; callers pad sglist to a multiple of 4 entries */
+	for (i = 0; i < nr_sgcomp; i++) {
+		for (j = 0; j < 4; j++) {
+			sgcomp->len[j] = cpu_to_be16(sglist->len);
+			sgcomp->dma[j] = cpu_to_be64(sglist->dma);
+			sglist++;
+		}
+		sgcomp++;
+	}
+	/* map the device sg component */
+	dma = dma_map_single(DEV(ndev), sgtbl->sgcomp, sz_comp, DMA_TO_DEVICE);
+	if (dma_mapping_error(DEV(ndev), dma)) {
+		kfree(sgtbl->sgcomp);
+		sgtbl->sgcomp = NULL;
+		return -ENOMEM;
+	}
+
+	sgtbl->dma = dma;
+	sgtbl->len = sz_comp;
+
+	return 0;
+}
+
+/**
+ * dma_map_inbufs - DMA map input sglist and creates sglist component
+ *                  for N5 device.
+ * @sr: Request structure
+ * @req: Crypto request structure
+ *
+ * Returns 0 if successful or a negative errno code on error.
+ */
+static int dma_map_inbufs(struct nitrox_softreq *sr,
+			  struct se_crypto_request *req)
+{
+	struct device *dev = DEV(sr->ndev);
+	struct scatterlist *sg = req->src;
+	struct nitrox_sglist *glist;
+	int i, nents, ret = 0;
+	dma_addr_t dma;
+	size_t sz;
+
+	nents = sg_nents(req->src);
+
+	/* create gather list: IV + src entries, padded to 4 */
+	sz = roundup((1 + nents), 4) * sizeof(*glist);
+	glist = kzalloc(sz, sr->gfp);
+	if (!glist)
+		return -ENOMEM;
+
+	sr->in.sglist = glist;
+	/* map IV */
+	dma = dma_map_single(dev, &req->iv, req->ivsize, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, dma)) {
+		ret = -EINVAL;
+		goto iv_map_err;
+	}
+
+	sr->in.dir = (req->src == req->dst) ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+	/* map src entries; nents becomes the number of mapped entries */
+	nents = dma_map_sg(dev, req->src, nents, sr->in.dir);
+	if (!nents) {
+		ret = -EINVAL;
+		goto src_map_err;
+	}
+	sr->in.buf = req->src;
+
+	/* store the mappings */
+	glist->len = req->ivsize;
+	glist->dma = dma;
+	glist++;
+	sr->in.total_bytes += req->ivsize;
+
+	for_each_sg(req->src, sg, nents, i) {
+		glist->len = sg_dma_len(sg);
+		glist->dma = sg_dma_address(sg);
+		sr->in.total_bytes += glist->len;
+		glist++;
+	}
+	/* mapped buffer count: the IV plus the mapped src entries */
+	sr->in.map_bufs_cnt = (1 + nents);
+
+	/* create NITROX gather component */
+	ret = create_sg_component(sr, &sr->in, sr->in.map_bufs_cnt);
+	if (ret)
+		goto incomp_err;
+
+	return 0;
+
+incomp_err:
+	dma_unmap_sg(dev, req->src, sg_nents(req->src), sr->in.dir);
+	sr->in.map_bufs_cnt = 0;
+src_map_err:
+	dma_unmap_single(dev, dma, req->ivsize, DMA_BIDIRECTIONAL);
+iv_map_err:
+	kfree(sr->in.sglist);
+	sr->in.sglist = NULL;
+	return ret;
+}
+
+static int dma_map_outbufs(struct nitrox_softreq *sr,
+			   struct se_crypto_request *req)
+{
+	struct device *dev = DEV(sr->ndev);
+	struct nitrox_sglist *glist = sr->in.sglist;
+	struct nitrox_sglist *slist;
+	struct scatterlist *sg;
+	int i, nents, map_bufs_cnt, ret = 0;
+	size_t sz;
+
+	nents = sg_nents(req->dst);
+
+	/* create scatter list ORH, IV, dst entries and Completion header */
+	sz = roundup((3 + nents), 4) * sizeof(*slist);
+	slist = kzalloc(sz, sr->gfp);
+	if (!slist)
+		return -ENOMEM;
+
+	sr->out.sglist = slist;
+	sr->out.dir = DMA_BIDIRECTIONAL;
+	/* map ORH */
+	sr->resp.orh_dma = dma_map_single(dev, &sr->resp.orh, ORH_HLEN,
+					  sr->out.dir);
+	if (dma_mapping_error(dev, sr->resp.orh_dma)) {
+		ret = -EINVAL;
+		goto orh_map_err;
+	}
+
+	/* map completion */
+	sr->resp.completion_dma = dma_map_single(dev, &sr->resp.completion,
+						 COMP_HLEN, sr->out.dir);
+	if (dma_mapping_error(dev, sr->resp.completion_dma)) {
+		ret = -EINVAL;
+		goto compl_map_err;
+	}
+
+	sr->inplace = (req->src == req->dst) ? true : false;
+	/* out of place: dst is a separate list and needs its own mapping */
+	if (!sr->inplace) {
+		nents = dma_map_sg(dev, req->dst, nents, sr->out.dir);
+		if (!nents) {
+			ret = -EINVAL;
+			goto dst_map_err;
+		}
+	}
+	sr->out.buf = req->dst;
+
+	/* store the mappings */
+	/* orh */
+	slist->len = ORH_HLEN;
+	slist->dma = sr->resp.orh_dma;
+	slist++;
+
+	/* in place: reuse the gather list mappings (IV + src entries) */
+	if (sr->inplace) {
+		nents = sr->in.map_bufs_cnt - 1;
+		map_bufs_cnt = sr->in.map_bufs_cnt;
+		while (map_bufs_cnt--) {
+			slist->len = glist->len;
+			slist->dma = glist->dma;
+			slist++;
+			glist++;
+		}
+	} else {
+		/* copy iv mapping */
+		slist->len = glist->len;
+		slist->dma = glist->dma;
+		slist++;
+		/* copy remaining maps */
+		for_each_sg(req->dst, sg, nents, i) {
+			slist->len = sg_dma_len(sg);
+			slist->dma = sg_dma_address(sg);
+			slist++;
+		}
+	}
+
+	/* completion */
+	slist->len = COMP_HLEN;
+	slist->dma = sr->resp.completion_dma;
+
+	sr->out.map_bufs_cnt = (3 + nents);
+
+	ret = create_sg_component(sr, &sr->out, sr->out.map_bufs_cnt);
+	if (ret)
+		goto outcomp_map_err;
+
+	return 0;
+
+outcomp_map_err:
+	if (!sr->inplace)
+		dma_unmap_sg(dev, req->dst, sg_nents(req->dst), sr->out.dir);
+	sr->out.map_bufs_cnt = 0;
+	sr->out.buf = NULL;
+dst_map_err:
+	dma_unmap_single(dev, sr->resp.completion_dma, COMP_HLEN, sr->out.dir);
+	sr->resp.completion_dma = 0;
+compl_map_err:
+	dma_unmap_single(dev, sr->resp.orh_dma, ORH_HLEN, sr->out.dir);
+	sr->resp.orh_dma = 0;
+orh_map_err:
+	kfree(sr->out.sglist);
+	sr->out.sglist = NULL;
+	return ret;
+}
+
+static inline int softreq_map_iobuf(struct nitrox_softreq *sr,
+				    struct se_crypto_request *creq)
+{
+	int ret;
+
+	ret = dma_map_inbufs(sr, creq);
+	if (ret)
+		return ret;
+
+	ret = dma_map_outbufs(sr, creq);
+	if (ret)
+		softreq_unmap_sgbufs(sr);
+
+	return ret;
+}
+
+static inline void backlog_list_add(struct nitrox_softreq *sr,
+				    struct nitrox_cmdq *cmdq)
+{
+	INIT_LIST_HEAD(&sr->backlog);
+
+	spin_lock_bh(&cmdq->backlog_lock);
+	list_add_tail(&sr->backlog, &cmdq->backlog_head);
+	atomic_inc(&cmdq->backlog_count);
+	atomic_set(&sr->status, REQ_BACKLOG);
+	spin_unlock_bh(&cmdq->backlog_lock);
+}
+
+static inline void response_list_add(struct nitrox_softreq *sr,
+				     struct nitrox_cmdq *cmdq)
+{
+	INIT_LIST_HEAD(&sr->response);
+
+	spin_lock_bh(&cmdq->response_lock);
+	list_add_tail(&sr->response, &cmdq->response_head);
+	spin_unlock_bh(&cmdq->response_lock);
+}
+
+static inline void response_list_del(struct nitrox_softreq *sr,
+				     struct nitrox_cmdq *cmdq)
+{
+	spin_lock_bh(&cmdq->response_lock);
+	list_del(&sr->response);
+	spin_unlock_bh(&cmdq->response_lock);
+}
+
+static struct nitrox_softreq *
+get_first_response_entry(struct nitrox_cmdq *cmdq)
+{
+	return list_first_entry_or_null(&cmdq->response_head,
+					struct nitrox_softreq, response);
+}
+
+static inline bool cmdq_full(struct nitrox_cmdq *cmdq, int qlen)
+{
+	if (atomic_inc_return(&cmdq->pending_count) > qlen) {
+		atomic_dec(&cmdq->pending_count);
+		/* sync with other cpus */
+		smp_mb__after_atomic();
+		return true;
+	}
+	return false;
+}
+
+/**
+ * post_se_instr - Post SE instruction to Packet Input ring
+ * @sr: Request structure
+ * @cmdq: Command queue to post to
+ *
+ * Does not check for ring space; callers reserve a slot via cmdq_full().
+ */
+static void post_se_instr(struct nitrox_softreq *sr,
+			  struct nitrox_cmdq *cmdq)
+{
+	struct nitrox_device *ndev = sr->ndev;
+	union nps_pkt_in_instr_baoff_dbell pkt_in_baoff_dbell;
+	u64 offset;
+	u8 *ent;
+
+	spin_lock_bh(&cmdq->cmdq_lock);
+
+	/* get the next write offset */
+	offset = NPS_PKT_IN_INSTR_BAOFF_DBELLX(cmdq->qno);
+	pkt_in_baoff_dbell.value = nitrox_read_csr(ndev, offset);
+	/* copy the instruction */
+	ent = cmdq->head + pkt_in_baoff_dbell.s.aoff;
+	memcpy(ent, &sr->instr, cmdq->instr_size);
+	/* flush the command queue updates */
+	dma_wmb();
+
+	sr->tstamp = jiffies;
+	atomic_set(&sr->status, REQ_POSTED);
+	response_list_add(sr, cmdq);
+
+	/* Ring doorbell with count 1 */
+	writeq(1, cmdq->dbell_csr_addr);
+	/* orders the doorbell rings */
+	mmiowb();
+
+	spin_unlock_bh(&cmdq->cmdq_lock);
+}
+
+static int post_backlog_cmds(struct nitrox_cmdq *cmdq)
+{
+	struct nitrox_device *ndev = cmdq->ndev;
+	struct nitrox_softreq *sr, *tmp;
+	int ret = 0;
+
+	spin_lock_bh(&cmdq->backlog_lock);
+
+	list_for_each_entry_safe(sr, tmp, &cmdq->backlog_head, backlog) {
+		struct skcipher_request *skreq;
+
+		/* submit until space available */
+		if (unlikely(cmdq_full(cmdq, ndev->qlen))) {
+			ret = -EBUSY;
+			break;
+		}
+		/* delete from backlog list */
+		list_del(&sr->backlog);
+		atomic_dec(&cmdq->backlog_count);
+		/* sync with other cpus */
+		smp_mb__after_atomic();
+
+		skreq = sr->skreq;
+		/* post the command */
+		post_se_instr(sr, cmdq);
+
+		/* backlog requests are posted, wakeup with -EINPROGRESS */
+		skcipher_request_complete(skreq, -EINPROGRESS);
+	}
+	spin_unlock_bh(&cmdq->backlog_lock);
+
+	return ret;
+}
+
+static int nitrox_enqueue_request(struct nitrox_softreq *sr)
+{
+	struct nitrox_cmdq *cmdq = sr->cmdq;
+	struct nitrox_device *ndev = sr->ndev;
+
+	/* older backlogged requests get the free ring slots first */
+	post_backlog_cmds(cmdq);
+
+	/*
+	 * cmdq_full() keeps a pending slot only when it returns false, so a
+	 * request that ends up on the backlog never holds one.
+	 */
+	if (unlikely(cmdq_full(cmdq, ndev->qlen))) {
+		if (!(sr->flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			return -EAGAIN;
+
+		backlog_list_add(sr, cmdq);
+		return -EBUSY;
+	}
+	post_se_instr(sr, cmdq);
+	return -EINPROGRESS;
+}
+
+/**
+ * nitrox_process_se_request - Send request to SE core
+ * @ndev: NITROX device
+ * @req: Crypto request
+ *
+ * Returns -EINPROGRESS when posted, -EBUSY when backlogged, else an error.
+ */
+int nitrox_process_se_request(struct nitrox_device *ndev,
+			      struct se_crypto_request *req,
+			      completion_t callback,
+			      struct skcipher_request *skreq)
+{
+	struct nitrox_softreq *sr;
+	dma_addr_t ctx_handle = 0;
+	int qno, ret = 0;
+
+	if (!nitrox_ready(ndev))
+		return -ENODEV;
+
+	sr = kzalloc(sizeof(*sr), req->gfp);
+	if (!sr)
+		return -ENOMEM;
+
+	sr->ndev = ndev;
+	sr->flags = req->flags;
+	sr->gfp = req->gfp;
+	sr->callback = callback;
+	sr->skreq = skreq;
+
+	atomic_set(&sr->status, REQ_NOT_POSTED);
+
+	WRITE_ONCE(sr->resp.orh, PENDING_SIG);
+	WRITE_ONCE(sr->resp.completion, PENDING_SIG);
+
+	ret = softreq_map_iobuf(sr, req);
+	if (ret) {
+		kfree(sr);
+		return ret;
+	}
+
+	/* get the context handle */
+	if (req->ctx_handle) {
+		struct ctx_hdr *hdr;
+		u8 *ctx_ptr;
+
+		ctx_ptr = (u8 *)(uintptr_t)req->ctx_handle;
+		hdr = (struct ctx_hdr *)(ctx_ptr - sizeof(struct ctx_hdr));
+		ctx_handle = hdr->ctx_dma;
+	}
+
+	/* select the queue */
+	qno = smp_processor_id() % ndev->nr_queues;
+
+	sr->cmdq = &ndev->pkt_cmdqs[qno];
+
+	/*
+	 * 64-Byte Instruction Format
+	 *
+	 *  ----------------------
+	 *  |      DPTR0         | 8 bytes
+	 *  ----------------------
+	 *  |  PKT_IN_INSTR_HDR  | 8 bytes
+	 *  ----------------------
+	 *  |    PKT_IN_HDR      | 16 bytes
+	 *  ----------------------
+	 *  |    SLC_INFO        | 16 bytes
+	 *  ----------------------
+	 *  |   Front data       | 16 bytes
+	 *  ----------------------
+	 */
+
+	/* fill the packet instruction */
+	/* word 0 */
+	sr->instr.dptr0 = cpu_to_be64(sr->in.dma);
+
+	/* word 1 */
+	sr->instr.ih.value = 0;
+	sr->instr.ih.s.g = 1;
+	sr->instr.ih.s.gsz = sr->in.map_bufs_cnt;
+	sr->instr.ih.s.ssz = sr->out.map_bufs_cnt;
+	sr->instr.ih.s.fsz = FDATA_SIZE + sizeof(struct gphdr);
+	sr->instr.ih.s.tlen = sr->instr.ih.s.fsz + sr->in.total_bytes;
+	sr->instr.ih.value = cpu_to_be64(sr->instr.ih.value);
+
+	/* word 2 */
+	sr->instr.irh.value[0] = 0;
+	sr->instr.irh.s.uddl = MIN_UDD_LEN;
+	/* context length in 64-bit words */
+	sr->instr.irh.s.ctxl = (req->ctrl.s.ctxl / 8);
+	/* offset from solicit base port 256 */
+	sr->instr.irh.s.destport = SOLICIT_BASE_DPORT + qno;
+	sr->instr.irh.s.ctxc = req->ctrl.s.ctxc;
+	sr->instr.irh.s.arg = req->ctrl.s.arg;
+	sr->instr.irh.s.opcode = req->opcode;
+	sr->instr.irh.value[0] = cpu_to_be64(sr->instr.irh.value[0]);
+
+	/* word 3 */
+	sr->instr.irh.s.ctxp = cpu_to_be64(ctx_handle);
+
+	/* word 4 */
+	sr->instr.slc.value[0] = 0;
+	sr->instr.slc.s.ssz = sr->out.map_bufs_cnt;
+	sr->instr.slc.value[0] = cpu_to_be64(sr->instr.slc.value[0]);
+
+	/* word 5 */
+	sr->instr.slc.s.rptr = cpu_to_be64(sr->out.dma);
+
+	/*
+	 * No conversion for front data,
+	 * It goes into payload
+	 * put GP Header in front data
+	 */
+	sr->instr.fdata[0] = *((u64 *)&req->gph);
+	sr->instr.fdata[1] = 0;
+	/* flush the soft_req changes before posting the cmd */
+	wmb();
+
+	ret = nitrox_enqueue_request(sr);
+	if (ret == -EAGAIN)
+		goto send_fail;
+
+	return ret;
+
+send_fail:
+	softreq_destroy(sr);
+	return ret;
+}
+
+static inline int cmd_timeout(unsigned long tstamp, unsigned long timeout)
+{
+	return time_after_eq(jiffies, (tstamp + timeout));
+}
+
+void backlog_qflush_work(struct work_struct *work)
+{
+	struct nitrox_cmdq *cmdq;
+
+	cmdq = container_of(work, struct nitrox_cmdq, backlog_qflush);
+	post_backlog_cmds(cmdq);
+}
+
+/**
+ * process_response_list - process completed requests
+ * @cmdq: Command queue whose response list is scanned
+ *
+ * Completes posted requests from the head of the response list until one
+ * is still pending and not timed out, or the pending-count budget is used.
+ */
+static void process_response_list(struct nitrox_cmdq *cmdq)
+{
+	struct nitrox_device *ndev = cmdq->ndev;
+	struct nitrox_softreq *sr;
+	struct skcipher_request *skreq;
+	completion_t callback;
+	int req_completed = 0, err = 0, budget;
+
+	/* check all pending requests */
+	budget = atomic_read(&cmdq->pending_count);
+
+	while (req_completed < budget) {
+		sr = get_first_response_entry(cmdq);
+		if (!sr)
+			break;
+
+		if (atomic_read(&sr->status) != REQ_POSTED)
+			break;
+
+		/* orh and completion still equal: treated as not completed */
+		if (READ_ONCE(sr->resp.orh) == READ_ONCE(sr->resp.completion)) {
+			/* request not completed, check for timeout */
+			if (!cmd_timeout(sr->tstamp, ndev->timeout))
+				break;
+			dev_err_ratelimited(DEV(ndev),
+					    "Request timeout, orh 0x%016llx\n",
+					    READ_ONCE(sr->resp.orh));
+		}
+		atomic_dec(&cmdq->pending_count);
+		/* sync with other cpus */
+		smp_mb__after_atomic();
+		/* remove from response list */
+		response_list_del(sr, cmdq);
+
+		callback = sr->callback;
+		skreq = sr->skreq;
+
+		/* ORH error code is the low byte; read it before sr is freed */
+		err = READ_ONCE(sr->resp.orh) & 0xff;
+		softreq_destroy(sr);
+
+		if (callback)
+			callback(skreq, err);
+
+		req_completed++;
+	}
+}
+
+/**
+ * pkt_slc_resp_handler - post processing of SE responses
+ */
+void pkt_slc_resp_handler(unsigned long data)
+{
+	struct bh_data *bh = (void *)(uintptr_t)(data);
+	struct nitrox_cmdq *cmdq = bh->cmdq;
+	union nps_pkt_slc_cnts pkt_slc_cnts;
+
+	/* read completion count */
+	pkt_slc_cnts.value = readq(bh->completion_cnt_csr_addr);
+	/* resend the interrupt if more work to do */
+	pkt_slc_cnts.s.resend = 1;
+
+	process_response_list(cmdq);
+
+	/*
+	 * clear the interrupt with resend bit enabled,
+	 * MSI-X interrupt generates if Completion count > Threshold
+	 */
+	writeq(pkt_slc_cnts.value, bh->completion_cnt_csr_addr);
+	/* order the writes */
+	mmiowb();
+
+	if (atomic_read(&cmdq->backlog_count))
+		schedule_work(&cmdq->backlog_qflush);
+}
diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile
index 60919a3..59493fd 100644
--- a/drivers/crypto/ccp/Makefile
+++ b/drivers/crypto/ccp/Makefile
@@ -4,7 +4,8 @@
 	    ccp-dev-v3.o \
 	    ccp-dev-v5.o \
 	    ccp-platform.o \
-	    ccp-dmaengine.o
+	    ccp-dmaengine.o \
+	    ccp-debugfs.o
 ccp-$(CONFIG_PCI) += ccp-pci.o
 
 obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
index 6b46eea..ce97b38 100644
--- a/drivers/crypto/ccp/ccp-crypto-sha.c
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -18,6 +18,7 @@
 #include <linux/crypto.h>
 #include <crypto/algapi.h>
 #include <crypto/hash.h>
+#include <crypto/hmac.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
 #include <crypto/scatterwalk.h>
@@ -308,8 +309,8 @@ static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key,
 	}
 
 	for (i = 0; i < block_size; i++) {
-		ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ 0x36;
-		ctx->u.sha.opad[i] = ctx->u.sha.key[i] ^ 0x5c;
+		ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ HMAC_IPAD_VALUE;
+		ctx->u.sha.opad[i] = ctx->u.sha.key[i] ^ HMAC_OPAD_VALUE;
 	}
 
 	sg_init_one(&ctx->u.sha.opad_sg, ctx->u.sha.opad, block_size);
diff --git a/drivers/crypto/ccp/ccp-debugfs.c b/drivers/crypto/ccp/ccp-debugfs.c
new file mode 100644
index 0000000..3cd6c83
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-debugfs.c
@@ -0,0 +1,344 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+
+/* DebugFS helpers */
+#define	OBUFP		(obuf + oboff)
+#define	OBUFLEN		512
+#define	OBUFSPC		(OBUFLEN - oboff)
+#define	OSCNPRINTF(fmt, ...) \
+		scnprintf(OBUFP, OBUFSPC, fmt, ## __VA_ARGS__)
+
+#define BUFLEN	63
+
+#define	RI_VERSION_NUM	0x0000003F
+#define	RI_AES_PRESENT	0x00000040
+#define	RI_3DES_PRESENT	0x00000080
+#define	RI_SHA_PRESENT	0x00000100
+#define	RI_RSA_PRESENT	0x00000200
+#define	RI_ECC_PRESENT	0x00000400
+#define	RI_ZDE_PRESENT	0x00000800
+#define	RI_ZCE_PRESENT	0x00001000
+#define	RI_TRNG_PRESENT	0x00002000
+#define	RI_ELFC_PRESENT	0x00004000
+#define	RI_ELFC_SHIFT	14
+#define	RI_NUM_VQM	0x00078000
+#define	RI_NVQM_SHIFT	15
+#define	RI_NVQM(r)	(((r) & RI_NUM_VQM) >> RI_NVQM_SHIFT)
+#define	RI_LSB_ENTRIES	0x0FF80000
+#define	RI_NLSB_SHIFT	19
+#define	RI_NLSB(r)	(((r) & RI_LSB_ENTRIES) >> RI_NLSB_SHIFT)
+
+static ssize_t ccp5_debugfs_info_read(struct file *filp, char __user *ubuf,
+				      size_t count, loff_t *offp)
+{
+	struct ccp_device *ccp = filp->private_data;
+	unsigned int oboff = 0;
+	unsigned int regval;
+	ssize_t ret;
+	char *obuf;
+
+	if (!ccp)
+		return 0;
+
+	obuf = kmalloc(OBUFLEN, GFP_KERNEL);
+	if (!obuf)
+		return -ENOMEM;
+
+	oboff += OSCNPRINTF("Device name: %s\n", ccp->name);
+	oboff += OSCNPRINTF("   RNG name: %s\n", ccp->rngname);
+	oboff += OSCNPRINTF("   # Queues: %d\n", ccp->cmd_q_count);
+	oboff += OSCNPRINTF("     # Cmds: %d\n", ccp->cmd_count);
+
+	regval = ioread32(ccp->io_regs + CMD5_PSP_CCP_VERSION);
+	oboff += OSCNPRINTF("    Version: %d\n", regval & RI_VERSION_NUM);
+	oboff += OSCNPRINTF("    Engines:");
+	if (regval & RI_AES_PRESENT)
+		oboff += OSCNPRINTF(" AES");
+	if (regval & RI_3DES_PRESENT)
+		oboff += OSCNPRINTF(" 3DES");
+	if (regval & RI_SHA_PRESENT)
+		oboff += OSCNPRINTF(" SHA");
+	if (regval & RI_RSA_PRESENT)
+		oboff += OSCNPRINTF(" RSA");
+	if (regval & RI_ECC_PRESENT)
+		oboff += OSCNPRINTF(" ECC");
+	if (regval & RI_ZDE_PRESENT)
+		oboff += OSCNPRINTF(" ZDE");
+	if (regval & RI_ZCE_PRESENT)
+		oboff += OSCNPRINTF(" ZCE");
+	if (regval & RI_TRNG_PRESENT)
+		oboff += OSCNPRINTF(" TRNG");
+	oboff += OSCNPRINTF("\n");
+	oboff += OSCNPRINTF("     Queues: %d\n",
+		   (regval & RI_NUM_VQM) >> RI_NVQM_SHIFT);
+	oboff += OSCNPRINTF("LSB Entries: %d\n",
+		   (regval & RI_LSB_ENTRIES) >> RI_NLSB_SHIFT);
+
+	ret = simple_read_from_buffer(ubuf, count, offp, obuf, oboff);
+	kfree(obuf);
+
+	return ret;
+}
+
+/* Return a formatted buffer containing the current
+ * statistics across all queues for a CCP.
+ */
+static ssize_t ccp5_debugfs_stats_read(struct file *filp, char __user *ubuf,
+				       size_t count, loff_t *offp)
+{
+	struct ccp_device *ccp = filp->private_data;
+	unsigned long total_xts_aes_ops = 0;
+	unsigned long total_3des_ops = 0;
+	unsigned long total_aes_ops = 0;
+	unsigned long total_sha_ops = 0;
+	unsigned long total_rsa_ops = 0;
+	unsigned long total_ecc_ops = 0;
+	unsigned long total_pt_ops = 0;
+	unsigned long total_ops = 0;
+	unsigned int oboff = 0;
+	ssize_t ret = 0;
+	unsigned int i;
+	char *obuf;
+
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
+
+		total_ops += cmd_q->total_ops;
+		total_aes_ops += cmd_q->total_aes_ops;
+		total_xts_aes_ops += cmd_q->total_xts_aes_ops;
+		total_3des_ops += cmd_q->total_3des_ops;
+		total_sha_ops += cmd_q->total_sha_ops;
+		total_rsa_ops += cmd_q->total_rsa_ops;
+		total_pt_ops += cmd_q->total_pt_ops;
+		total_ecc_ops += cmd_q->total_ecc_ops;
+	}
+
+	obuf = kmalloc(OBUFLEN, GFP_KERNEL);
+	if (!obuf)
+		return -ENOMEM;
+
+	oboff += OSCNPRINTF("Total Interrupts Handled: %ld\n",
+			    ccp->total_interrupts);
+	oboff += OSCNPRINTF("        Total Operations: %ld\n",
+			    total_ops);
+	oboff += OSCNPRINTF("                     AES: %ld\n",
+			    total_aes_ops);
+	oboff += OSCNPRINTF("                 XTS AES: %ld\n",
+			    total_xts_aes_ops);
+	oboff += OSCNPRINTF("                    3DES: %ld\n",
+			    total_3des_ops);
+	oboff += OSCNPRINTF("                     SHA: %ld\n",
+			    total_sha_ops);
+	oboff += OSCNPRINTF("                     RSA: %ld\n",
+			    total_rsa_ops);
+	oboff += OSCNPRINTF("               Pass-Thru: %ld\n",
+			    total_pt_ops);
+	oboff += OSCNPRINTF("                     ECC: %ld\n",
+			    total_ecc_ops);
+
+	ret = simple_read_from_buffer(ubuf, count, offp, obuf, oboff);
+	kfree(obuf);
+
+	return ret;
+}
+
+/* Reset the counters in a queue
+ */
+static void ccp5_debugfs_reset_queue_stats(struct ccp_cmd_queue *cmd_q)
+{
+	cmd_q->total_ops = 0L;
+	cmd_q->total_aes_ops = 0L;
+	cmd_q->total_xts_aes_ops = 0L;
+	cmd_q->total_3des_ops = 0L;
+	cmd_q->total_sha_ops = 0L;
+	cmd_q->total_rsa_ops = 0L;
+	cmd_q->total_pt_ops = 0L;
+	cmd_q->total_ecc_ops = 0L;
+}
+
+/* A value was written to the stats variable, which
+ * should be used to reset the queue counters across
+ * that device.
+ */
+static ssize_t ccp5_debugfs_stats_write(struct file *filp,
+					const char __user *ubuf,
+					size_t count, loff_t *offp)
+{
+	struct ccp_device *ccp = filp->private_data;
+	int i;
+
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		ccp5_debugfs_reset_queue_stats(&ccp->cmd_q[i]);
+	ccp->total_interrupts = 0L;
+
+	return count;
+}
+
+/* Return a formatted buffer containing the current information
+ * for that queue
+ */
+static ssize_t ccp5_debugfs_queue_read(struct file *filp, char __user *ubuf,
+				       size_t count, loff_t *offp)
+{
+	struct ccp_cmd_queue *cmd_q = filp->private_data;
+	unsigned int oboff = 0;
+	unsigned int regval;
+	ssize_t ret;
+	char *obuf;
+
+	if (!cmd_q)
+		return 0;
+
+	obuf = kmalloc(OBUFLEN, GFP_KERNEL);
+	if (!obuf)
+		return -ENOMEM;
+
+	oboff += OSCNPRINTF("  Total Queue Operations: %ld\n",
+			    cmd_q->total_ops);
+	oboff += OSCNPRINTF("                     AES: %ld\n",
+			    cmd_q->total_aes_ops);
+	oboff += OSCNPRINTF("                 XTS AES: %ld\n",
+			    cmd_q->total_xts_aes_ops);
+	oboff += OSCNPRINTF("                    3DES: %ld\n",
+			    cmd_q->total_3des_ops);
+	oboff += OSCNPRINTF("                     SHA: %ld\n",
+			    cmd_q->total_sha_ops);
+	oboff += OSCNPRINTF("                     RSA: %ld\n",
+			    cmd_q->total_rsa_ops);
+	oboff += OSCNPRINTF("               Pass-Thru: %ld\n",
+			    cmd_q->total_pt_ops);
+	oboff += OSCNPRINTF("                     ECC: %ld\n",
+			    cmd_q->total_ecc_ops);
+
+	regval = ioread32(cmd_q->reg_int_enable);
+	oboff += OSCNPRINTF("      Enabled Interrupts:");
+	if (regval & INT_EMPTY_QUEUE)
+		oboff += OSCNPRINTF(" EMPTY");
+	if (regval & INT_QUEUE_STOPPED)
+		oboff += OSCNPRINTF(" STOPPED");
+	if (regval & INT_ERROR)
+		oboff += OSCNPRINTF(" ERROR");
+	if (regval & INT_COMPLETION)
+		oboff += OSCNPRINTF(" COMPLETION");
+	oboff += OSCNPRINTF("\n");
+
+	ret = simple_read_from_buffer(ubuf, count, offp, obuf, oboff);
+	kfree(obuf);
+
+	return ret;
+}
+
+/* Any write to a queue's stats file resets that queue's
+ * counters to zero; the written value is ignored.
+ */
+static ssize_t ccp5_debugfs_queue_write(struct file *filp,
+					const char __user *ubuf,
+					size_t count, loff_t *offp)
+{
+	struct ccp_cmd_queue *cmd_q = filp->private_data;
+
+	ccp5_debugfs_reset_queue_stats(cmd_q);
+
+	return count;
+}
+
+static const struct file_operations ccp_debugfs_info_ops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = ccp5_debugfs_info_read,
+	.write = NULL,
+};
+
+static const struct file_operations ccp_debugfs_queue_ops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = ccp5_debugfs_queue_read,
+	.write = ccp5_debugfs_queue_write,
+};
+
+static const struct file_operations ccp_debugfs_stats_ops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = ccp5_debugfs_stats_read,
+	.write = ccp5_debugfs_stats_write,
+};
+
+static struct dentry *ccp_debugfs_dir;
+static DEFINE_RWLOCK(ccp_debugfs_lock);
+
+#define	MAX_NAME_LEN	20
+
+void ccp5_debugfs_setup(struct ccp_device *ccp)
+{
+	struct ccp_cmd_queue *cmd_q;
+	char name[MAX_NAME_LEN + 1];
+	struct dentry *debugfs_info;
+	struct dentry *debugfs_stats;
+	struct dentry *debugfs_q_instance;
+	struct dentry *debugfs_q_stats;
+	unsigned long flags;
+	int i;
+
+	if (!debugfs_initialized())
+		return;
+
+	write_lock_irqsave(&ccp_debugfs_lock, flags);
+	if (!ccp_debugfs_dir)
+		ccp_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	write_unlock_irqrestore(&ccp_debugfs_lock, flags);
+	if (!ccp_debugfs_dir)
+		return;
+
+	ccp->debugfs_instance = debugfs_create_dir(ccp->name, ccp_debugfs_dir);
+	if (!ccp->debugfs_instance)
+		return;
+
+	debugfs_info = debugfs_create_file("info", 0400,
+					   ccp->debugfs_instance, ccp,
+					   &ccp_debugfs_info_ops);
+	if (!debugfs_info)
+		return;
+
+	debugfs_stats = debugfs_create_file("stats", 0600,
+					    ccp->debugfs_instance, ccp,
+					    &ccp_debugfs_stats_ops);
+	if (!debugfs_stats)
+		return;
+
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+
+		snprintf(name, MAX_NAME_LEN - 1, "q%d", cmd_q->id);
+
+		debugfs_q_instance =
+			debugfs_create_dir(name, ccp->debugfs_instance);
+		if (!debugfs_q_instance)
+			return;
+
+		debugfs_q_stats =
+			debugfs_create_file("stats", 0600,
+					    debugfs_q_instance, cmd_q,
+					    &ccp_debugfs_queue_ops);
+		if (!debugfs_q_stats)
+			return;
+	}
+}
+
+void ccp5_debugfs_destroy(void)
+{
+	debugfs_remove_recursive(ccp_debugfs_dir);
+}
diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c
index ccbe32d..b10d2d2 100644
--- a/drivers/crypto/ccp/ccp-dev-v5.c
+++ b/drivers/crypto/ccp/ccp-dev-v5.c
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/kthread.h>
+#include <linux/debugfs.h>
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
 #include <linux/compiler.h>
@@ -231,6 +232,8 @@ static int ccp5_do_cmd(struct ccp5_desc *desc,
 	int	i;
 	int ret = 0;
 
+	cmd_q->total_ops++;
+
 	if (CCP5_CMD_SOC(desc)) {
 		CCP5_CMD_IOC(desc) = 1;
 		CCP5_CMD_SOC(desc) = 0;
@@ -282,6 +285,8 @@ static int ccp5_perform_aes(struct ccp_op *op)
 	union ccp_function function;
 	u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
 
+	op->cmd_q->total_aes_ops++;
+
 	/* Zero out all the fields of the command desc */
 	memset(&desc, 0, Q_DESC_SIZE);
 
@@ -325,6 +330,8 @@ static int ccp5_perform_xts_aes(struct ccp_op *op)
 	union ccp_function function;
 	u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
 
+	op->cmd_q->total_xts_aes_ops++;
+
 	/* Zero out all the fields of the command desc */
 	memset(&desc, 0, Q_DESC_SIZE);
 
@@ -364,6 +371,8 @@ static int ccp5_perform_sha(struct ccp_op *op)
 	struct ccp5_desc desc;
 	union ccp_function function;
 
+	op->cmd_q->total_sha_ops++;
+
 	/* Zero out all the fields of the command desc */
 	memset(&desc, 0, Q_DESC_SIZE);
 
@@ -404,6 +413,8 @@ static int ccp5_perform_des3(struct ccp_op *op)
 	union ccp_function function;
 	u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
 
+	op->cmd_q->total_3des_ops++;
+
 	/* Zero out all the fields of the command desc */
 	memset(&desc, 0, sizeof(struct ccp5_desc));
 
@@ -444,6 +455,8 @@ static int ccp5_perform_rsa(struct ccp_op *op)
 	struct ccp5_desc desc;
 	union ccp_function function;
 
+	op->cmd_q->total_rsa_ops++;
+
 	/* Zero out all the fields of the command desc */
 	memset(&desc, 0, Q_DESC_SIZE);
 
@@ -487,6 +500,8 @@ static int ccp5_perform_passthru(struct ccp_op *op)
 	struct ccp_dma_info *daddr = &op->dst.u.dma;
 
 
+	op->cmd_q->total_pt_ops++;
+
 	memset(&desc, 0, Q_DESC_SIZE);
 
 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_PASSTHRU;
@@ -543,6 +558,8 @@ static int ccp5_perform_ecc(struct ccp_op *op)
 	struct ccp5_desc desc;
 	union ccp_function function;
 
+	op->cmd_q->total_ecc_ops++;
+
 	/* Zero out all the fields of the command desc */
 	memset(&desc, 0, Q_DESC_SIZE);
 
@@ -592,7 +609,6 @@ static int ccp_find_lsb_regions(struct ccp_cmd_queue *cmd_q, u64 status)
 	return queues ? 0 : -EINVAL;
 }
 
-
 static int ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp,
 					int lsb_cnt, int n_lsbs,
 					unsigned long *lsb_pub)
@@ -757,6 +773,7 @@ static irqreturn_t ccp5_irq_handler(int irq, void *data)
 	struct ccp_device *ccp = dev_get_drvdata(dev);
 
 	ccp5_disable_queue_interrupts(ccp);
+	ccp->total_interrupts++;
 	if (ccp->use_tasklet)
 		tasklet_schedule(&ccp->irq_tasklet);
 	else
@@ -956,6 +973,9 @@ static int ccp5_init(struct ccp_device *ccp)
 	if (ret)
 		goto e_hwrng;
 
+	/* Set up debugfs entries */
+	ccp5_debugfs_setup(ccp);
+
 	return 0;
 
 e_hwrng:
@@ -992,6 +1012,12 @@ static void ccp5_destroy(struct ccp_device *ccp)
 	/* Remove this device from the list of available units first */
 	ccp_del_device(ccp);
 
+	/* We're in the process of tearing down the entire driver;
+	 * when all the devices are gone clean up debugfs
+	 */
+	if (ccp_present())
+		ccp5_debugfs_destroy();
+
 	/* Disable and clear interrupts */
 	ccp5_disable_queue_interrupts(ccp);
 	for (i = 0; i < ccp->cmd_q_count; i++) {
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index 92d1c69..2506b50 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -31,8 +31,9 @@
 #include "ccp-dev.h"
 
 MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
+MODULE_AUTHOR("Gary R Hook <gary.hook@amd.com>");
 MODULE_LICENSE("GPL");
-MODULE_VERSION("1.0.0");
+MODULE_VERSION("1.1.0");
 MODULE_DESCRIPTION("AMD Cryptographic Coprocessor driver");
 
 struct ccp_tasklet_data {
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index 0cb09d0..a70154a 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -70,6 +70,7 @@
 #define LSB_PUBLIC_MASK_HI_OFFSET	0x1C
 #define LSB_PRIVATE_MASK_LO_OFFSET	0x20
 #define LSB_PRIVATE_MASK_HI_OFFSET	0x24
+#define CMD5_PSP_CCP_VERSION		0x100
 
 #define CMD5_Q_CONTROL_BASE		0x0000
 #define CMD5_Q_TAIL_LO_BASE		0x0004
@@ -322,6 +323,16 @@ struct ccp_cmd_queue {
 	/* Interrupt wait queue */
 	wait_queue_head_t int_queue;
 	unsigned int int_rcvd;
+
+	/* Per-queue Statistics */
+	unsigned long total_ops;
+	unsigned long total_aes_ops;
+	unsigned long total_xts_aes_ops;
+	unsigned long total_3des_ops;
+	unsigned long total_sha_ops;
+	unsigned long total_rsa_ops;
+	unsigned long total_pt_ops;
+	unsigned long total_ecc_ops;
 } ____cacheline_aligned;
 
 struct ccp_device {
@@ -419,6 +430,12 @@ struct ccp_device {
 
 	/* DMA caching attribute support */
 	unsigned int axcache;
+
+	/* Device Statistics */
+	unsigned long total_interrupts;
+
+	/* DebugFS info */
+	struct dentry *debugfs_instance;
 };
 
 enum ccp_memtype {
@@ -632,6 +649,9 @@ void ccp_unregister_rng(struct ccp_device *ccp);
 int ccp_dmaengine_register(struct ccp_device *ccp);
 void ccp_dmaengine_unregister(struct ccp_device *ccp);
 
+void ccp5_debugfs_setup(struct ccp_device *ccp);
+void ccp5_debugfs_destroy(void);
+
 /* Structure for computation functions that are device-specific */
 struct ccp_actions {
 	int (*aes)(struct ccp_op *);
diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c
index 351f28d8..e26969e 100644
--- a/drivers/crypto/ccp/ccp-platform.c
+++ b/drivers/crypto/ccp/ccp-platform.c
@@ -44,7 +44,7 @@ static struct ccp_vdata *ccp_get_of_version(struct platform_device *pdev)
 	if (match && match->data)
 		return (struct ccp_vdata *)match->data;
 #endif
-	return 0;
+	return NULL;
 }
 
 static struct ccp_vdata *ccp_get_acpi_version(struct platform_device *pdev)
@@ -56,7 +56,7 @@ static struct ccp_vdata *ccp_get_acpi_version(struct platform_device *pdev)
 	if (match && match->driver_data)
 		return (struct ccp_vdata *)match->driver_data;
 #endif
-	return 0;
+	return NULL;
 }
 
 static int ccp_get_irq(struct ccp_device *ccp)
diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index f00e0d8..aa4e5b8 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -55,6 +55,8 @@
 #include <crypto/hash.h>
 #include <crypto/sha.h>
 #include <crypto/authenc.h>
+#include <crypto/ctr.h>
+#include <crypto/gf128mul.h>
 #include <crypto/internal/aead.h>
 #include <crypto/null.h>
 #include <crypto/internal/skcipher.h>
@@ -126,13 +128,13 @@ static void chcr_verify_tag(struct aead_request *req, u8 *input, int *err)
 	fw6_pld = (struct cpl_fw6_pld *)input;
 	if ((get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106) ||
 	    (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_GCM)) {
-		cmp = memcmp(&fw6_pld->data[2], (fw6_pld + 1), authsize);
+		cmp = crypto_memneq(&fw6_pld->data[2], (fw6_pld + 1), authsize);
 	} else {
 
 		sg_pcopy_to_buffer(req->src, sg_nents(req->src), temp,
 				authsize, req->assoclen +
 				req->cryptlen - authsize);
-		cmp = memcmp(temp, (fw6_pld + 1), authsize);
+		cmp = crypto_memneq(temp, (fw6_pld + 1), authsize);
 	}
 	if (cmp)
 		*err = -EBADMSG;
@@ -151,12 +153,12 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
 	struct chcr_context *ctx = crypto_tfm_ctx(tfm);
 	struct uld_ctx *u_ctx = ULD_CTX(ctx);
 	struct chcr_req_ctx ctx_req;
-	struct cpl_fw6_pld *fw6_pld;
 	unsigned int digestsize, updated_digestsize;
+	struct adapter *adap = padap(ctx->dev);
 
 	switch (tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK) {
 	case CRYPTO_ALG_TYPE_AEAD:
-		ctx_req.req.aead_req = (struct aead_request *)req;
+		ctx_req.req.aead_req = aead_request_cast(req);
 		ctx_req.ctx.reqctx = aead_request_ctx(ctx_req.req.aead_req);
 		dma_unmap_sg(&u_ctx->lldi.pdev->dev, ctx_req.ctx.reqctx->dst,
 			     ctx_req.ctx.reqctx->dst_nents, DMA_FROM_DEVICE);
@@ -164,32 +166,23 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
 			kfree_skb(ctx_req.ctx.reqctx->skb);
 			ctx_req.ctx.reqctx->skb = NULL;
 		}
+		free_new_sg(ctx_req.ctx.reqctx->newdstsg);
+		ctx_req.ctx.reqctx->newdstsg = NULL;
 		if (ctx_req.ctx.reqctx->verify == VERIFY_SW) {
 			chcr_verify_tag(ctx_req.req.aead_req, input,
 					&err);
 			ctx_req.ctx.reqctx->verify = VERIFY_HW;
 		}
+		ctx_req.req.aead_req->base.complete(req, err);
 		break;
 
 	case CRYPTO_ALG_TYPE_ABLKCIPHER:
-		ctx_req.req.ablk_req = (struct ablkcipher_request *)req;
-		ctx_req.ctx.ablk_ctx =
-			ablkcipher_request_ctx(ctx_req.req.ablk_req);
-		if (!err) {
-			fw6_pld = (struct cpl_fw6_pld *)input;
-			memcpy(ctx_req.req.ablk_req->info, &fw6_pld->data[2],
-			       AES_BLOCK_SIZE);
-		}
-		dma_unmap_sg(&u_ctx->lldi.pdev->dev, ctx_req.req.ablk_req->dst,
-			     ctx_req.ctx.ablk_ctx->dst_nents, DMA_FROM_DEVICE);
-		if (ctx_req.ctx.ablk_ctx->skb) {
-			kfree_skb(ctx_req.ctx.ablk_ctx->skb);
-			ctx_req.ctx.ablk_ctx->skb = NULL;
-		}
+		 err = chcr_handle_cipher_resp(ablkcipher_request_cast(req),
+					       input, err);
 		break;
 
 	case CRYPTO_ALG_TYPE_AHASH:
-		ctx_req.req.ahash_req = (struct ahash_request *)req;
+		ctx_req.req.ahash_req = ahash_request_cast(req);
 		ctx_req.ctx.ahash_ctx =
 			ahash_request_ctx(ctx_req.req.ahash_req);
 		digestsize =
@@ -214,8 +207,10 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
 			       sizeof(struct cpl_fw6_pld),
 			       updated_digestsize);
 		}
+		ctx_req.req.ahash_req->base.complete(req, err);
 		break;
 	}
+	atomic_inc(&adap->chcr_stats.complete);
 	return err;
 }
 
@@ -392,7 +387,7 @@ static void write_phys_cpl(struct cpl_rx_phys_dsgl *phys_cpl,
 			   struct phys_sge_parm *sg_param)
 {
 	struct phys_sge_pairs *to;
-	int out_buf_size = sg_param->obsize;
+	unsigned int len = 0, left_size = sg_param->obsize;
 	unsigned int nents = sg_param->nents, i, j = 0;
 
 	phys_cpl->op_to_tid = htonl(CPL_RX_PHYS_DSGL_OPCODE_V(CPL_RX_PHYS_DSGL)
@@ -409,20 +404,15 @@ static void write_phys_cpl(struct cpl_rx_phys_dsgl *phys_cpl,
 	phys_cpl->rss_hdr_int.hash_val = 0;
 	to = (struct phys_sge_pairs *)((unsigned char *)phys_cpl +
 				       sizeof(struct cpl_rx_phys_dsgl));
-
-	for (i = 0; nents; to++) {
-		for (j = 0; j < 8 && nents; j++, nents--) {
-			out_buf_size -= sg_dma_len(sg);
-			to->len[j] = htons(sg_dma_len(sg));
+	for (i = 0; nents && left_size; to++) {
+		for (j = 0; j < 8 && nents && left_size; j++, nents--) {
+			len = min(left_size, sg_dma_len(sg));
+			to->len[j] = htons(len);
 			to->addr[j] = cpu_to_be64(sg_dma_address(sg));
+			left_size -= len;
 			sg = sg_next(sg);
 		}
 	}
-	if (out_buf_size) {
-		j--;
-		to--;
-		to->len[j] = htons(ntohs(to->len[j]) + (out_buf_size));
-	}
 }
 
 static inline int map_writesg_phys_cpl(struct device *dev,
@@ -431,7 +421,7 @@ static inline int map_writesg_phys_cpl(struct device *dev,
 					struct phys_sge_parm *sg_param)
 {
 	if (!sg || !sg_param->nents)
-		return 0;
+		return -EINVAL;
 
 	sg_param->nents = dma_map_sg(dev, sg, sg_param->nents, DMA_FROM_DEVICE);
 	if (sg_param->nents == 0) {
@@ -498,6 +488,24 @@ write_sg_to_skb(struct sk_buff *skb, unsigned int *frags,
 	}
 }
 
+static int cxgb4_is_crypto_q_full(struct net_device *dev, unsigned int idx)
+{
+	struct adapter *adap = netdev2adap(dev);
+	struct sge_uld_txq_info *txq_info =
+		adap->sge.uld_txq_info[CXGB4_TX_CRYPTO];
+	struct sge_uld_txq *txq;
+	int ret = 0;	/* 0 unless the queue's full flag is set, then -1 */
+
+	local_bh_disable();
+	txq = &txq_info->uldtxq[idx];	/* crypto TX queue selected by idx */
+	spin_lock(&txq->sendq.lock);	/* txq->full is read under this lock */
+	if (txq->full)
+		ret = -1;
+	spin_unlock(&txq->sendq.lock);
+	local_bh_enable();
+	return ret;
+}
+
 static int generate_copy_rrkey(struct ablk_ctx *ablkctx,
 			       struct _key_ctx *key_ctx)
 {
@@ -512,13 +520,67 @@ static int generate_copy_rrkey(struct ablk_ctx *ablkctx,
 	}
 	return 0;
 }
+static int chcr_sg_ent_in_wr(struct scatterlist *src,
+			     struct scatterlist *dst,
+			     unsigned int minsg,
+			     unsigned int space,
+			     short int *sent,
+			     short int *dent)
+{
+	int srclen = 0, dstlen = 0;	/* bytes accumulated from src / dst */
+	int srcsg = minsg, dstsg = 0;	/* entry counts; src starts at minsg */
 
+	*sent = 0;
+	*dent = 0;
+	while (src && dst && ((srcsg + 1) <= MAX_SKB_FRAGS) &&
+	       space > (sgl_ent_len[srcsg + 1] + dsgl_ent_len[dstsg])) {
+		srclen += src->length;
+		srcsg++;
+		while (dst && ((dstsg + 1) <= MAX_DSGL_ENT) &&
+		       space > (sgl_ent_len[srcsg] + dsgl_ent_len[dstsg + 1])) {
+			if (srclen <= dstlen)	/* dst covers src so far */
+				break;
+			dstlen += dst->length;
+			dst = sg_next(dst);
+			dstsg++;
+		}
+		src = sg_next(src);
+	}
+	*sent = srcsg - minsg;	/* src entries taken beyond minsg */
+	*dent = dstsg;
+	return min(srclen, dstlen);	/* bytes covered by both lists */
+}
+
+static int chcr_cipher_fallback(struct crypto_skcipher *cipher,
+				u32 flags,
+				struct scatterlist *src,
+				struct scatterlist *dst,
+				unsigned int nbytes,
+				u8 *iv,
+				unsigned short op_type)	/* nonzero: decrypt */
+{
+	int err;	/* result of the skcipher encrypt/decrypt call */
+
+	SKCIPHER_REQUEST_ON_STACK(subreq, cipher);
+	skcipher_request_set_tfm(subreq, cipher);
+	skcipher_request_set_callback(subreq, flags, NULL, NULL);
+	skcipher_request_set_crypt(subreq, src, dst,
+				   nbytes, iv);
+
+	err = op_type ? crypto_skcipher_decrypt(subreq) :
+		crypto_skcipher_encrypt(subreq);
+	skcipher_request_zero(subreq);	/* clear the on-stack request */
+
+	return err;
+
+}
 static inline void create_wreq(struct chcr_context *ctx,
 			       struct chcr_wr *chcr_req,
 			       void *req, struct sk_buff *skb,
 			       int kctx_len, int hash_sz,
 			       int is_iv,
-			       unsigned int sc_len)
+			       unsigned int sc_len,
+			       unsigned int lcb)
 {
 	struct uld_ctx *u_ctx = ULD_CTX(ctx);
 	int iv_loc = IV_DSGL;
@@ -543,7 +605,8 @@ static inline void create_wreq(struct chcr_context *ctx,
 	chcr_req->wreq.cookie = cpu_to_be64((uintptr_t)req);
 	chcr_req->wreq.rx_chid_to_rx_q_id =
 		FILL_WR_RX_Q_ID(ctx->dev->rx_channel_id, qid,
-				is_iv ? iv_loc : IV_NOP, ctx->tx_qidx);
+				is_iv ? iv_loc : IV_NOP, !!lcb,
+				ctx->tx_qidx);
 
 	chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id,
 						       qid);
@@ -563,69 +626,62 @@ static inline void create_wreq(struct chcr_context *ctx,
  *	@qid: ingress qid where response of this WR should be received.
  *	@op_type:	encryption or decryption
  */
-static struct sk_buff
-*create_cipher_wr(struct ablkcipher_request *req,
-		  unsigned short qid,
-		  unsigned short op_type)
+static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(wrparam->req);
 	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
 	struct uld_ctx *u_ctx = ULD_CTX(ctx);
 	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
 	struct sk_buff *skb = NULL;
 	struct chcr_wr *chcr_req;
 	struct cpl_rx_phys_dsgl *phys_cpl;
-	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
+	struct chcr_blkcipher_req_ctx *reqctx =
+		ablkcipher_request_ctx(wrparam->req);
 	struct phys_sge_parm sg_param;
 	unsigned int frags = 0, transhdr_len, phys_dsgl;
-	unsigned int ivsize = crypto_ablkcipher_ivsize(tfm), kctx_len;
-	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
-			GFP_ATOMIC;
-
-	if (!req->info)
-		return ERR_PTR(-EINVAL);
-	reqctx->dst_nents = sg_nents_for_len(req->dst, req->nbytes);
-	if (reqctx->dst_nents <= 0) {
-		pr_err("AES:Invalid Destination sg lists\n");
-		return ERR_PTR(-EINVAL);
-	}
-	if ((ablkctx->enckey_len == 0) || (ivsize > AES_BLOCK_SIZE) ||
-	    (req->nbytes <= 0) || (req->nbytes % AES_BLOCK_SIZE)) {
-		pr_err("AES: Invalid value of Key Len %d nbytes %d IV Len %d\n",
-		       ablkctx->enckey_len, req->nbytes, ivsize);
-		return ERR_PTR(-EINVAL);
-	}
+	int error;
+	unsigned int ivsize = AES_BLOCK_SIZE, kctx_len;
+	gfp_t flags = wrparam->req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+			GFP_KERNEL : GFP_ATOMIC;
+	struct adapter *adap = padap(ctx->dev);
 
 	phys_dsgl = get_space_for_phys_dsgl(reqctx->dst_nents);
 
 	kctx_len = (DIV_ROUND_UP(ablkctx->enckey_len, 16) * 16);
 	transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, phys_dsgl);
 	skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags);
-	if (!skb)
-		return ERR_PTR(-ENOMEM);
+	if (!skb) {
+		error = -ENOMEM;
+		goto err;
+	}
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
 	chcr_req = (struct chcr_wr *)__skb_put(skb, transhdr_len);
 	memset(chcr_req, 0, transhdr_len);
 	chcr_req->sec_cpl.op_ivinsrtofst =
 		FILL_SEC_CPL_OP_IVINSR(ctx->dev->rx_channel_id, 2, 1);
 
-	chcr_req->sec_cpl.pldlen = htonl(ivsize + req->nbytes);
+	chcr_req->sec_cpl.pldlen = htonl(ivsize + wrparam->bytes);
 	chcr_req->sec_cpl.aadstart_cipherstop_hi =
 			FILL_SEC_CPL_CIPHERSTOP_HI(0, 0, ivsize + 1, 0);
 
 	chcr_req->sec_cpl.cipherstop_lo_authinsert =
 			FILL_SEC_CPL_AUTHINSERT(0, 0, 0, 0);
-	chcr_req->sec_cpl.seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(op_type, 0,
+	chcr_req->sec_cpl.seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(reqctx->op, 0,
 							 ablkctx->ciph_mode,
 							 0, 0, ivsize >> 1);
 	chcr_req->sec_cpl.ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 0,
 							  0, 1, phys_dsgl);
 
 	chcr_req->key_ctx.ctx_hdr = ablkctx->key_ctx_hdr;
-	if (op_type == CHCR_DECRYPT_OP) {
+	if ((reqctx->op == CHCR_DECRYPT_OP) &&
+	    (!(get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm)) ==
+	       CRYPTO_ALG_SUB_TYPE_CTR)) &&
+	    (!(get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm)) ==
+	       CRYPTO_ALG_SUB_TYPE_CTR_RFC3686))) {
 		generate_copy_rrkey(ablkctx, &chcr_req->key_ctx);
 	} else {
-		if (ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC) {
+		if ((ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC) ||
+		    (ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CTR)) {
 			memcpy(chcr_req->key_ctx.key, ablkctx->key,
 			       ablkctx->enckey_len);
 		} else {
@@ -640,45 +696,80 @@ static struct sk_buff
 	}
 	phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len);
 	sg_param.nents = reqctx->dst_nents;
-	sg_param.obsize = req->nbytes;
-	sg_param.qid = qid;
-	sg_param.align = 1;
-	if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, req->dst,
-				 &sg_param))
+	sg_param.obsize =  wrparam->bytes;
+	sg_param.qid = wrparam->qid;
+	error = map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl,
+				       reqctx->dst, &sg_param);
+	if (error)
 		goto map_fail1;
 
 	skb_set_transport_header(skb, transhdr_len);
-	memcpy(reqctx->iv, req->info, ivsize);
 	write_buffer_to_skb(skb, &frags, reqctx->iv, ivsize);
-	write_sg_to_skb(skb, &frags, req->src, req->nbytes);
-	create_wreq(ctx, chcr_req, req, skb, kctx_len, 0, 1,
-			sizeof(struct cpl_rx_phys_dsgl) + phys_dsgl);
+	write_sg_to_skb(skb, &frags, wrparam->srcsg, wrparam->bytes);
+	atomic_inc(&adap->chcr_stats.cipher_rqst);
+	create_wreq(ctx, chcr_req, &(wrparam->req->base), skb, kctx_len, 0, 1,
+			sizeof(struct cpl_rx_phys_dsgl) + phys_dsgl,
+			ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC);
 	reqctx->skb = skb;
 	skb_get(skb);
 	return skb;
 map_fail1:
 	kfree_skb(skb);
-	return ERR_PTR(-ENOMEM);
+err:
+	return ERR_PTR(error);
 }
 
-static int chcr_aes_cbc_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static inline int chcr_keyctx_ck_size(unsigned int keylen)
+{
+	int ck_size = 0;	/* CHCR_KEYCTX_CIPHER_KEY_SIZE_* code for keylen */
+
+	if (keylen == AES_KEYSIZE_128)
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
+	else if (keylen == AES_KEYSIZE_192)
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192;
+	else if (keylen == AES_KEYSIZE_256)
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
+	else	/* keylen matches none of the three AES key sizes */
+		ck_size = 0;
+
+	return ck_size;
+}
+static int chcr_cipher_fallback_setkey(struct crypto_ablkcipher *cipher,
+				       const u8 *key,
+				       unsigned int keylen)
+{
+	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(cipher);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+	int err = 0;	/* result of keying ablkctx->sw_cipher */
+
+	crypto_skcipher_clear_flags(ablkctx->sw_cipher, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(ablkctx->sw_cipher, cipher->base.crt_flags &
+				  CRYPTO_TFM_REQ_MASK);	/* copy REQ flags */
+	err = crypto_skcipher_setkey(ablkctx->sw_cipher, key, keylen);
+	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;	/* then take sw RES flags */
+	tfm->crt_flags |=
+		crypto_skcipher_get_flags(ablkctx->sw_cipher) &
+		CRYPTO_TFM_RES_MASK;
+	return err;
+}
+
+static int chcr_aes_cbc_setkey(struct crypto_ablkcipher *cipher,
+			       const u8 *key,
 			       unsigned int keylen)
 {
-	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(cipher);
 	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
 	unsigned int ck_size, context_size;
 	u16 alignment = 0;
+	int err;
 
-	if (keylen == AES_KEYSIZE_128) {
-		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
-	} else if (keylen == AES_KEYSIZE_192) {
-		alignment = 8;
-		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192;
-	} else if (keylen == AES_KEYSIZE_256) {
-		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
-	} else {
+	err = chcr_cipher_fallback_setkey(cipher, key, keylen);
+	if (err)
 		goto badkey_err;
-	}
+
+	ck_size = chcr_keyctx_ck_size(keylen);
+	alignment = ck_size == CHCR_KEYCTX_CIPHER_KEY_SIZE_192 ? 8 : 0;
 	memcpy(ablkctx->key, key, keylen);
 	ablkctx->enckey_len = keylen;
 	get_aes_decrypt_key(ablkctx->rrkey, ablkctx->key, keylen << 3);
@@ -690,35 +781,398 @@ static int chcr_aes_cbc_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_CBC;
 	return 0;
 badkey_err:
-	crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	ablkctx->enckey_len = 0;
-	return -EINVAL;
+
+	return err;
 }
 
-static int cxgb4_is_crypto_q_full(struct net_device *dev, unsigned int idx)
+static int chcr_aes_ctr_setkey(struct crypto_ablkcipher *cipher,
+				   const u8 *key,
+				   unsigned int keylen)
 {
-	struct adapter *adap = netdev2adap(dev);
-	struct sge_uld_txq_info *txq_info =
-		adap->sge.uld_txq_info[CXGB4_TX_CRYPTO];
-	struct sge_uld_txq *txq;
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(cipher);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+	unsigned int ck_size, context_size;
+	u16 alignment = 0;
+	int err;
+
+	err = chcr_cipher_fallback_setkey(cipher, key, keylen);
+	if (err)
+		goto badkey_err;
+	ck_size = chcr_keyctx_ck_size(keylen);
+	alignment = (ck_size == CHCR_KEYCTX_CIPHER_KEY_SIZE_192) ? 8 : 0;
+	memcpy(ablkctx->key, key, keylen);
+	ablkctx->enckey_len = keylen;
+	context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD +
+			keylen + alignment) >> 4;
+
+	ablkctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, CHCR_KEYCTX_NO_KEY,
+						0, 0, context_size);
+	ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_CTR;
+
+	return 0;
+badkey_err:
+	crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	ablkctx->enckey_len = 0;
+
+	return err;
+}
+
+static int chcr_aes_rfc3686_setkey(struct crypto_ablkcipher *cipher,
+				   const u8 *key,
+				   unsigned int keylen)
+{
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(cipher);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+	unsigned int ck_size, context_size;
+	u16 alignment = 0;	/* 8 extra context bytes for 192-bit keys */
+	int err;
+
+	if (keylen < CTR_RFC3686_NONCE_SIZE)	/* no room for the nonce */
+		return -EINVAL;
+	memcpy(ablkctx->nonce, key + (keylen - CTR_RFC3686_NONCE_SIZE),
+	       CTR_RFC3686_NONCE_SIZE);
+
+	keylen -= CTR_RFC3686_NONCE_SIZE;	/* nonce is the key's tail */
+	err = chcr_cipher_fallback_setkey(cipher, key, keylen);
+	if (err)
+		goto badkey_err;
+
+	ck_size = chcr_keyctx_ck_size(keylen);
+	alignment = (ck_size == CHCR_KEYCTX_CIPHER_KEY_SIZE_192) ? 8 : 0;
+	memcpy(ablkctx->key, key, keylen);
+	ablkctx->enckey_len = keylen;
+	context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD +
+			keylen + alignment) >> 4;
+
+	ablkctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, CHCR_KEYCTX_NO_KEY,
+						0, 0, context_size);
+	ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_CTR;
+
+	return 0;
+badkey_err:
+	crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	ablkctx->enckey_len = 0;	/* forget any previous key length */
+
+	return err;
+}
+static void ctr_add_iv(u8 *dstiv, u8 *srciv, u32 add)
+{
+	unsigned int size = AES_BLOCK_SIZE;
+	__be32 *b = (__be32 *)(dstiv + size);	/* one past the last word */
+	u32 c, prev;
+
+	memcpy(dstiv, srciv, AES_BLOCK_SIZE);
+	for (; size >= 4; size -= 4) {	/* walk words from last to first */
+		prev = be32_to_cpu(*--b);
+		c = prev + add;
+		*b = cpu_to_be32(c);
+		if (prev < c)	/* NOTE(review): add == 0 also falls through */
+			break;
+		add = 1;	/* sum did not grow: carry 1 into next word */
+	}
+
+}
+
+static unsigned int adjust_ctr_overflow(u8 *iv, u32 bytes)
+{
+	__be32 *b = (__be32 *)(iv + AES_BLOCK_SIZE);
+	u64 c;
+	u32 temp = be32_to_cpu(*--b);	/* last 32-bit word of the IV */
+
+	temp = ~temp;
+	c = (u64)temp +  1; /* blocks processable without overflow */
+	if ((bytes / AES_BLOCK_SIZE) > c)
+		bytes = c * AES_BLOCK_SIZE;	/* clamp to c blocks */
+	return bytes;
+}
+
+static int chcr_update_tweak(struct ablkcipher_request *req, u8 *iv)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
+	struct crypto_cipher *cipher;
+	int ret, i;
+	u8 *key;
+	unsigned int keylen;
+
+	cipher = crypto_alloc_cipher("aes-generic", 0, 0);
+	memcpy(iv, req->info, AES_BLOCK_SIZE);	/* start from request IV */
+
+	if (IS_ERR(cipher)) {
+		ret = -ENOMEM;	/* NOTE(review): PTR_ERR(cipher) is dropped */
+		goto out;
+	}
+	keylen = ablkctx->enckey_len / 2;
+	key = ablkctx->key + keylen;	/* second half of the stored key */
+	ret = crypto_cipher_setkey(cipher, key, keylen);
+	if (ret)
+		goto out1;
+
+	crypto_cipher_encrypt_one(cipher, iv, iv);	/* in place */
+	for (i = 0; i < (reqctx->processed / AES_BLOCK_SIZE); i++)
+		gf128mul_x_ble((le128 *)iv, (le128 *)iv);
+
+	crypto_cipher_decrypt_one(cipher, iv, iv);	/* in place */
+out1:
+	crypto_free_cipher(cipher);
+out:
+	return ret;
+}
+
+static int chcr_update_cipher_iv(struct ablkcipher_request *req,
+				   struct cpl_fw6_pld *fw6_pld, u8 *iv)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
+	int subtype = get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm));
 	int ret = 0;
 
-	local_bh_disable();
-	txq = &txq_info->uldtxq[idx];
-	spin_lock(&txq->sendq.lock);
-	if (txq->full)
-		ret = -1;
-	spin_unlock(&txq->sendq.lock);
-	local_bh_enable();
+	if (subtype == CRYPTO_ALG_SUB_TYPE_CTR)
+		ctr_add_iv(iv, req->info, (reqctx->processed /
+			   AES_BLOCK_SIZE));
+	else if (subtype == CRYPTO_ALG_SUB_TYPE_CTR_RFC3686)
+		*(__be32 *)(reqctx->iv + CTR_RFC3686_NONCE_SIZE +
+			CTR_RFC3686_IV_SIZE) = cpu_to_be32((reqctx->processed /
+						AES_BLOCK_SIZE) + 1);
+	else if (subtype == CRYPTO_ALG_SUB_TYPE_XTS)
+		ret = chcr_update_tweak(req, iv);
+	else if (subtype == CRYPTO_ALG_SUB_TYPE_CBC) {
+		if (reqctx->op)
+			sg_pcopy_to_buffer(req->src, sg_nents(req->src), iv,
+					   16,
+					   reqctx->processed - AES_BLOCK_SIZE);
+		else
+			memcpy(iv, &fw6_pld->data[2], AES_BLOCK_SIZE);
+	}
+
 	return ret;
+
+}
+
+/* We need a separate function for the final IV because in rfc3686 the initial
+ * counter starts from 1 and the IV buffer is only 8 bytes, which remains
+ * constant for subsequent update requests
+ */
+
+static int chcr_final_cipher_iv(struct ablkcipher_request *req,
+				   struct cpl_fw6_pld *fw6_pld, u8 *iv)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
+	int subtype = get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm));
+	int ret = 0;	/* only chcr_update_tweak() can change this */
+
+	if (subtype == CRYPTO_ALG_SUB_TYPE_CTR)
+		ctr_add_iv(iv, req->info, (reqctx->processed /
+			   AES_BLOCK_SIZE));	/* advance by blocks done */
+	else if (subtype == CRYPTO_ALG_SUB_TYPE_XTS)
+		ret = chcr_update_tweak(req, iv);
+	else if (subtype == CRYPTO_ALG_SUB_TYPE_CBC) {
+		if (reqctx->op)	/* nonzero op: last processed src block */
+			sg_pcopy_to_buffer(req->src, sg_nents(req->src), iv,
+					   16,
+					   reqctx->processed - AES_BLOCK_SIZE);
+		else	/* zero op: taken from fw6_pld->data[2] */
+			memcpy(iv, &fw6_pld->data[2], AES_BLOCK_SIZE);
+
+	}
+	return ret;
+
+}
+
+
+static int chcr_handle_cipher_resp(struct ablkcipher_request *req,
+				   unsigned char *input, int err)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
+	struct uld_ctx *u_ctx = ULD_CTX(ctx);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+	struct sk_buff *skb;
+	struct cpl_fw6_pld *fw6_pld = (struct cpl_fw6_pld *)input;
+	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
+	struct  cipher_wr_param wrparam;
+	int bytes;
+
+	dma_unmap_sg(&u_ctx->lldi.pdev->dev, reqctx->dst, reqctx->dst_nents,
+		     DMA_FROM_DEVICE);
+
+	if (reqctx->skb) {
+		kfree_skb(reqctx->skb);
+		reqctx->skb = NULL;
+	}
+	if (err)	/* error handed in: complete the request with it */
+		goto complete;
+
+	if (req->nbytes == reqctx->processed) {	/* all bytes submitted */
+		err = chcr_final_cipher_iv(req, fw6_pld, req->info);
+		goto complete;
+	}
+
+	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
+					    ctx->tx_qidx))) {
+		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
+			err = -EBUSY;
+			goto complete;
+		}
+
+	}
+	wrparam.srcsg = scatterwalk_ffwd(reqctx->srcffwd, req->src,
+				       reqctx->processed);
+	reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd, reqctx->dstsg,
+					 reqctx->processed);
+	if (!wrparam.srcsg || !reqctx->dst) {
+		pr_err("Input sg list length less that nbytes\n");
+		err = -EINVAL;
+		goto complete;
+	}
+	bytes = chcr_sg_ent_in_wr(wrparam.srcsg, reqctx->dst, 1,
+				 SPACE_LEFT(ablkctx->enckey_len),
+				 &wrparam.snent, &reqctx->dst_nents);
+	if ((bytes + reqctx->processed) >= req->nbytes)
+		bytes  = req->nbytes - reqctx->processed;
+	else
+		bytes = ROUND_16(bytes);
+	err = chcr_update_cipher_iv(req, fw6_pld, reqctx->iv);
+	if (err)
+		goto complete;
+
+	if (unlikely(bytes == 0)) {	/* WR sizing gave 0: use sw_cipher */
+		err = chcr_cipher_fallback(ablkctx->sw_cipher,
+				     req->base.flags,
+				     wrparam.srcsg,
+				     reqctx->dst,
+				     req->nbytes - reqctx->processed,
+				     reqctx->iv,
+				     reqctx->op);
+		goto complete;
+	}
+
+	if (get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm)) ==
+	    CRYPTO_ALG_SUB_TYPE_CTR)
+		bytes = adjust_ctr_overflow(reqctx->iv, bytes);
+	reqctx->processed += bytes;	/* counted before the WR is built */
+	wrparam.qid = u_ctx->lldi.rxq_ids[ctx->rx_qidx];
+	wrparam.req = req;
+	wrparam.bytes = bytes;
+	skb = create_cipher_wr(&wrparam);
+	if (IS_ERR(skb)) {
+		pr_err("chcr : %s : Failed to form WR. No memory\n", __func__);
+		err = PTR_ERR(skb);
+		goto complete;
+	}
+	skb->dev = u_ctx->lldi.ports[0];
+	set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_qidx);
+	chcr_send_wr(skb);
+	return 0;	/* WR sent; request is not completed here */
+complete:
+	free_new_sg(reqctx->newdstsg);
+	reqctx->newdstsg = NULL;
+	req->base.complete(&req->base, err);
+	return err;
+}
+
+static int process_cipher(struct ablkcipher_request *req,
+				  unsigned short qid,
+				  struct sk_buff **skb,
+				  unsigned short op_type)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	unsigned int ivsize = crypto_ablkcipher_ivsize(tfm);
+	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+	struct	cipher_wr_param wrparam;
+	int bytes, nents, err = -EINVAL;	/* default for goto error */
+
+	reqctx->newdstsg = NULL;
+	reqctx->processed = 0;
+	if (!req->info)
+		goto error;
+	if ((ablkctx->enckey_len == 0) || (ivsize > AES_BLOCK_SIZE) ||
+	    (req->nbytes == 0) ||
+	    (req->nbytes % crypto_ablkcipher_blocksize(tfm))) {
+		pr_err("AES: Invalid value of Key Len %d nbytes %d IV Len %d\n",
+		       ablkctx->enckey_len, req->nbytes, ivsize);
+		goto error;
+	}
+	wrparam.srcsg = req->src;
+	if (is_newsg(req->dst, &nents)) {
+		reqctx->newdstsg = alloc_new_sg(req->dst, nents);
+		if (IS_ERR(reqctx->newdstsg))
+			return PTR_ERR(reqctx->newdstsg);
+		reqctx->dstsg = reqctx->newdstsg;
+	} else {
+		reqctx->dstsg = req->dst;
+	}
+	bytes = chcr_sg_ent_in_wr(wrparam.srcsg, reqctx->dstsg, MIN_CIPHER_SG,
+				 SPACE_LEFT(ablkctx->enckey_len),
+				 &wrparam.snent,
+				 &reqctx->dst_nents);
+	if ((bytes + reqctx->processed) >= req->nbytes)
+		bytes  = req->nbytes - reqctx->processed;
+	else
+		bytes = ROUND_16(bytes);
+	if (unlikely(bytes > req->nbytes))
+		bytes = req->nbytes;
+	if (get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm)) ==
+				  CRYPTO_ALG_SUB_TYPE_CTR) {
+		bytes = adjust_ctr_overflow(req->info, bytes);
+	}
+	if (get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm)) ==
+	    CRYPTO_ALG_SUB_TYPE_CTR_RFC3686) {
+		memcpy(reqctx->iv, ablkctx->nonce, CTR_RFC3686_NONCE_SIZE);
+		memcpy(reqctx->iv + CTR_RFC3686_NONCE_SIZE, req->info,
+				CTR_RFC3686_IV_SIZE);
+
+		/* initialize counter portion of counter block */
+		*(__be32 *)(reqctx->iv + CTR_RFC3686_NONCE_SIZE +
+			CTR_RFC3686_IV_SIZE) = cpu_to_be32(1);
+
+	} else {
+
+		memcpy(reqctx->iv, req->info, ivsize);
+	}
+	if (unlikely(bytes == 0)) {	/* WR sizing gave 0: use sw_cipher */
+		err = chcr_cipher_fallback(ablkctx->sw_cipher,
+					   req->base.flags,
+					   req->src,
+					   req->dst,
+					   req->nbytes,
+					   req->info,
+					   op_type);
+		goto error;	/* err is the fallback's result, possibly 0 */
+	}
+	reqctx->processed = bytes;
+	reqctx->dst = reqctx->dstsg;
+	reqctx->op = op_type;
+	wrparam.qid = qid;
+	wrparam.req = req;
+	wrparam.bytes = bytes;
+	*skb = create_cipher_wr(&wrparam);
+	if (IS_ERR(*skb)) {
+		err = PTR_ERR(*skb);
+		goto error;
+	}
+
+	return 0;	/* *skb now holds the work request to send */
+error:
+	free_new_sg(reqctx->newdstsg);
+	reqctx->newdstsg = NULL;
+	return err;
+}
 
 static int chcr_aes_encrypt(struct ablkcipher_request *req)
 {
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
 	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
+	struct sk_buff *skb = NULL;
+	int err;
 	struct uld_ctx *u_ctx = ULD_CTX(ctx);
-	struct sk_buff *skb;
 
 	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
 					    ctx->tx_qidx))) {
@@ -726,12 +1180,10 @@ static int chcr_aes_encrypt(struct ablkcipher_request *req)
 			return -EBUSY;
 	}
 
-	skb = create_cipher_wr(req, u_ctx->lldi.rxq_ids[ctx->rx_qidx],
+	err = process_cipher(req, u_ctx->lldi.rxq_ids[ctx->rx_qidx], &skb,
 			       CHCR_ENCRYPT_OP);
-	if (IS_ERR(skb)) {
-		pr_err("chcr : %s : Failed to form WR. No memory\n", __func__);
-		return  PTR_ERR(skb);
-	}
+	if (err || !skb)
+		return  err;
 	skb->dev = u_ctx->lldi.ports[0];
 	set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_qidx);
 	chcr_send_wr(skb);
@@ -743,7 +1195,8 @@ static int chcr_aes_decrypt(struct ablkcipher_request *req)
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
 	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
 	struct uld_ctx *u_ctx = ULD_CTX(ctx);
-	struct sk_buff *skb;
+	struct sk_buff *skb = NULL;
+	int err;
 
 	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
 					    ctx->tx_qidx))) {
@@ -751,12 +1204,10 @@ static int chcr_aes_decrypt(struct ablkcipher_request *req)
 			return -EBUSY;
 	}
 
-	skb = create_cipher_wr(req, u_ctx->lldi.rxq_ids[ctx->rx_qidx],
+	 err = process_cipher(req, u_ctx->lldi.rxq_ids[ctx->rx_qidx], &skb,
 			       CHCR_DECRYPT_OP);
-	if (IS_ERR(skb)) {
-		pr_err("chcr : %s : Failed to form WR. No memory\n", __func__);
-		return PTR_ERR(skb);
-	}
+	if (err || !skb)
+		return err;
 	skb->dev = u_ctx->lldi.ports[0];
 	set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_qidx);
 	chcr_send_wr(skb);
@@ -765,7 +1216,7 @@ static int chcr_aes_decrypt(struct ablkcipher_request *req)
 
 static int chcr_device_init(struct chcr_context *ctx)
 {
-	struct uld_ctx *u_ctx;
+	struct uld_ctx *u_ctx = NULL;
 	struct adapter *adap;
 	unsigned int id;
 	int txq_perchan, txq_idx, ntxq;
@@ -773,12 +1224,12 @@ static int chcr_device_init(struct chcr_context *ctx)
 
 	id = smp_processor_id();
 	if (!ctx->dev) {
-		err = assign_chcr_device(&ctx->dev);
-		if (err) {
+		u_ctx = assign_chcr_device();
+		if (!u_ctx) {
 			pr_err("chcr device assignment fails\n");
 			goto out;
 		}
-		u_ctx = ULD_CTX(ctx);
+		ctx->dev = u_ctx->dev;
 		adap = padap(ctx->dev);
 		ntxq = min_not_zero((unsigned int)u_ctx->lldi.nrxq,
 				    adap->vres.ncrypto_fc);
@@ -801,10 +1252,48 @@ static int chcr_device_init(struct chcr_context *ctx)
 
 static int chcr_cra_init(struct crypto_tfm *tfm)
 {
+	struct crypto_alg *alg = tfm->__crt_alg;
+	struct chcr_context *ctx = crypto_tfm_ctx(tfm);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+
+	ablkctx->sw_cipher = crypto_alloc_skcipher(alg->cra_name, 0,	/* fallback is looked up under this algorithm's own cra_name */
+				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);	/* type 0 + this mask: presumably selects an implementation with neither flag set - confirm against the crypto API docs */
+	if (IS_ERR(ablkctx->sw_cipher)) {
+		pr_err("failed to allocate fallback for %s\n", alg->cra_name);
+		return PTR_ERR(ablkctx->sw_cipher);
+	}	/* NOTE(review): sw_cipher is not freed on this path if chcr_device_init() below returns an error - confirm whether that leaks */
 	tfm->crt_ablkcipher.reqsize =  sizeof(struct chcr_blkcipher_req_ctx);
 	return chcr_device_init(crypto_tfm_ctx(tfm));
 }
 
+static int chcr_rfc3686_init(struct crypto_tfm *tfm)
+{
+	struct crypto_alg *alg = tfm->__crt_alg;
+	struct chcr_context *ctx = crypto_tfm_ctx(tfm);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+
+	/* RFC3686 initialises the IV counter value to 1, so rfc3686(ctr(aes))
+	 * cannot be used as the fallback in chcr_handle_cipher_response;
+	 * plain "ctr(aes)" is requested below instead of alg->cra_name. */
+	ablkctx->sw_cipher = crypto_alloc_skcipher("ctr(aes)", 0,
+				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(ablkctx->sw_cipher)) {
+		pr_err("failed to allocate fallback for %s\n", alg->cra_name);	/* message names this tfm's algorithm, not the "ctr(aes)" fallback */
+		return PTR_ERR(ablkctx->sw_cipher);
+	}	/* NOTE(review): sw_cipher is not freed on this path if chcr_device_init() below returns an error - confirm whether that leaks */
+	tfm->crt_ablkcipher.reqsize =  sizeof(struct chcr_blkcipher_req_ctx);
+	return chcr_device_init(crypto_tfm_ctx(tfm));
+}
+
+/* tfm exit hook: releases the skcipher stored in the context's sw_cipher field. */
+static void chcr_cra_exit(struct crypto_tfm *tfm)
+{
+	struct chcr_context *ctx = crypto_tfm_ctx(tfm);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+
+	crypto_free_skcipher(ablkctx->sw_cipher);	/* sw_cipher is set by chcr_cra_init() / chcr_rfc3686_init() */
+}
+
 static int get_alg_config(struct algo_param *params,
 			  unsigned int auth_size)
 {
@@ -865,6 +1354,7 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req,
 	u8 hash_size_in_response = 0;
 	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
 		GFP_ATOMIC;
+	struct adapter *adap = padap(ctx->dev);
 
 	iopad_alignment = KEYCTX_ALIGN_PAD(digestsize);
 	kctx_len = param->alg_prm.result_size + iopad_alignment;
@@ -921,9 +1411,9 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req,
 				    param->bfr_len);
 	if (param->sg_len != 0)
 		write_sg_to_skb(skb, &frags, req->src, param->sg_len);
-
-	create_wreq(ctx, chcr_req, req, skb, kctx_len, hash_size_in_response, 0,
-			DUMMY_BYTES);
+	atomic_inc(&adap->chcr_stats.digest_rqst);
+	create_wreq(ctx, chcr_req, &req->base, skb, kctx_len,
+		    hash_size_in_response, 0, DUMMY_BYTES, 0);
 	req_ctx->skb = skb;
 	skb_get(skb);
 	return skb;
@@ -1226,21 +1716,17 @@ static int chcr_ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
 	return err;
 }
 
-static int chcr_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int chcr_aes_xts_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 			       unsigned int key_len)
 {
-	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(cipher);
 	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
 	unsigned short context_size = 0;
+	int err;
 
-	if ((key_len != (AES_KEYSIZE_128 << 1)) &&
-	    (key_len != (AES_KEYSIZE_256 << 1))) {
-		crypto_tfm_set_flags((struct crypto_tfm *)tfm,
-				     CRYPTO_TFM_RES_BAD_KEY_LEN);
-		ablkctx->enckey_len = 0;
-		return -EINVAL;
-
-	}
+	err = chcr_cipher_fallback_setkey(cipher, key, key_len);
+	if (err)
+		goto badkey_err;
 
 	memcpy(ablkctx->key, key, key_len);
 	ablkctx->enckey_len = key_len;
@@ -1254,6 +1740,11 @@ static int chcr_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 				 0, context_size);
 	ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_XTS;
 	return 0;
+badkey_err:
+	crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	ablkctx->enckey_len = 0;
+
+	return err;
 }
 
 static int chcr_sha_init(struct ahash_request *areq)
@@ -1330,6 +1821,63 @@ static void chcr_hmac_cra_exit(struct crypto_tfm *tfm)
 	}
 }
 
+static int is_newsg(struct scatterlist *sgl, unsigned int *newents)
+{
+	int nents = 0;	/* entries needed if every source entry is cut into CHCR_SG_SIZE pieces */
+	int ret = 0;	/* becomes 1 once any source entry is longer than CHCR_SG_SIZE */
+
+	while (sgl) {
+		if (sgl->length > CHCR_SG_SIZE)
+			ret = 1;
+		nents += DIV_ROUND_UP(sgl->length, CHCR_SG_SIZE);	/* a zero-length entry contributes 0 */
+		sgl = sg_next(sgl);
+	}
+	*newents = nents;	/* written back whether or not a split is required */
+	return ret;	/* 1: caller should build a split list, 0: sgl can be used as is */
+}
+
+static inline void free_new_sg(struct scatterlist *sgl)
+{
+	kfree(sgl);	/* releases the array built by alloc_new_sg(); callers in this file also pass NULL when no split list exists */
+}
+
+static struct scatterlist *alloc_new_sg(struct scatterlist *sgl,
+				       unsigned int nents)	/* nents: entry count, as reported by is_newsg() */
+{
+	struct scatterlist *newsg, *sg;
+	int i, len, processed = 0;	/* processed: bytes of the current source entry already mapped */
+	struct page *spage;
+	int offset;
+
+	newsg = kmalloc_array(nents, sizeof(struct scatterlist), GFP_KERNEL);	/* NOTE(review): always GFP_KERNEL although the WR builders pick GFP_ATOMIC without CRYPTO_TFM_REQ_MAY_SLEEP - confirm */
+	if (!newsg)
+		return ERR_PTR(-ENOMEM);	/* failure is an ERR_PTR, never NULL */
+	sg = newsg;
+	sg_init_table(sg, nents);
+	offset = sgl->offset;
+	spage = sg_page(sgl);
+	for (i = 0; i < nents; i++) {
+		len = min_t(u32, sgl->length - processed, CHCR_SG_SIZE);	/* each new entry covers at most CHCR_SG_SIZE bytes */
+		sg_set_page(sg, spage, len, offset);
+		processed += len;
+		offset += len;
+		if (offset >= PAGE_SIZE) {	/* this chunk reached or crossed the end of the current page */
+			offset = offset % PAGE_SIZE;
+			spage++;	/* NOTE(review): assumes the next struct page is adjacent in the page array - confirm */
+		}
+		if (processed == sgl->length) {	/* source entry fully consumed: restart from the next one */
+			processed = 0;
+			sgl = sg_next(sgl);
+			if (!sgl)
+				break;
+			spage = sg_page(sgl);
+			offset = sgl->offset;
+		}
+		sg = sg_next(sg);
+	}
+	return newsg;	/* caller releases it with free_new_sg() */
+}
+
 static int chcr_copy_assoc(struct aead_request *req,
 				struct chcr_aead_ctx *ctx)
 {
@@ -1392,16 +1940,20 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 	struct scatterlist *src;
 	unsigned int frags = 0, transhdr_len;
 	unsigned int ivsize = crypto_aead_ivsize(tfm), dst_size = 0;
-	unsigned int   kctx_len = 0;
+	unsigned int   kctx_len = 0, nents;
 	unsigned short stop_offset = 0;
 	unsigned int  assoclen = req->assoclen;
 	unsigned int  authsize = crypto_aead_authsize(tfm);
-	int err = -EINVAL, src_nent;
+	int error = -EINVAL, src_nent;
 	int null = 0;
 	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
 		GFP_ATOMIC;
+	struct adapter *adap = padap(ctx->dev);
 
-	if (aeadctx->enckey_len == 0 || (req->cryptlen == 0))
+	reqctx->newdstsg = NULL;
+	dst_size = req->assoclen + req->cryptlen + (op_type ? -authsize :
+						   authsize);
+	if (aeadctx->enckey_len == 0 || (req->cryptlen <= 0))
 		goto err;
 
 	if (op_type && req->cryptlen < crypto_aead_authsize(tfm))
@@ -1410,14 +1962,24 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 	if (src_nent < 0)
 		goto err;
 	src = scatterwalk_ffwd(reqctx->srcffwd, req->src, req->assoclen);
-	reqctx->dst = src;
 
 	if (req->src != req->dst) {
-		err = chcr_copy_assoc(req, aeadctx);
-		if (err)
-			return ERR_PTR(err);
-		reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd, req->dst,
-					       req->assoclen);
+		error = chcr_copy_assoc(req, aeadctx);
+		if (error)
+			return ERR_PTR(error);
+	}
+	if (dst_size && is_newsg(req->dst, &nents)) {
+		reqctx->newdstsg = alloc_new_sg(req->dst, nents);
+		if (IS_ERR(reqctx->newdstsg))
+			return ERR_CAST(reqctx->newdstsg);
+		reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd,
+					       reqctx->newdstsg, req->assoclen);
+	} else {
+		if (req->src == req->dst)
+			reqctx->dst = src;
+		else
+			reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd,
+						       req->dst, req->assoclen);
 	}
 	if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_NULL) {
 		null = 1;
@@ -1427,6 +1989,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 					     (op_type ? -authsize : authsize));
 	if (reqctx->dst_nents < 0) {
 		pr_err("AUTHENC:Invalid Destination sg entries\n");
+		error = -EINVAL;
 		goto err;
 	}
 	dst_size = get_space_for_phys_dsgl(reqctx->dst_nents);
@@ -1437,11 +2000,16 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 			T6_MAX_AAD_SIZE,
 			transhdr_len + (sgl_len(src_nent + MIN_AUTH_SG) * 8),
 				op_type)) {
+		atomic_inc(&adap->chcr_stats.fallback);
+		free_new_sg(reqctx->newdstsg);
+		reqctx->newdstsg = NULL;
 		return ERR_PTR(chcr_aead_fallback(req, op_type));
 	}
 	skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags);
-	if (!skb)
+	if (!skb) {
+		error = -ENOMEM;
 		goto err;
+	}
 
 	/* LLD is going to write the sge hdr. */
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
@@ -1493,9 +2061,9 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 	sg_param.nents = reqctx->dst_nents;
 	sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize);
 	sg_param.qid = qid;
-	sg_param.align = 0;
-	if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, reqctx->dst,
-				  &sg_param))
+	error = map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl,
+					reqctx->dst, &sg_param);
+	if (error)
 		goto dstmap_fail;
 
 	skb_set_transport_header(skb, transhdr_len);
@@ -1507,8 +2075,9 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 	}
 	write_buffer_to_skb(skb, &frags, req->iv, ivsize);
 	write_sg_to_skb(skb, &frags, src, req->cryptlen);
-	create_wreq(ctx, chcr_req, req, skb, kctx_len, size, 1,
-		   sizeof(struct cpl_rx_phys_dsgl) + dst_size);
+	atomic_inc(&adap->chcr_stats.cipher_rqst);
+	create_wreq(ctx, chcr_req, &req->base, skb, kctx_len, size, 1,
+		   sizeof(struct cpl_rx_phys_dsgl) + dst_size, 0);
 	reqctx->skb = skb;
 	skb_get(skb);
 
@@ -1517,7 +2086,9 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 	/* ivmap_fail: */
 	kfree_skb(skb);
 err:
-	return ERR_PTR(-EINVAL);
+	free_new_sg(reqctx->newdstsg);
+	reqctx->newdstsg = NULL;
+	return ERR_PTR(error);
 }
 
 static int set_msg_len(u8 *block, unsigned int msglen, int csize)
@@ -1724,14 +2295,17 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
 	struct phys_sge_parm sg_param;
 	struct scatterlist *src;
 	unsigned int frags = 0, transhdr_len, ivsize = AES_BLOCK_SIZE;
-	unsigned int dst_size = 0, kctx_len;
+	unsigned int dst_size = 0, kctx_len, nents;
 	unsigned int sub_type;
 	unsigned int authsize = crypto_aead_authsize(tfm);
-	int err = -EINVAL, src_nent;
+	int error = -EINVAL, src_nent;
 	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
 		GFP_ATOMIC;
+	struct adapter *adap = padap(ctx->dev);
 
-
+	dst_size = req->assoclen + req->cryptlen + (op_type ? -authsize :
+						   authsize);
+	reqctx->newdstsg = NULL;
 	if (op_type && req->cryptlen < crypto_aead_authsize(tfm))
 		goto err;
 	src_nent = sg_nents_for_len(req->src, req->assoclen + req->cryptlen);
@@ -1740,26 +2314,35 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
 
 	sub_type = get_aead_subtype(tfm);
 	src = scatterwalk_ffwd(reqctx->srcffwd, req->src, req->assoclen);
-	reqctx->dst = src;
-
 	if (req->src != req->dst) {
-		err = chcr_copy_assoc(req, aeadctx);
-		if (err) {
+		error = chcr_copy_assoc(req, aeadctx);
+		if (error) {
 			pr_err("AAD copy to destination buffer fails\n");
-			return ERR_PTR(err);
+			return ERR_PTR(error);
 		}
-		reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd, req->dst,
-					       req->assoclen);
+	}
+	if (dst_size && is_newsg(req->dst, &nents)) {
+		reqctx->newdstsg = alloc_new_sg(req->dst, nents);
+		if (IS_ERR(reqctx->newdstsg))
+			return ERR_CAST(reqctx->newdstsg);
+		reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd,
+					       reqctx->newdstsg, req->assoclen);
+	} else {
+		if (req->src == req->dst)
+			reqctx->dst = src;
+		else
+			reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd,
+						       req->dst, req->assoclen);
 	}
 	reqctx->dst_nents = sg_nents_for_len(reqctx->dst, req->cryptlen +
 					     (op_type ? -authsize : authsize));
 	if (reqctx->dst_nents < 0) {
 		pr_err("CCM:Invalid Destination sg entries\n");
+		error = -EINVAL;
 		goto err;
 	}
-
-
-	if (aead_ccm_validate_input(op_type, req, aeadctx, sub_type))
+	error = aead_ccm_validate_input(op_type, req, aeadctx, sub_type);
+	if (error)
 		goto err;
 
 	dst_size = get_space_for_phys_dsgl(reqctx->dst_nents);
@@ -1769,13 +2352,18 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
 			    T6_MAX_AAD_SIZE - 18,
 			    transhdr_len + (sgl_len(src_nent + MIN_CCM_SG) * 8),
 			    op_type)) {
+		atomic_inc(&adap->chcr_stats.fallback);
+		free_new_sg(reqctx->newdstsg);
+		reqctx->newdstsg = NULL;
 		return ERR_PTR(chcr_aead_fallback(req, op_type));
 	}
 
 	skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)),  flags);
 
-	if (!skb)
+	if (!skb) {
+		error = -ENOMEM;
 		goto err;
+	}
 
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
 
@@ -1790,29 +2378,32 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
 					16), aeadctx->key, aeadctx->enckey_len);
 
 	phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len);
-	if (ccm_format_packet(req, aeadctx, sub_type, op_type))
+	error = ccm_format_packet(req, aeadctx, sub_type, op_type);
+	if (error)
 		goto dstmap_fail;
 
 	sg_param.nents = reqctx->dst_nents;
 	sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize);
 	sg_param.qid = qid;
-	sg_param.align = 0;
-	if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, reqctx->dst,
-				  &sg_param))
+	error = map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl,
+				 reqctx->dst, &sg_param);
+	if (error)
 		goto dstmap_fail;
 
 	skb_set_transport_header(skb, transhdr_len);
 	frags = fill_aead_req_fields(skb, req, src, ivsize, aeadctx);
-	create_wreq(ctx, chcr_req, req, skb, kctx_len, 0, 1,
-		    sizeof(struct cpl_rx_phys_dsgl) + dst_size);
+	atomic_inc(&adap->chcr_stats.aead_rqst);
+	create_wreq(ctx, chcr_req, &req->base, skb, kctx_len, 0, 1,
+		    sizeof(struct cpl_rx_phys_dsgl) + dst_size, 0);
 	reqctx->skb = skb;
 	skb_get(skb);
 	return skb;
 dstmap_fail:
 	kfree_skb(skb);
-	skb = NULL;
 err:
-	return ERR_PTR(-EINVAL);
+	free_new_sg(reqctx->newdstsg);
+	reqctx->newdstsg = NULL;
+	return ERR_PTR(error);
 }
 
 static struct sk_buff *create_gcm_wr(struct aead_request *req,
@@ -1832,45 +2423,53 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 	struct scatterlist *src;
 	unsigned int frags = 0, transhdr_len;
 	unsigned int ivsize = AES_BLOCK_SIZE;
-	unsigned int dst_size = 0, kctx_len;
+	unsigned int dst_size = 0, kctx_len, nents, assoclen = req->assoclen;
 	unsigned char tag_offset = 0;
-	unsigned int crypt_len = 0;
 	unsigned int authsize = crypto_aead_authsize(tfm);
-	int err = -EINVAL, src_nent;
+	int error = -EINVAL, src_nent;
 	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
 		GFP_ATOMIC;
+	struct adapter *adap = padap(ctx->dev);
 
+	reqctx->newdstsg = NULL;
+	dst_size = assoclen + req->cryptlen + (op_type ? -authsize :
+						    authsize);
 	/* validate key size */
 	if (aeadctx->enckey_len == 0)
 		goto err;
 
 	if (op_type && req->cryptlen < crypto_aead_authsize(tfm))
 		goto err;
-	src_nent = sg_nents_for_len(req->src, req->assoclen + req->cryptlen);
+	src_nent = sg_nents_for_len(req->src, assoclen + req->cryptlen);
 	if (src_nent < 0)
 		goto err;
 
-	src = scatterwalk_ffwd(reqctx->srcffwd, req->src, req->assoclen);
-	reqctx->dst = src;
+	src = scatterwalk_ffwd(reqctx->srcffwd, req->src, assoclen);
 	if (req->src != req->dst) {
-		err = chcr_copy_assoc(req, aeadctx);
-		if (err)
-			return	ERR_PTR(err);
-		reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd, req->dst,
-					       req->assoclen);
+		error = chcr_copy_assoc(req, aeadctx);
+		if (error)
+			return	ERR_PTR(error);
 	}
 
-	if (!req->cryptlen)
-		/* null-payload is not supported in the hardware.
-		 * software is sending block size
-		 */
-		crypt_len = AES_BLOCK_SIZE;
-	else
-		crypt_len = req->cryptlen;
+	if (dst_size && is_newsg(req->dst, &nents)) {
+		reqctx->newdstsg = alloc_new_sg(req->dst, nents);
+		if (IS_ERR(reqctx->newdstsg))
+			return ERR_CAST(reqctx->newdstsg);
+		reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd,
+					       reqctx->newdstsg, assoclen);
+	} else {
+		if (req->src == req->dst)
+			reqctx->dst = src;
+		else
+			reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd,
+						       req->dst, assoclen);
+	}
+
 	reqctx->dst_nents = sg_nents_for_len(reqctx->dst, req->cryptlen +
 					     (op_type ? -authsize : authsize));
 	if (reqctx->dst_nents < 0) {
 		pr_err("GCM:Invalid Destination sg entries\n");
+		error = -EINVAL;
 		goto err;
 	}
 
@@ -1883,11 +2482,16 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 			    T6_MAX_AAD_SIZE,
 			    transhdr_len + (sgl_len(src_nent + MIN_GCM_SG) * 8),
 			    op_type)) {
+		atomic_inc(&adap->chcr_stats.fallback);
+		free_new_sg(reqctx->newdstsg);
+		reqctx->newdstsg = NULL;
 		return ERR_PTR(chcr_aead_fallback(req, op_type));
 	}
 	skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags);
-	if (!skb)
+	if (!skb) {
+		error = -ENOMEM;
 		goto err;
+	}
 
 	/* NIC driver is going to write the sge hdr. */
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
@@ -1896,19 +2500,19 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 	memset(chcr_req, 0, transhdr_len);
 
 	if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106)
-		req->assoclen -= 8;
+		assoclen = req->assoclen - 8;
 
 	tag_offset = (op_type == CHCR_ENCRYPT_OP) ? 0 : authsize;
 	chcr_req->sec_cpl.op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(
 					ctx->dev->rx_channel_id, 2, (ivsize ?
-					(req->assoclen + 1) : 0));
+					(assoclen + 1) : 0));
 	chcr_req->sec_cpl.pldlen =
-		htonl(req->assoclen + ivsize + req->cryptlen);
+		htonl(assoclen + ivsize + req->cryptlen);
 	chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(
-					req->assoclen ? 1 : 0, req->assoclen,
-					req->assoclen + ivsize + 1, 0);
+					assoclen ? 1 : 0, assoclen,
+					assoclen + ivsize + 1, 0);
 		chcr_req->sec_cpl.cipherstop_lo_authinsert =
-			FILL_SEC_CPL_AUTHINSERT(0, req->assoclen + ivsize + 1,
+			FILL_SEC_CPL_AUTHINSERT(0, assoclen + ivsize + 1,
 						tag_offset, tag_offset);
 		chcr_req->sec_cpl.seqno_numivs =
 			FILL_SEC_CPL_SCMD0_SEQNO(op_type, (op_type ==
@@ -1938,19 +2542,19 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 	sg_param.nents = reqctx->dst_nents;
 	sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize);
 	sg_param.qid = qid;
-	sg_param.align = 0;
-	if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, reqctx->dst,
-				  &sg_param))
+	error = map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl,
+					  reqctx->dst, &sg_param);
+	if (error)
 		goto dstmap_fail;
 
 	skb_set_transport_header(skb, transhdr_len);
-
-	write_sg_to_skb(skb, &frags, req->src, req->assoclen);
-
+	write_sg_to_skb(skb, &frags, req->src, assoclen);
 	write_buffer_to_skb(skb, &frags, reqctx->iv, ivsize);
 	write_sg_to_skb(skb, &frags, src, req->cryptlen);
-	create_wreq(ctx, chcr_req, req, skb, kctx_len, size, 1,
-			sizeof(struct cpl_rx_phys_dsgl) + dst_size);
+	atomic_inc(&adap->chcr_stats.aead_rqst);
+	create_wreq(ctx, chcr_req, &req->base, skb, kctx_len, size, 1,
+			sizeof(struct cpl_rx_phys_dsgl) + dst_size,
+			reqctx->verify);
 	reqctx->skb = skb;
 	skb_get(skb);
 	return skb;
@@ -1958,9 +2562,10 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 dstmap_fail:
 	/* ivmap_fail: */
 	kfree_skb(skb);
-	skb = NULL;
 err:
-	return skb;
+	free_new_sg(reqctx->newdstsg);
+	reqctx->newdstsg = NULL;
+	return ERR_PTR(error);
 }
 
 
@@ -1972,7 +2577,8 @@ static int chcr_aead_cra_init(struct crypto_aead *tfm)
 	struct aead_alg *alg = crypto_aead_alg(tfm);
 
 	aeadctx->sw_cipher = crypto_alloc_aead(alg->base.cra_name, 0,
-					       CRYPTO_ALG_NEED_FALLBACK);
+					       CRYPTO_ALG_NEED_FALLBACK |
+					       CRYPTO_ALG_ASYNC);
 	if  (IS_ERR(aeadctx->sw_cipher))
 		return PTR_ERR(aeadctx->sw_cipher);
 	crypto_aead_set_reqsize(tfm, max(sizeof(struct chcr_aead_reqctx),
@@ -2206,7 +2812,8 @@ static int chcr_aead_rfc4309_setkey(struct crypto_aead *aead, const u8 *key,
 				    unsigned int keylen)
 {
 	struct chcr_context *ctx = crypto_aead_ctx(aead);
-	 struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
+	struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
+	int error;
 
 	if (keylen < 3) {
 		crypto_tfm_set_flags((struct crypto_tfm *)aead,
@@ -2214,6 +2821,15 @@ static int chcr_aead_rfc4309_setkey(struct crypto_aead *aead, const u8 *key,
 		aeadctx->enckey_len = 0;
 		return	-EINVAL;
 	}
+	crypto_aead_clear_flags(aeadctx->sw_cipher, CRYPTO_TFM_REQ_MASK);
+	crypto_aead_set_flags(aeadctx->sw_cipher, crypto_aead_get_flags(aead) &
+			      CRYPTO_TFM_REQ_MASK);
+	error = crypto_aead_setkey(aeadctx->sw_cipher, key, keylen);
+	crypto_aead_clear_flags(aead, CRYPTO_TFM_RES_MASK);
+	crypto_aead_set_flags(aead, crypto_aead_get_flags(aeadctx->sw_cipher) &
+			      CRYPTO_TFM_RES_MASK);
+	if (error)
+		return error;
 	keylen -= 3;
 	memcpy(aeadctx->salt, key + keylen, 3);
 	return chcr_ccm_common_setkey(aead, key, keylen);
@@ -2552,22 +3168,14 @@ static int chcr_aead_op(struct aead_request *req,
 static struct chcr_alg_template driver_algs[] = {
 	/* AES-CBC */
 	{
-		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_SUB_TYPE_CBC,
 		.is_registered = 0,
 		.alg.crypto = {
 			.cra_name		= "cbc(aes)",
 			.cra_driver_name	= "cbc-aes-chcr",
-			.cra_priority		= CHCR_CRA_PRIORITY,
-			.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				CRYPTO_ALG_ASYNC,
 			.cra_blocksize		= AES_BLOCK_SIZE,
-			.cra_ctxsize		= sizeof(struct chcr_context)
-				+ sizeof(struct ablk_ctx),
-			.cra_alignmask		= 0,
-			.cra_type		= &crypto_ablkcipher_type,
-			.cra_module		= THIS_MODULE,
 			.cra_init		= chcr_cra_init,
-			.cra_exit		= NULL,
+			.cra_exit		= chcr_cra_exit,
 			.cra_u.ablkcipher	= {
 				.min_keysize	= AES_MIN_KEY_SIZE,
 				.max_keysize	= AES_MAX_KEY_SIZE,
@@ -2579,24 +3187,15 @@ static struct chcr_alg_template driver_algs[] = {
 		}
 	},
 	{
-		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_SUB_TYPE_XTS,
 		.is_registered = 0,
 		.alg.crypto =   {
 			.cra_name		= "xts(aes)",
 			.cra_driver_name	= "xts-aes-chcr",
-			.cra_priority		= CHCR_CRA_PRIORITY,
-			.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				CRYPTO_ALG_ASYNC,
 			.cra_blocksize		= AES_BLOCK_SIZE,
-			.cra_ctxsize		= sizeof(struct chcr_context) +
-				sizeof(struct ablk_ctx),
-			.cra_alignmask		= 0,
-			.cra_type		= &crypto_ablkcipher_type,
-			.cra_module		= THIS_MODULE,
 			.cra_init		= chcr_cra_init,
 			.cra_exit		= NULL,
-			.cra_u = {
-				.ablkcipher = {
+			.cra_u .ablkcipher = {
 					.min_keysize	= 2 * AES_MIN_KEY_SIZE,
 					.max_keysize	= 2 * AES_MAX_KEY_SIZE,
 					.ivsize		= AES_BLOCK_SIZE,
@@ -2605,6 +3204,47 @@ static struct chcr_alg_template driver_algs[] = {
 					.decrypt	= chcr_aes_decrypt,
 				}
 			}
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_SUB_TYPE_CTR,
+		.is_registered = 0,
+		.alg.crypto = {
+			.cra_name		= "ctr(aes)",
+			.cra_driver_name	= "ctr-aes-chcr",
+			.cra_blocksize		= 1,
+			.cra_init		= chcr_cra_init,
+			.cra_exit		= chcr_cra_exit,
+			.cra_u.ablkcipher	= {
+				.min_keysize	= AES_MIN_KEY_SIZE,
+				.max_keysize	= AES_MAX_KEY_SIZE,
+				.ivsize		= AES_BLOCK_SIZE,
+				.setkey		= chcr_aes_ctr_setkey,
+				.encrypt	= chcr_aes_encrypt,
+				.decrypt	= chcr_aes_decrypt,
+			}
+		}
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER |
+			CRYPTO_ALG_SUB_TYPE_CTR_RFC3686,
+		.is_registered = 0,
+		.alg.crypto = {
+			.cra_name		= "rfc3686(ctr(aes))",
+			.cra_driver_name	= "rfc3686-ctr-aes-chcr",
+			.cra_blocksize		= 1,
+			.cra_init		= chcr_rfc3686_init,
+			.cra_exit		= chcr_cra_exit,
+			.cra_u.ablkcipher	= {
+				.min_keysize	= AES_MIN_KEY_SIZE +
+					CTR_RFC3686_NONCE_SIZE,
+				.max_keysize	= AES_MAX_KEY_SIZE +
+					CTR_RFC3686_NONCE_SIZE,
+				.ivsize		= CTR_RFC3686_IV_SIZE,
+				.setkey		= chcr_aes_rfc3686_setkey,
+				.encrypt	= chcr_aes_encrypt,
+				.decrypt	= chcr_aes_decrypt,
+				.geniv          = "seqiv",
+			}
 		}
 	},
 	/* SHA */
@@ -2986,6 +3626,18 @@ static int chcr_register_alg(void)
 			continue;
 		switch (driver_algs[i].type & CRYPTO_ALG_TYPE_MASK) {
 		case CRYPTO_ALG_TYPE_ABLKCIPHER:
+			driver_algs[i].alg.crypto.cra_priority =
+				CHCR_CRA_PRIORITY;
+			driver_algs[i].alg.crypto.cra_module = THIS_MODULE;
+			driver_algs[i].alg.crypto.cra_flags =
+				CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC |
+				CRYPTO_ALG_NEED_FALLBACK;
+			driver_algs[i].alg.crypto.cra_ctxsize =
+				sizeof(struct chcr_context) +
+				sizeof(struct ablk_ctx);
+			driver_algs[i].alg.crypto.cra_alignmask = 0;
+			driver_algs[i].alg.crypto.cra_type =
+				&crypto_ablkcipher_type;
 			err = crypto_register_alg(&driver_algs[i].alg.crypto);
 			name = driver_algs[i].alg.crypto.cra_driver_name;
 			break;
diff --git a/drivers/crypto/chelsio/chcr_algo.h b/drivers/crypto/chelsio/chcr_algo.h
index 751d06a..583008d 100644
--- a/drivers/crypto/chelsio/chcr_algo.h
+++ b/drivers/crypto/chelsio/chcr_algo.h
@@ -185,11 +185,11 @@
 			FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_V(1) | \
 			FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_V((ctx_len)))
 
-#define FILL_WR_RX_Q_ID(cid, qid, wr_iv, fid) \
+#define FILL_WR_RX_Q_ID(cid, qid, wr_iv, lcb, fid) \
 		htonl( \
 			FW_CRYPTO_LOOKASIDE_WR_RX_CHID_V((cid)) | \
 			FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_V((qid)) | \
-			FW_CRYPTO_LOOKASIDE_WR_LCB_V(0) | \
+			FW_CRYPTO_LOOKASIDE_WR_LCB_V((lcb)) | \
 			FW_CRYPTO_LOOKASIDE_WR_IV_V((wr_iv)) | \
 			FW_CRYPTO_LOOKASIDE_WR_FQIDX_V(fid))
 
@@ -219,9 +219,26 @@
 #define MAX_NK 8
 #define CRYPTO_MAX_IMM_TX_PKT_LEN 256
 #define MAX_WR_SIZE			512
+#define ROUND_16(bytes)		((bytes) & 0xFFFFFFF0) /* clears the low 4 bits: rounds DOWN to a multiple of 16 */
+#define MAX_DSGL_ENT			32
+#define MAX_DIGEST_SKB_SGE	(MAX_SKB_FRAGS - 2)
+#define MIN_CIPHER_SG			1 /* IV */
 #define MIN_AUTH_SG			2 /*IV + AAD*/
 #define MIN_GCM_SG			2 /* IV + AAD*/
+#define MIN_DIGEST_SG			1 /* Partial Buffer */
 #define MIN_CCM_SG			3 /*IV+AAD+B0*/
+#define SPACE_LEFT(len) \
+	((MAX_WR_SIZE - WR_MIN_LEN - (len))) /* what remains of MAX_WR_SIZE after WR_MIN_LEN and len */
+
+static unsigned int sgl_ent_len[] = {0, 0, 16, 24, 40, /* static: a header must not define an external object */
+				48, 64, 72, 88,
+				96, 112, 120, 136,
+				144, 160, 168, 184,
+				192}; /* presumably a length indexed by SGL entry count - confirm against the users */
+static unsigned int dsgl_ent_len[] = {0, 32, 32, 48, 48, 64, 64, 80, 80, /* static for the same reason */
+				112, 112, 128, 128, 144, 144, 160, 160,
+				192, 192, 208, 208, 224, 224, 240, 240,
+				272, 272, 288, 288, 304, 304, 320, 320}; /* 33 values, i.e. indices 0..MAX_DSGL_ENT */
 
 struct algo_param {
 	unsigned int auth_mode;
@@ -239,6 +256,14 @@ struct hash_wr_param {
 	u64 scmd1;
 };
 
+struct cipher_wr_param {	/* argument bundle handed to create_cipher_wr() */
+	struct ablkcipher_request *req;	/* request the work request is built for */
+	struct scatterlist *srcsg;	/* presumably the source sg position - not set in the visible process_cipher() code */
+	char *iv;	/* presumably the IV to use - TODO confirm where it is set */
+	int bytes;	/* byte count for this WR; process_cipher() stores the same value in reqctx->processed */
+	short int snent;	/* presumably the source sg entry count - TODO confirm */
+	unsigned short qid;	/* queue id; chcr_aes_encrypt/decrypt pass u_ctx->lldi.rxq_ids[ctx->rx_qidx] */
+};
 enum {
 	AES_KEYLENGTH_128BIT = 128,
 	AES_KEYLENGTH_192BIT = 192,
@@ -293,7 +318,6 @@ struct phys_sge_parm {
 	unsigned int nents;
 	unsigned int obsize;
 	unsigned short qid;
-	unsigned char align;
 };
 
 struct crypto_result {
diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c
index c28e018..b6dd9cb 100644
--- a/drivers/crypto/chelsio/chcr_core.c
+++ b/drivers/crypto/chelsio/chcr_core.c
@@ -29,6 +29,7 @@
 static LIST_HEAD(uld_ctx_list);
 static DEFINE_MUTEX(dev_mutex);
 static atomic_t dev_count;
+static struct uld_ctx *ctx_rr;
 
 typedef int (*chcr_handler_func)(struct chcr_dev *dev, unsigned char *input);
 static int cpl_fw6_pld_handler(struct chcr_dev *dev, unsigned char *input);
@@ -49,25 +50,28 @@ static struct cxgb4_uld_info chcr_uld_info = {
 	.rx_handler = chcr_uld_rx_handler,
 };
 
-int assign_chcr_device(struct chcr_dev **dev)
+struct uld_ctx *assign_chcr_device(void)
 {
-	struct uld_ctx *u_ctx;
-	int ret = -ENXIO;
+	struct uld_ctx *u_ctx = NULL;
 
 	/*
-	 * Which device to use if multiple devices are available TODO
-	 * May be select the device based on round robin. One session
-	 * must go to the same device to maintain the ordering.
+	 * When multiple devices are present in the system, select a
+	 * device in round-robin fashion for crypto operations.
+	 * A given session must still use the same device throughout
+	 * to maintain request-response ordering.
 	 */
-	mutex_lock(&dev_mutex); /* TODO ? */
-	list_for_each_entry(u_ctx, &uld_ctx_list, entry)
-		if (u_ctx->dev) {
-			*dev = u_ctx->dev;
-			ret = 0;
-			break;
+	mutex_lock(&dev_mutex);
+	if (!list_empty(&uld_ctx_list)) {
+		u_ctx = ctx_rr;	/* hand out the current round-robin pick */
+		if (list_is_last(&ctx_rr->entry, &uld_ctx_list))	/* at the tail: wrap to the first entry */
+			ctx_rr = list_first_entry(&uld_ctx_list,
+						  struct uld_ctx,
+						  entry);
+		else
+			ctx_rr = list_next_entry(ctx_rr, entry);
 	}
 	mutex_unlock(&dev_mutex);
-	return ret;
+	return u_ctx;	/* NULL when uld_ctx_list is empty */
 }
 
 static int chcr_dev_add(struct uld_ctx *u_ctx)
@@ -82,11 +86,27 @@ static int chcr_dev_add(struct uld_ctx *u_ctx)
 	u_ctx->dev = dev;
 	dev->u_ctx = u_ctx;
 	atomic_inc(&dev_count);
+	mutex_lock(&dev_mutex);
+	list_add_tail(&u_ctx->entry, &uld_ctx_list);
+	if (!ctx_rr)
+		ctx_rr = u_ctx;
+	mutex_unlock(&dev_mutex);
 	return 0;
 }
 
 static int chcr_dev_remove(struct uld_ctx *u_ctx)
 {
+	if (ctx_rr == u_ctx) {
+		if (list_is_last(&ctx_rr->entry, &uld_ctx_list))
+			ctx_rr = list_first_entry(&uld_ctx_list,
+						  struct uld_ctx,
+						  entry);
+		else
+			ctx_rr = list_next_entry(ctx_rr, entry);
+	}
+	list_del(&u_ctx->entry);
+	if (list_empty(&uld_ctx_list))
+		ctx_rr = NULL;
 	kfree(u_ctx->dev);
 	u_ctx->dev = NULL;
 	atomic_dec(&dev_count);
@@ -100,6 +120,7 @@ static int cpl_fw6_pld_handler(struct chcr_dev *dev,
 	struct cpl_fw6_pld *fw6_pld;
 	u32 ack_err_status = 0;
 	int error_status = 0;
+	struct adapter *adap = padap(dev);
 
 	fw6_pld = (struct cpl_fw6_pld *)input;
 	req = (struct crypto_async_request *)(uintptr_t)be64_to_cpu(
@@ -111,11 +132,11 @@ static int cpl_fw6_pld_handler(struct chcr_dev *dev,
 		if (CHK_MAC_ERR_BIT(ack_err_status) ||
 		    CHK_PAD_ERR_BIT(ack_err_status))
 			error_status = -EBADMSG;
+		atomic_inc(&adap->chcr_stats.error);
 	}
 	/* call completion callback with failure status */
 	if (req) {
 		error_status = chcr_handle_resp(req, input, error_status);
-		req->complete(req, error_status);
 	} else {
 		pr_err("Incorrect request address from the firmware\n");
 		return -EFAULT;
@@ -138,10 +159,11 @@ static void *chcr_uld_add(const struct cxgb4_lld_info *lld)
 		u_ctx = ERR_PTR(-ENOMEM);
 		goto out;
 	}
+	if (!(lld->ulp_crypto & ULP_CRYPTO_LOOKASIDE)) {
+		u_ctx = ERR_PTR(-ENOMEM);
+		goto out;
+	}
 	u_ctx->lldi = *lld;
-	mutex_lock(&dev_mutex);
-	list_add_tail(&u_ctx->entry, &uld_ctx_list);
-	mutex_unlock(&dev_mutex);
 out:
 	return u_ctx;
 }
diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h
index cd0c35a1..c9a19b2 100644
--- a/drivers/crypto/chelsio/chcr_core.h
+++ b/drivers/crypto/chelsio/chcr_core.h
@@ -53,6 +53,9 @@
 #define MAC_ERROR_BIT		0
 #define CHK_MAC_ERR_BIT(x)	(((x) >> MAC_ERROR_BIT) & 1)
 #define MAX_SALT                4
+#define WR_MIN_LEN (sizeof(struct chcr_wr) + \
+		    sizeof(struct cpl_rx_phys_dsgl) + \
+		    sizeof(struct ulptx_sgl))
 
 #define padap(dev) pci_get_drvdata(dev->u_ctx->lldi.pdev)
 
@@ -86,7 +89,7 @@ struct uld_ctx {
 	struct chcr_dev *dev;
 };
 
-int assign_chcr_device(struct chcr_dev **dev);
+struct uld_ctx * assign_chcr_device(void);
 int chcr_send_wr(struct sk_buff *skb);
 int start_crypto(void);
 int stop_crypto(void);
diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h
index 5b2fabb..a4f95b0 100644
--- a/drivers/crypto/chelsio/chcr_crypto.h
+++ b/drivers/crypto/chelsio/chcr_crypto.h
@@ -139,6 +139,9 @@
 #define CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309    0x06000000
 #define CRYPTO_ALG_SUB_TYPE_AEAD_NULL       0x07000000
 #define CRYPTO_ALG_SUB_TYPE_CTR             0x08000000
+#define CRYPTO_ALG_SUB_TYPE_CTR_RFC3686     0x09000000
+#define CRYPTO_ALG_SUB_TYPE_XTS		    0x0a000000
+#define CRYPTO_ALG_SUB_TYPE_CBC		    0x0b000000
 #define CRYPTO_ALG_TYPE_HMAC (CRYPTO_ALG_TYPE_AHASH |\
 			      CRYPTO_ALG_SUB_TYPE_HASH_HMAC)
 
@@ -146,19 +149,23 @@
 
 #define CHCR_HASH_MAX_BLOCK_SIZE_64  64
 #define CHCR_HASH_MAX_BLOCK_SIZE_128 128
+#define CHCR_SG_SIZE 2048
 
 /* Aligned to 128 bit boundary */
 
 struct ablk_ctx {
+	struct crypto_skcipher *sw_cipher;
 	__be32 key_ctx_hdr;
 	unsigned int enckey_len;
-	u8 key[CHCR_AES_MAX_KEY_LEN];
 	unsigned char ciph_mode;
+	u8 key[CHCR_AES_MAX_KEY_LEN];
+	u8 nonce[4];
 	u8 rrkey[AES_MAX_KEY_SIZE];
 };
 struct chcr_aead_reqctx {
 	struct	sk_buff	*skb;
 	struct scatterlist *dst;
+	struct scatterlist *newdstsg;
 	struct scatterlist srcffwd[2];
 	struct scatterlist dstffwd[2];
 	short int dst_nents;
@@ -233,7 +240,14 @@ struct chcr_ahash_req_ctx {
 
 struct chcr_blkcipher_req_ctx {
 	struct sk_buff *skb;
-	unsigned int dst_nents;
+	struct scatterlist srcffwd[2];
+	struct scatterlist dstffwd[2];
+	struct scatterlist *dstsg;
+	struct scatterlist *dst;
+	struct scatterlist *newdstsg;
+	unsigned int processed;
+	unsigned int op;
+	short int dst_nents;
 	u8 iv[CHCR_MAX_CRYPTO_IV_LEN];
 };
 
@@ -275,5 +289,10 @@ static int chcr_aead_op(struct aead_request *req_base,
 			  int size,
 			  create_wr_t create_wr_fn);
 static inline int get_aead_subtype(struct crypto_aead *aead);
-
+static int is_newsg(struct scatterlist *sgl, unsigned int *newents);
+static struct scatterlist *alloc_new_sg(struct scatterlist *sgl,
+					unsigned int nents);
+static inline void free_new_sg(struct scatterlist *sgl);
+static int chcr_handle_cipher_resp(struct ablkcipher_request *req,
+				   unsigned char *input, int err);
 #endif /* __CHCR_CRYPTO_H__ */
diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c
index 9b07f3d8..0c6a917 100644
--- a/drivers/crypto/img-hash.c
+++ b/drivers/crypto/img-hash.c
@@ -1088,9 +1088,17 @@ static int img_hash_suspend(struct device *dev)
 static int img_hash_resume(struct device *dev)
 {
 	struct img_hash_dev *hdev = dev_get_drvdata(dev);
+	int ret;
 
-	clk_prepare_enable(hdev->hash_clk);
-	clk_prepare_enable(hdev->sys_clk);
+	ret = clk_prepare_enable(hdev->hash_clk);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(hdev->sys_clk);
+	if (ret) {
+		clk_disable_unprepare(hdev->hash_clk);
+		return ret;
+	}
 
 	return 0;
 }
diff --git a/drivers/crypto/inside-secure/Makefile b/drivers/crypto/inside-secure/Makefile
new file mode 100644
index 0000000..302f07d
--- /dev/null
+++ b/drivers/crypto/inside-secure/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += crypto_safexcel.o
+crypto_safexcel-objs := safexcel.o safexcel_ring.o safexcel_cipher.o safexcel_hash.o
diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
new file mode 100644
index 0000000..e7f87ac
--- /dev/null
+++ b/drivers/crypto/inside-secure/safexcel.c
@@ -0,0 +1,926 @@
+/*
+ * Copyright (C) 2017 Marvell
+ *
+ * Antoine Tenart <antoine.tenart@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/firmware.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/workqueue.h>
+
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+
+#include "safexcel.h"
+
+static u32 max_rings = EIP197_MAX_RINGS;
+module_param(max_rings, uint, 0644);
+MODULE_PARM_DESC(max_rings, "Maximum number of rings to use.");
+
+static void eip197_trc_cache_init(struct safexcel_crypto_priv *priv)
+{
+	u32 val, htable_offset;
+	int i;
+
+	/* Enable the record cache memory access */
+	val = readl(priv->base + EIP197_CS_RAM_CTRL);
+	val &= ~EIP197_TRC_ENABLE_MASK;
+	val |= EIP197_TRC_ENABLE_0;
+	writel(val, priv->base + EIP197_CS_RAM_CTRL);
+
+	/* Clear all ECC errors */
+	writel(0, priv->base + EIP197_TRC_ECCCTRL);
+
+	/*
+	 * Make sure the cache memory is accessible by putting the record
+	 * cache in reset.
+	 */
+	val = readl(priv->base + EIP197_TRC_PARAMS);
+	val |= EIP197_TRC_PARAMS_SW_RESET;
+	val &= ~EIP197_TRC_PARAMS_DATA_ACCESS;
+	writel(val, priv->base + EIP197_TRC_PARAMS);
+
+	/* Clear all records and link each one to its neighbours */
+	for (i = 0; i < EIP197_CS_RC_MAX; i++) {
+		u32 offset = EIP197_CLASSIFICATION_RAMS + i * EIP197_CS_RC_SIZE;
+
+		writel(EIP197_CS_RC_NEXT(EIP197_RC_NULL) |
+		       EIP197_CS_RC_PREV(EIP197_RC_NULL),
+		       priv->base + offset);
+
+		val = EIP197_CS_RC_NEXT(i+1) | EIP197_CS_RC_PREV(i-1);
+		if (i == 0)
+			val |= EIP197_CS_RC_PREV(EIP197_RC_NULL);
+		else if (i == EIP197_CS_RC_MAX - 1)
+			val |= EIP197_CS_RC_NEXT(EIP197_RC_NULL);
+		writel(val, priv->base + offset + sizeof(u32));
+	}
+
+	/* Clear the hash table entries */
+	htable_offset = EIP197_CS_RC_MAX * EIP197_CS_RC_SIZE;
+	for (i = 0; i < 64; i++)
+		writel(GENMASK(29, 0),
+		       priv->base + EIP197_CLASSIFICATION_RAMS + htable_offset + i * sizeof(u32));
+
+	/* Disable the record cache memory access */
+	val = readl(priv->base + EIP197_CS_RAM_CTRL);
+	val &= ~EIP197_TRC_ENABLE_MASK;
+	writel(val, priv->base + EIP197_CS_RAM_CTRL);
+
+	/* Write head and tail pointers of the record free chain */
+	val = EIP197_TRC_FREECHAIN_HEAD_PTR(0) |
+	      EIP197_TRC_FREECHAIN_TAIL_PTR(EIP197_CS_RC_MAX - 1);
+	writel(val, priv->base + EIP197_TRC_FREECHAIN);
+
+	/* Configure the record cache #1 */
+	val = EIP197_TRC_PARAMS2_RC_SZ_SMALL(EIP197_CS_TRC_REC_WC) |
+	      EIP197_TRC_PARAMS2_HTABLE_PTR(EIP197_CS_RC_MAX);
+	writel(val, priv->base + EIP197_TRC_PARAMS2);
+
+	/* Configure the record cache #2 */
+	val = EIP197_TRC_PARAMS_RC_SZ_LARGE(EIP197_CS_TRC_LG_REC_WC) |
+	      EIP197_TRC_PARAMS_BLK_TIMER_SPEED(1) |
+	      EIP197_TRC_PARAMS_HTABLE_SZ(2);
+	writel(val, priv->base + EIP197_TRC_PARAMS);
+}
+
+static void eip197_write_firmware(struct safexcel_crypto_priv *priv,
+				  const struct firmware *fw, u32 ctrl,
+				  u32 prog_en)
+{
+	const u32 *data = (const u32 *)fw->data;
+	u32 val;
+	int i;
+
+	/* Reset the engine to make its program memory accessible */
+	writel(EIP197_PE_ICE_x_CTRL_SW_RESET |
+	       EIP197_PE_ICE_x_CTRL_CLR_ECC_CORR |
+	       EIP197_PE_ICE_x_CTRL_CLR_ECC_NON_CORR,
+	       priv->base + ctrl);
+
+	/* Enable access to the program memory */
+	writel(prog_en, priv->base + EIP197_PE_ICE_RAM_CTRL);
+
+	/* Write the firmware */
+	for (i = 0; i < fw->size / sizeof(u32); i++)
+		writel(be32_to_cpu(data[i]),
+		       priv->base + EIP197_CLASSIFICATION_RAMS + i * sizeof(u32));
+
+	/* Disable access to the program memory */
+	writel(0, priv->base + EIP197_PE_ICE_RAM_CTRL);
+
+	/* Release engine from reset */
+	val = readl(priv->base + ctrl);
+	val &= ~EIP197_PE_ICE_x_CTRL_SW_RESET;
+	writel(val, priv->base + ctrl);
+}
+
+static int eip197_load_firmwares(struct safexcel_crypto_priv *priv)
+{
+	const char *fw_name[] = {"ifpp.bin", "ipue.bin"};
+	const struct firmware *fw[FW_NB];
+	int i, j, ret = 0;
+	u32 val;
+
+	for (i = 0; i < FW_NB; i++) {
+		ret = request_firmware(&fw[i], fw_name[i], priv->dev);
+		if (ret) {
+			dev_err(priv->dev,
+				"Failed to request firmware %s (%d)\n",
+				fw_name[i], ret);
+			goto release_fw;
+		}
+	}
+
+	/* Clear the scratchpad memory; it is MMIO, so memset_io() is required */
+	val = readl(priv->base + EIP197_PE_ICE_SCRATCH_CTRL);
+	val |= EIP197_PE_ICE_SCRATCH_CTRL_CHANGE_TIMER |
+	       EIP197_PE_ICE_SCRATCH_CTRL_TIMER_EN |
+	       EIP197_PE_ICE_SCRATCH_CTRL_SCRATCH_ACCESS |
+	       EIP197_PE_ICE_SCRATCH_CTRL_CHANGE_ACCESS;
+	writel(val, priv->base + EIP197_PE_ICE_SCRATCH_CTRL);
+
+	memset_io(priv->base + EIP197_PE_ICE_SCRATCH_RAM, 0,
+		  EIP197_NUM_OF_SCRATCH_BLOCKS * sizeof(u32));
+
+	eip197_write_firmware(priv, fw[FW_IFPP], EIP197_PE_ICE_FPP_CTRL,
+			      EIP197_PE_ICE_RAM_CTRL_FPP_PROG_EN);
+
+	eip197_write_firmware(priv, fw[FW_IPUE], EIP197_PE_ICE_PUE_CTRL,
+			      EIP197_PE_ICE_RAM_CTRL_PUE_PROG_EN);
+
+release_fw:
+	for (j = 0; j < i; j++)
+		release_firmware(fw[j]);
+
+	return ret;
+}
+
+static int safexcel_hw_setup_cdesc_rings(struct safexcel_crypto_priv *priv)
+{
+	u32 hdw, cd_size_rnd, val;
+	int i;
+
+	hdw = readl(priv->base + EIP197_HIA_OPTIONS);
+	hdw &= GENMASK(27, 25);
+	hdw >>= 25;
+
+	cd_size_rnd = (priv->config.cd_size + (BIT(hdw) - 1)) >> hdw;
+
+	for (i = 0; i < priv->config.rings; i++) {
+		/* ring base address */
+		writel(lower_32_bits(priv->ring[i].cdr.base_dma),
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_RING_BASE_ADDR_LO);
+		writel(upper_32_bits(priv->ring[i].cdr.base_dma),
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_RING_BASE_ADDR_HI);
+
+		writel(EIP197_xDR_DESC_MODE_64BIT | (priv->config.cd_offset << 16) |
+		       priv->config.cd_size,
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_DESC_SIZE);
+		writel(((EIP197_FETCH_COUNT * (cd_size_rnd << hdw)) << 16) |
+		       (EIP197_FETCH_COUNT * priv->config.cd_offset),
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_CFG);
+
+		/* Configure DMA tx control */
+		val = EIP197_HIA_xDR_CFG_WR_CACHE(WR_CACHE_3BITS);
+		val |= EIP197_HIA_xDR_CFG_RD_CACHE(RD_CACHE_3BITS);
+		writel(val,
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_DMA_CFG);
+
+		/* clear any pending interrupt */
+		writel(GENMASK(5, 0),
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_STAT);
+	}
+
+	return 0;
+}
+
+static int safexcel_hw_setup_rdesc_rings(struct safexcel_crypto_priv *priv)
+{
+	u32 hdw, rd_size_rnd, val;
+	int i;
+
+	hdw = readl(priv->base + EIP197_HIA_OPTIONS);
+	hdw &= GENMASK(27, 25);
+	hdw >>= 25;
+
+	rd_size_rnd = (priv->config.rd_size + (BIT(hdw) - 1)) >> hdw;
+
+	for (i = 0; i < priv->config.rings; i++) {
+		/* ring base address */
+		writel(lower_32_bits(priv->ring[i].rdr.base_dma),
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_RING_BASE_ADDR_LO);
+		writel(upper_32_bits(priv->ring[i].rdr.base_dma),
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_RING_BASE_ADDR_HI);
+
+		writel(EIP197_xDR_DESC_MODE_64BIT | (priv->config.rd_offset << 16) |
+		       priv->config.rd_size,
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_DESC_SIZE);
+
+		writel(((EIP197_FETCH_COUNT * (rd_size_rnd << hdw)) << 16) |
+		       (EIP197_FETCH_COUNT * priv->config.rd_offset),
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_CFG);
+
+		/* Configure DMA tx control */
+		val = EIP197_HIA_xDR_CFG_WR_CACHE(WR_CACHE_3BITS);
+		val |= EIP197_HIA_xDR_CFG_RD_CACHE(RD_CACHE_3BITS);
+		val |= EIP197_HIA_xDR_WR_RES_BUF | EIP197_HIA_xDR_WR_CTRL_BUG;
+		writel(val,
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_DMA_CFG);
+
+		/* clear any pending interrupt */
+		writel(GENMASK(7, 0),
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_STAT);
+
+		/* enable ring interrupt */
+		val = readl(priv->base + EIP197_HIA_AIC_R_ENABLE_CTRL(i));
+		val |= EIP197_RDR_IRQ(i);
+		writel(val, priv->base + EIP197_HIA_AIC_R_ENABLE_CTRL(i));
+	}
+
+	return 0;
+}
+
+static int safexcel_hw_init(struct safexcel_crypto_priv *priv)
+{
+	u32 version, val;
+	int i, ret;
+
+	/* Determine endianness and configure byte swap */
+	version = readl(priv->base + EIP197_HIA_VERSION);
+	val = readl(priv->base + EIP197_HIA_MST_CTRL);
+
+	if ((version & 0xffff) == EIP197_HIA_VERSION_BE)
+		val |= EIP197_MST_CTRL_BYTE_SWAP;
+	else if (((version >> 16) & 0xffff) == EIP197_HIA_VERSION_LE)
+		val |= (EIP197_MST_CTRL_NO_BYTE_SWAP >> 24);
+
+	writel(val, priv->base + EIP197_HIA_MST_CTRL);
+
+
+	/* Configure wr/rd cache values */
+	writel(EIP197_MST_CTRL_RD_CACHE(RD_CACHE_4BITS) |
+	       EIP197_MST_CTRL_WD_CACHE(WR_CACHE_4BITS),
+	       priv->base + EIP197_MST_CTRL);
+
+	/* Interrupts reset */
+
+	/* Disable all global interrupts */
+	writel(0, priv->base + EIP197_HIA_AIC_G_ENABLE_CTRL);
+
+	/* Clear any pending interrupt */
+	writel(GENMASK(31, 0), priv->base + EIP197_HIA_AIC_G_ACK);
+
+	/* Data Fetch Engine configuration */
+
+	/* Reset all DFE threads */
+	writel(EIP197_DxE_THR_CTRL_RESET_PE,
+	       priv->base + EIP197_HIA_DFE_THR_CTRL);
+
+	/* Reset HIA input interface arbiter */
+	writel(EIP197_HIA_RA_PE_CTRL_RESET,
+	       priv->base + EIP197_HIA_RA_PE_CTRL);
+
+	/* DMA transfer size to use */
+	val = EIP197_HIA_DFE_CFG_DIS_DEBUG;
+	val |= EIP197_HIA_DxE_CFG_MIN_DATA_SIZE(5) | EIP197_HIA_DxE_CFG_MAX_DATA_SIZE(9);
+	val |= EIP197_HIA_DxE_CFG_MIN_CTRL_SIZE(5) | EIP197_HIA_DxE_CFG_MAX_CTRL_SIZE(7);
+	val |= EIP197_HIA_DxE_CFG_DATA_CACHE_CTRL(RD_CACHE_3BITS);
+	val |= EIP197_HIA_DxE_CFG_CTRL_CACHE_CTRL(RD_CACHE_3BITS);
+	writel(val, priv->base + EIP197_HIA_DFE_CFG);
+
+	/* Leave the DFE threads reset state */
+	writel(0, priv->base + EIP197_HIA_DFE_THR_CTRL);
+
+	/* Configure the processing engine thresholds */
+	writel(EIP197_PE_IN_xBUF_THRES_MIN(5) | EIP197_PE_IN_xBUF_THRES_MAX(9),
+	      priv->base + EIP197_PE_IN_DBUF_THRES);
+	writel(EIP197_PE_IN_xBUF_THRES_MIN(5) | EIP197_PE_IN_xBUF_THRES_MAX(7),
+	      priv->base + EIP197_PE_IN_TBUF_THRES);
+
+	/* enable HIA input interface arbiter and rings */
+	writel(EIP197_HIA_RA_PE_CTRL_EN | GENMASK(priv->config.rings - 1, 0),
+	       priv->base + EIP197_HIA_RA_PE_CTRL);
+
+	/* Data Store Engine configuration */
+
+	/* Reset all DSE threads */
+	writel(EIP197_DxE_THR_CTRL_RESET_PE,
+	       priv->base + EIP197_HIA_DSE_THR_CTRL);
+
+	/* Wait for all DSE threads to complete */
+	while ((readl(priv->base + EIP197_HIA_DSE_THR_STAT) &
+		GENMASK(15, 12)) != GENMASK(15, 12))
+		;
+
+	/* DMA transfer size to use */
+	val = EIP197_HIA_DSE_CFG_DIS_DEBUG;
+	val |= EIP197_HIA_DxE_CFG_MIN_DATA_SIZE(7) | EIP197_HIA_DxE_CFG_MAX_DATA_SIZE(8);
+	val |= EIP197_HIA_DxE_CFG_DATA_CACHE_CTRL(WR_CACHE_3BITS);
+	val |= EIP197_HIA_DSE_CFG_ALLWAYS_BUFFERABLE;
+	val |= EIP197_HIA_DSE_CFG_EN_SINGLE_WR;
+	writel(val, priv->base + EIP197_HIA_DSE_CFG);
+
+	/* Leave the DSE threads reset state */
+	writel(0, priv->base + EIP197_HIA_DSE_THR_CTRL);
+
+	/* Configure the processing engine thresholds */
+	writel(EIP197_PE_OUT_DBUF_THRES_MIN(7) | EIP197_PE_OUT_DBUF_THRES_MAX(8),
+	       priv->base + EIP197_PE_OUT_DBUF_THRES);
+
+	/* Processing Engine configuration */
+
+	/* H/W capabilities selection */
+	val = EIP197_FUNCTION_RSVD;
+	val |= EIP197_PROTOCOL_ENCRYPT_ONLY | EIP197_PROTOCOL_HASH_ONLY;
+	val |= EIP197_ALG_AES_ECB | EIP197_ALG_AES_CBC;
+	val |= EIP197_ALG_SHA1 | EIP197_ALG_HMAC_SHA1;
+	val |= EIP197_ALG_SHA2;
+	writel(val, priv->base + EIP197_PE_EIP96_FUNCTION_EN);
+
+	/* Command Descriptor Rings prepare */
+	for (i = 0; i < priv->config.rings; i++) {
+		/* Clear interrupts for this ring */
+		writel(GENMASK(31, 0),
+		       priv->base + EIP197_HIA_AIC_R_ENABLE_CLR(i));
+
+		/* Disable external triggering */
+		writel(0, priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_CFG);
+
+		/* Clear the pending prepared counter */
+		writel(EIP197_xDR_PREP_CLR_COUNT,
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_PREP_COUNT);
+
+		/* Clear the pending processed counter */
+		writel(EIP197_xDR_PROC_CLR_COUNT,
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_PROC_COUNT);
+
+		writel(0,
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_PREP_PNTR);
+		writel(0,
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_PROC_PNTR);
+
+		writel((EIP197_DEFAULT_RING_SIZE * priv->config.cd_offset) << 2,
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_RING_SIZE);
+	}
+
+	/* Result Descriptor Ring prepare */
+	for (i = 0; i < priv->config.rings; i++) {
+		/* Disable external triggering */
+		writel(0, priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_CFG);
+
+		/* Clear the pending prepared counter */
+		writel(EIP197_xDR_PREP_CLR_COUNT,
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_PREP_COUNT);
+
+		/* Clear the pending processed counter */
+		writel(EIP197_xDR_PROC_CLR_COUNT,
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_PROC_COUNT);
+
+		writel(0,
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_PREP_PNTR);
+		writel(0,
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_PROC_PNTR);
+
+		/* Ring size */
+		writel((EIP197_DEFAULT_RING_SIZE * priv->config.rd_offset) << 2,
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_RING_SIZE);
+	}
+
+	/* Enable command descriptor rings */
+	writel(EIP197_DxE_THR_CTRL_EN | GENMASK(priv->config.rings - 1, 0),
+	       priv->base + EIP197_HIA_DFE_THR_CTRL);
+
+	/* Enable result descriptor rings */
+	writel(EIP197_DxE_THR_CTRL_EN | GENMASK(priv->config.rings - 1, 0),
+	       priv->base + EIP197_HIA_DSE_THR_CTRL);
+
+	/* Clear any HIA interrupt */
+	writel(GENMASK(30, 20), priv->base + EIP197_HIA_AIC_G_ACK);
+
+	eip197_trc_cache_init(priv);
+
+	ret = eip197_load_firmwares(priv);
+	if (ret)
+		return ret;
+
+	safexcel_hw_setup_cdesc_rings(priv);
+	safexcel_hw_setup_rdesc_rings(priv);
+
+	return 0;
+}
+
+void safexcel_dequeue(struct safexcel_crypto_priv *priv, int ring)
+{
+	struct crypto_async_request *req, *backlog;
+	struct safexcel_context *ctx;
+	struct safexcel_request *request;
+	int ret, nreq = 0, cdesc = 0, rdesc = 0, commands, results;
+
+	priv->ring[ring].need_dequeue = false;
+	/* Send at most EIP197_MAX_BATCH_SZ requests; nreq counts those sent */
+	do {
+		spin_lock_bh(&priv->ring[ring].queue_lock);
+		backlog = crypto_get_backlog(&priv->ring[ring].queue);
+		req = crypto_dequeue_request(&priv->ring[ring].queue);
+		spin_unlock_bh(&priv->ring[ring].queue_lock);
+
+		if (!req)
+			goto finalize;
+
+		request = kzalloc(sizeof(*request), EIP197_GFP_FLAGS(*req));
+		if (!request) {
+			spin_lock_bh(&priv->ring[ring].queue_lock);
+			crypto_enqueue_request(&priv->ring[ring].queue, req);
+			spin_unlock_bh(&priv->ring[ring].queue_lock);
+
+			priv->ring[ring].need_dequeue = true;
+			goto finalize;
+		}
+
+		ctx = crypto_tfm_ctx(req->tfm);
+		ret = ctx->send(req, ring, request, &commands, &results);
+		if (ret) {
+			kfree(request);
+			req->complete(req, ret);
+			priv->ring[ring].need_dequeue = true;
+			goto finalize;
+		}
+
+		if (backlog)
+			backlog->complete(backlog, -EINPROGRESS);
+
+		spin_lock_bh(&priv->ring[ring].egress_lock);
+		list_add_tail(&request->list, &priv->ring[ring].list);
+		spin_unlock_bh(&priv->ring[ring].egress_lock);
+
+		cdesc += commands;
+		rdesc += results;
+	} while (++nreq < EIP197_MAX_BATCH_SZ);
+
+finalize:
+	if (nreq == EIP197_MAX_BATCH_SZ)
+		priv->ring[ring].need_dequeue = true;
+	else if (!nreq)
+		return;
+
+	spin_lock_bh(&priv->ring[ring].lock);
+
+	/* Configure when we want an interrupt */
+	writel(EIP197_HIA_RDR_THRESH_PKT_MODE |
+	       EIP197_HIA_RDR_THRESH_PROC_PKT(nreq),
+	       priv->base + EIP197_HIA_RDR(ring) + EIP197_HIA_xDR_THRESH);
+
+	/* let the RDR know we have pending descriptors */
+	writel((rdesc * priv->config.rd_offset) << 2,
+	       priv->base + EIP197_HIA_RDR(ring) + EIP197_HIA_xDR_PREP_COUNT);
+
+	/* let the CDR know we have pending descriptors */
+	writel((cdesc * priv->config.cd_offset) << 2,
+	       priv->base + EIP197_HIA_CDR(ring) + EIP197_HIA_xDR_PREP_COUNT);
+
+	spin_unlock_bh(&priv->ring[ring].lock);
+}
+
+void safexcel_free_context(struct safexcel_crypto_priv *priv,
+			   struct crypto_async_request *req,
+			   int result_sz)
+{
+	struct safexcel_context *ctx = crypto_tfm_ctx(req->tfm);
+
+	if (ctx->result_dma)
+		dma_unmap_single(priv->dev, ctx->result_dma, result_sz,
+				 DMA_FROM_DEVICE);
+
+	if (ctx->cache) {
+		dma_unmap_single(priv->dev, ctx->cache_dma, ctx->cache_sz,
+				 DMA_TO_DEVICE);
+		kfree(ctx->cache);
+		ctx->cache = NULL;
+		ctx->cache_sz = 0;
+	}
+}
+
+void safexcel_complete(struct safexcel_crypto_priv *priv, int ring)
+{
+	struct safexcel_command_desc *cdesc;
+
+	/* Acknowledge the command descriptors */
+	do {
+		cdesc = safexcel_ring_next_rptr(priv, &priv->ring[ring].cdr);
+		if (IS_ERR(cdesc)) {
+			dev_err(priv->dev,
+				"Could not retrieve the command descriptor\n");
+			return;
+		}
+	} while (!cdesc->last_seg);
+}
+
+void safexcel_inv_complete(struct crypto_async_request *req, int error)
+{
+	struct safexcel_inv_result *result = req->data;
+
+	if (error == -EINPROGRESS)
+		return;
+
+	result->error = error;
+	complete(&result->completion);
+}
+
+int safexcel_invalidate_cache(struct crypto_async_request *async,
+			      struct safexcel_context *ctx,
+			      struct safexcel_crypto_priv *priv,
+			      dma_addr_t ctxr_dma, int ring,
+			      struct safexcel_request *request)
+{
+	struct safexcel_command_desc *cdesc;
+	struct safexcel_result_desc *rdesc;
+	int ret = 0;
+
+	spin_lock_bh(&priv->ring[ring].egress_lock);
+
+	/* Prepare command descriptor */
+	cdesc = safexcel_add_cdesc(priv, ring, true, true, 0, 0, 0, ctxr_dma);
+	if (IS_ERR(cdesc)) {
+		ret = PTR_ERR(cdesc);
+		goto unlock;
+	}
+
+	cdesc->control_data.type = EIP197_TYPE_EXTENDED;
+	cdesc->control_data.options = 0;
+	cdesc->control_data.refresh = 0;
+	cdesc->control_data.control0 = CONTEXT_CONTROL_INV_TR;
+
+	/* Prepare result descriptor */
+	rdesc = safexcel_add_rdesc(priv, ring, true, true, 0, 0);
+
+	if (IS_ERR(rdesc)) {
+		ret = PTR_ERR(rdesc);
+		goto cdesc_rollback;
+	}
+
+	request->req = async;
+	goto unlock;
+
+cdesc_rollback:
+	safexcel_ring_rollback_wptr(priv, &priv->ring[ring].cdr);
+
+unlock:
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
+	return ret;
+}
+
+/* Complete the requests the ring's RDR reports as processed, oldest first. */
+static inline void safexcel_handle_result_descriptor(struct safexcel_crypto_priv *priv,
+						     int ring)
+{
+	struct safexcel_request *sreq;
+	struct safexcel_context *ctx;
+	int ret, i, nreq, ndesc = 0;
+	bool should_complete;
+
+	/* The count sits in bits 30:24; zero simply skips the loop below */
+	nreq = readl(priv->base + EIP197_HIA_RDR(ring) + EIP197_HIA_xDR_PROC_COUNT);
+	nreq = (nreq >> 24) & GENMASK(6, 0);
+
+	for (i = 0; i < nreq; i++) {
+		spin_lock_bh(&priv->ring[ring].egress_lock);
+		sreq = list_first_entry(&priv->ring[ring].list,
+					struct safexcel_request, list);
+		list_del(&sreq->list);
+		spin_unlock_bh(&priv->ring[ring].egress_lock);
+
+		ctx = crypto_tfm_ctx(sreq->req->tfm);
+		ndesc = ctx->handle_result(priv, ring, sreq->req,
+					   &should_complete, &ret);
+		if (ndesc < 0) {
+			kfree(sreq);
+			dev_err(priv->dev, "failed to handle result (%d)\n", ndesc);
+			return;
+		}
+
+		writel(EIP197_xDR_PROC_xD_PKT(1) |
+		       EIP197_xDR_PROC_xD_COUNT(ndesc * priv->config.rd_offset),
+		       priv->base + EIP197_HIA_RDR(ring) + EIP197_HIA_xDR_PROC_COUNT);
+
+		if (should_complete) {
+			local_bh_disable();
+			sreq->req->complete(sreq->req, ret);
+			local_bh_enable();
+		}
+
+		kfree(sreq);
+	}
+}
+
+static void safexcel_handle_result_work(struct work_struct *work)
+{
+	struct safexcel_work_data *data =
+			container_of(work, struct safexcel_work_data, work);
+	struct safexcel_crypto_priv *priv = data->priv;
+
+	safexcel_handle_result_descriptor(priv, data->ring);
+
+	if (priv->ring[data->ring].need_dequeue)
+		safexcel_dequeue(data->priv, data->ring);
+}
+
+struct safexcel_ring_irq_data {
+	struct safexcel_crypto_priv *priv;
+	int ring;
+};
+
+static irqreturn_t safexcel_irq_ring(int irq, void *data)
+{
+	struct safexcel_ring_irq_data *irq_data = data;
+	struct safexcel_crypto_priv *priv = irq_data->priv;
+	int ring = irq_data->ring;
+	u32 status, stat;
+
+	status = readl(priv->base + EIP197_HIA_AIC_R_ENABLED_STAT(ring));
+	if (!status)
+		return IRQ_NONE;
+
+	/* RDR interrupts */
+	if (status & EIP197_RDR_IRQ(ring)) {
+		stat = readl(priv->base + EIP197_HIA_RDR(ring) + EIP197_HIA_xDR_STAT);
+
+		if (unlikely(stat & EIP197_xDR_ERR)) {
+			/*
+			 * Fatal error, the RDR is unusable and must be
+			 * reinitialized. This should not happen under
+			 * normal circumstances.
+			 */
+			dev_err(priv->dev, "RDR: fatal error.\n");
+		} else if (likely(stat & EIP197_xDR_THRESH)) {
+			queue_work(priv->ring[ring].workqueue, &priv->ring[ring].work_data.work);
+		}
+
+		/* ACK the RDR interrupts that were just read */
+		writel(stat & 0xff,
+		       priv->base + EIP197_HIA_RDR(ring) + EIP197_HIA_xDR_STAT);
+	}
+
+	/* ACK the ring interrupts at the AIC level */
+	writel(status, priv->base + EIP197_HIA_AIC_R_ACK(ring));
+
+	return IRQ_HANDLED;
+}
+
+static int safexcel_request_ring_irq(struct platform_device *pdev, const char *name,
+				     irq_handler_t handler,
+				     struct safexcel_ring_irq_data *ring_irq_priv)
+{
+	int ret, irq = platform_get_irq_byname(pdev, name);
+
+	if (irq < 0) {
+		dev_err(&pdev->dev, "unable to get IRQ '%s'\n", name);
+		return irq;
+	}
+
+	ret = devm_request_irq(&pdev->dev, irq, handler, 0,
+			       dev_name(&pdev->dev), ring_irq_priv);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to request IRQ %d\n", irq);
+		return ret;
+	}
+
+	return irq;
+}
+
+static struct safexcel_alg_template *safexcel_algs[] = {
+	&safexcel_alg_ecb_aes,
+	&safexcel_alg_cbc_aes,
+	&safexcel_alg_sha1,
+	&safexcel_alg_sha224,
+	&safexcel_alg_sha256,
+	&safexcel_alg_hmac_sha1,
+};
+
+static int safexcel_register_algorithms(struct safexcel_crypto_priv *priv)
+{
+	int i, j, ret = 0;
+
+	for (i = 0; i < ARRAY_SIZE(safexcel_algs); i++) {
+		safexcel_algs[i]->priv = priv;
+
+		if (safexcel_algs[i]->type == SAFEXCEL_ALG_TYPE_SKCIPHER)
+			ret = crypto_register_skcipher(&safexcel_algs[i]->alg.skcipher);
+		else
+			ret = crypto_register_ahash(&safexcel_algs[i]->alg.ahash);
+
+		if (ret)
+			goto fail;
+	}
+
+	return 0;
+
+fail:
+	for (j = 0; j < i; j++) {
+		if (safexcel_algs[j]->type == SAFEXCEL_ALG_TYPE_SKCIPHER)
+			crypto_unregister_skcipher(&safexcel_algs[j]->alg.skcipher);
+		else
+			crypto_unregister_ahash(&safexcel_algs[j]->alg.ahash);
+	}
+
+	return ret;
+}
+
+static void safexcel_unregister_algorithms(struct safexcel_crypto_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(safexcel_algs); i++) {
+		if (safexcel_algs[i]->type == SAFEXCEL_ALG_TYPE_SKCIPHER)
+			crypto_unregister_skcipher(&safexcel_algs[i]->alg.skcipher);
+		else
+			crypto_unregister_ahash(&safexcel_algs[i]->alg.ahash);
+	}
+}
+
+static void safexcel_configure(struct safexcel_crypto_priv *priv)
+{
+	u32 val, mask;
+
+	val = readl(priv->base + EIP197_HIA_OPTIONS);
+	val = (val & GENMASK(27, 25)) >> 25;
+	mask = BIT(val) - 1;
+
+	val = readl(priv->base + EIP197_HIA_OPTIONS);
+	priv->config.rings = min_t(u32, val & GENMASK(3, 0), max_rings);
+
+	priv->config.cd_size = (sizeof(struct safexcel_command_desc) / sizeof(u32));
+	priv->config.cd_offset = (priv->config.cd_size + mask) & ~mask;
+
+	priv->config.rd_size = (sizeof(struct safexcel_result_desc) / sizeof(u32));
+	priv->config.rd_offset = (priv->config.rd_size + mask) & ~mask;
+}
+
+/* Map the engine, set up every ring, init the h/w and register the algs. */
+static int safexcel_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	struct safexcel_crypto_priv *priv;
+	u64 dma_mask;
+	int i, ret;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->dev = dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(priv->base)) {
+		dev_err(dev, "failed to get resource\n");
+		return PTR_ERR(priv->base);
+	}
+
+	priv->clk = of_clk_get(dev->of_node, 0);
+	if (!IS_ERR(priv->clk)) {
+		ret = clk_prepare_enable(priv->clk);
+		if (ret) {
+			dev_err(dev, "unable to enable clk (%d)\n", ret);
+			return ret;
+		}
+	} else {
+		/* The clock isn't mandatory */
+		if (PTR_ERR(priv->clk) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+	}
+
+	if (of_property_read_u64(dev->of_node, "dma-mask", &dma_mask))
+		dma_mask = DMA_BIT_MASK(64);
+	ret = dma_set_mask_and_coherent(dev, dma_mask);
+	if (ret)
+		goto err_clk;
+
+	priv->context_pool = dmam_pool_create("safexcel-context", dev,
+					      sizeof(struct safexcel_context_record),
+					      1, 0);
+	if (!priv->context_pool) {
+		ret = -ENOMEM;
+		goto err_clk;
+	}
+
+	safexcel_configure(priv);
+
+	for (i = 0; i < priv->config.rings; i++) {
+		char irq_name[6] = {0}; /* "ringX\0" */
+		char wq_name[9] = {0}; /* "wq_ringX\0" */
+		struct safexcel_ring_irq_data *ring_irq;
+
+		ret = safexcel_init_ring_descriptors(priv,
+						     &priv->ring[i].cdr,
+						     &priv->ring[i].rdr);
+		if (ret)
+			goto err_clk;
+
+		ring_irq = devm_kzalloc(dev, sizeof(*ring_irq), GFP_KERNEL);
+		if (!ring_irq) {
+			ret = -ENOMEM;
+			goto err_clk;
+		}
+
+		ring_irq->priv = priv;
+		ring_irq->ring = i;
+
+		snprintf(irq_name, 6, "ring%d", i);
+		/* Keep the error in ret: a bare goto would make probe return 0 */
+		ret = safexcel_request_ring_irq(pdev, irq_name, safexcel_irq_ring,
+						ring_irq);
+		if (ret < 0)
+			goto err_clk;
+
+		priv->ring[i].work_data.priv = priv;
+		priv->ring[i].work_data.ring = i;
+		INIT_WORK(&priv->ring[i].work_data.work, safexcel_handle_result_work);
+
+		snprintf(wq_name, 9, "wq_ring%d", i);
+		priv->ring[i].workqueue = create_singlethread_workqueue(wq_name);
+		if (!priv->ring[i].workqueue) {
+			ret = -ENOMEM;
+			goto err_clk;
+		}
+
+		crypto_init_queue(&priv->ring[i].queue,
+				  EIP197_DEFAULT_RING_SIZE);
+
+		INIT_LIST_HEAD(&priv->ring[i].list);
+		spin_lock_init(&priv->ring[i].lock);
+		spin_lock_init(&priv->ring[i].egress_lock);
+		spin_lock_init(&priv->ring[i].queue_lock);
+	}
+
+	platform_set_drvdata(pdev, priv);
+	atomic_set(&priv->ring_used, 0);
+
+	ret = safexcel_hw_init(priv);
+	if (ret) {
+		dev_err(dev, "EIP h/w init failed (%d)\n", ret);
+		goto err_clk;
+	}
+
+	ret = safexcel_register_algorithms(priv);
+	if (ret) {
+		dev_err(dev, "Failed to register algorithms (%d)\n", ret);
+		goto err_clk;
+	}
+
+	return 0;
+
+err_clk:
+	clk_disable_unprepare(priv->clk);
+	return ret;
+}
+
+
+static int safexcel_remove(struct platform_device *pdev)
+{
+	struct safexcel_crypto_priv *priv = platform_get_drvdata(pdev);
+	int i;
+
+	safexcel_unregister_algorithms(priv);
+	clk_disable_unprepare(priv->clk);
+
+	for (i = 0; i < priv->config.rings; i++)
+		destroy_workqueue(priv->ring[i].workqueue);
+
+	return 0;
+}
+
+static const struct of_device_id safexcel_of_match_table[] = {
+	{ .compatible = "inside-secure,safexcel-eip197" },
+	{},
+};
+
+
+static struct platform_driver  crypto_safexcel = {
+	.probe		= safexcel_probe,
+	.remove		= safexcel_remove,
+	.driver		= {
+		.name	= "crypto-safexcel",
+		.of_match_table = safexcel_of_match_table,
+	},
+};
+module_platform_driver(crypto_safexcel);
+
+MODULE_AUTHOR("Antoine Tenart <antoine.tenart@free-electrons.com>");
+MODULE_AUTHOR("Ofer Heifetz <oferh@marvell.com>");
+MODULE_AUTHOR("Igal Liberman <igall@marvell.com>");
+MODULE_DESCRIPTION("Support for SafeXcel cryptographic engine EIP197");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h
new file mode 100644
index 0000000..304c583
--- /dev/null
+++ b/drivers/crypto/inside-secure/safexcel.h
@@ -0,0 +1,574 @@
+/*
+ * Copyright (C) 2017 Marvell
+ *
+ * Antoine Tenart <antoine.tenart@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef __SAFEXCEL_H__
+#define __SAFEXCEL_H__
+
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/skcipher.h>
+
+#define EIP197_HIA_VERSION_LE			0xca35
+#define EIP197_HIA_VERSION_BE			0x35ca
+
+/* Static configuration */
+#define EIP197_DEFAULT_RING_SIZE		64
+#define EIP197_MAX_TOKENS			5
+#define EIP197_MAX_RINGS			4
+#define EIP197_FETCH_COUNT			1
+#define EIP197_MAX_BATCH_SZ			EIP197_DEFAULT_RING_SIZE
+
+#define EIP197_GFP_FLAGS(base)	((base).flags & CRYPTO_TFM_REQ_MAY_SLEEP ? \
+				 GFP_KERNEL : GFP_ATOMIC)
+
+/* CDR/RDR register offsets */
+#define EIP197_HIA_xDR_OFF(r)			(0x80000 + (r) * 0x1000)
+#define EIP197_HIA_CDR(r)			(EIP197_HIA_xDR_OFF(r))
+#define EIP197_HIA_RDR(r)			(EIP197_HIA_xDR_OFF(r) + 0x800)
+#define EIP197_HIA_xDR_RING_BASE_ADDR_LO	0x0
+#define EIP197_HIA_xDR_RING_BASE_ADDR_HI	0x4
+#define EIP197_HIA_xDR_RING_SIZE		0x18
+#define EIP197_HIA_xDR_DESC_SIZE		0x1c
+#define EIP197_HIA_xDR_CFG			0x20
+#define EIP197_HIA_xDR_DMA_CFG			0x24
+#define EIP197_HIA_xDR_THRESH			0x28
+#define EIP197_HIA_xDR_PREP_COUNT		0x2c
+#define EIP197_HIA_xDR_PROC_COUNT		0x30
+#define EIP197_HIA_xDR_PREP_PNTR		0x34
+#define EIP197_HIA_xDR_PROC_PNTR		0x38
+#define EIP197_HIA_xDR_STAT			0x3c
+
+/* register offsets */
+#define EIP197_HIA_DFE_CFG			0x8c000
+#define EIP197_HIA_DFE_THR_CTRL			0x8c040
+#define EIP197_HIA_DFE_THR_STAT			0x8c044
+#define EIP197_HIA_DSE_CFG			0x8d000
+#define EIP197_HIA_DSE_THR_CTRL			0x8d040
+#define EIP197_HIA_DSE_THR_STAT			0x8d044
+#define EIP197_HIA_RA_PE_CTRL			0x90010
+#define EIP197_HIA_RA_PE_STAT			0x90014
+#define EIP197_HIA_AIC_R_OFF(r)			((r) * 0x1000)
+#define EIP197_HIA_AIC_R_ENABLE_CTRL(r)		(0x9e808 - EIP197_HIA_AIC_R_OFF(r))
+#define EIP197_HIA_AIC_R_ENABLED_STAT(r)	(0x9e810 - EIP197_HIA_AIC_R_OFF(r))
+#define EIP197_HIA_AIC_R_ACK(r)			(0x9e810 - EIP197_HIA_AIC_R_OFF(r))
+#define EIP197_HIA_AIC_R_ENABLE_CLR(r)		(0x9e814 - EIP197_HIA_AIC_R_OFF(r))
+#define EIP197_HIA_AIC_G_ENABLE_CTRL		0x9f808
+#define EIP197_HIA_AIC_G_ENABLED_STAT		0x9f810
+#define EIP197_HIA_AIC_G_ACK			0x9f810
+#define EIP197_HIA_MST_CTRL			0x9fff4
+#define EIP197_HIA_OPTIONS			0x9fff8
+#define EIP197_HIA_VERSION			0x9fffc
+#define EIP197_PE_IN_DBUF_THRES			0xa0000
+#define EIP197_PE_IN_TBUF_THRES			0xa0100
+#define EIP197_PE_ICE_SCRATCH_RAM		0xa0800
+#define EIP197_PE_ICE_PUE_CTRL			0xa0c80
+#define EIP197_PE_ICE_SCRATCH_CTRL		0xa0d04
+#define EIP197_PE_ICE_FPP_CTRL			0xa0d80
+#define EIP197_PE_ICE_RAM_CTRL			0xa0ff0
+#define EIP197_PE_EIP96_FUNCTION_EN		0xa1004
+#define EIP197_PE_EIP96_CONTEXT_CTRL		0xa1008
+#define EIP197_PE_EIP96_CONTEXT_STAT		0xa100c
+#define EIP197_PE_OUT_DBUF_THRES		0xa1c00
+#define EIP197_PE_OUT_TBUF_THRES		0xa1d00
+#define EIP197_CLASSIFICATION_RAMS		0xe0000
+#define EIP197_TRC_CTRL				0xf0800
+#define EIP197_TRC_LASTRES			0xf0804
+#define EIP197_TRC_REGINDEX			0xf0808
+#define EIP197_TRC_PARAMS			0xf0820
+#define EIP197_TRC_FREECHAIN			0xf0824
+#define EIP197_TRC_PARAMS2			0xf0828
+#define EIP197_TRC_ECCCTRL			0xf0830
+#define EIP197_TRC_ECCSTAT			0xf0834
+#define EIP197_TRC_ECCADMINSTAT			0xf0838
+#define EIP197_TRC_ECCDATASTAT			0xf083c
+#define EIP197_TRC_ECCDATA			0xf0840
+#define EIP197_CS_RAM_CTRL			0xf7ff0
+#define EIP197_MST_CTRL				0xffff4
+
+/* EIP197_HIA_xDR_DESC_SIZE */
+#define EIP197_xDR_DESC_MODE_64BIT		BIT(31)
+
+/* EIP197_HIA_xDR_DMA_CFG */
+#define EIP197_HIA_xDR_WR_RES_BUF		BIT(22)
+#define EIP197_HIA_xDR_WR_CTRL_BUG		BIT(23)
+#define EIP197_HIA_xDR_WR_OWN_BUF		BIT(24)
+#define EIP197_HIA_xDR_CFG_WR_CACHE(n)		(((n) & 0x7) << 25)
+#define EIP197_HIA_xDR_CFG_RD_CACHE(n)		(((n) & 0x7) << 29)
+
+/* EIP197_HIA_CDR_THRESH */
+#define EIP197_HIA_CDR_THRESH_PROC_PKT(n)	(n)
+#define EIP197_HIA_CDR_THRESH_PROC_MODE		BIT(22)
+#define EIP197_HIA_CDR_THRESH_PKT_MODE		BIT(23)
+#define EIP197_HIA_CDR_THRESH_TIMEOUT(n)	((n) << 24) /* x256 clk cycles */
+
+/* EIP197_HIA_RDR_THRESH */
+#define EIP197_HIA_RDR_THRESH_PROC_PKT(n)	(n)
+#define EIP197_HIA_RDR_THRESH_PKT_MODE		BIT(23)
+#define EIP197_HIA_RDR_THRESH_TIMEOUT(n)	((n) << 24) /* x256 clk cycles */
+
+/* EIP197_HIA_xDR_PREP_COUNT */
+#define EIP197_xDR_PREP_CLR_COUNT		BIT(31)
+
+/* EIP197_HIA_xDR_PROC_COUNT */
+#define EIP197_xDR_PROC_xD_COUNT(n)		((n) << 2)
+#define EIP197_xDR_PROC_xD_PKT(n)		((n) << 24)
+#define EIP197_xDR_PROC_CLR_COUNT		BIT(31)
+
+/* EIP197_HIA_xDR_STAT */
+#define EIP197_xDR_DMA_ERR			BIT(0)
+#define EIP197_xDR_PREP_CMD_THRES		BIT(1)
+#define EIP197_xDR_ERR				BIT(2)
+#define EIP197_xDR_THRESH			BIT(4)
+#define EIP197_xDR_TIMEOUT			BIT(5)
+
+#define EIP197_HIA_RA_PE_CTRL_RESET		BIT(31)
+#define EIP197_HIA_RA_PE_CTRL_EN		BIT(30)
+
+/* EIP197_HIA_AIC_R_ENABLE_CTRL */
+#define EIP197_CDR_IRQ(n)			BIT((n) * 2)
+#define EIP197_RDR_IRQ(n)			BIT((n) * 2 + 1)
+
+/* EIP197_HIA_DFE/DSE_CFG */
+#define EIP197_HIA_DxE_CFG_MIN_DATA_SIZE(n)	((n) << 0)
+#define EIP197_HIA_DxE_CFG_DATA_CACHE_CTRL(n)	(((n) & 0x7) << 4)
+#define EIP197_HIA_DxE_CFG_MAX_DATA_SIZE(n)	((n) << 8)
+#define EIP197_HIA_DSE_CFG_ALLWAYS_BUFFERABLE	GENMASK(15, 14)
+#define EIP197_HIA_DxE_CFG_MIN_CTRL_SIZE(n)	((n) << 16)
+#define EIP197_HIA_DxE_CFG_CTRL_CACHE_CTRL(n)	(((n) & 0x7) << 20)
+#define EIP197_HIA_DxE_CFG_MAX_CTRL_SIZE(n)	((n) << 24)
+#define EIP197_HIA_DFE_CFG_DIS_DEBUG		(BIT(31) | BIT(29))
+#define EIP197_HIA_DSE_CFG_EN_SINGLE_WR		BIT(29)
+#define EIP197_HIA_DSE_CFG_DIS_DEBUG		BIT(31)
+
+/* EIP197_HIA_DFE/DSE_THR_CTRL */
+#define EIP197_DxE_THR_CTRL_EN			BIT(30)
+#define EIP197_DxE_THR_CTRL_RESET_PE		BIT(31)
+
+/* EIP197_HIA_AIC_G_ENABLED_STAT */
+#define EIP197_G_IRQ_DFE(n)			BIT((n) << 1)
+#define EIP197_G_IRQ_DSE(n)			BIT(((n) << 1) + 1)
+#define EIP197_G_IRQ_RING			BIT(16)
+#define EIP197_G_IRQ_PE(n)			BIT((n) + 20)
+
+/* EIP197_HIA_MST_CTRL */
+#define RD_CACHE_3BITS				0x5
+#define WR_CACHE_3BITS				0x3
+#define RD_CACHE_4BITS				(RD_CACHE_3BITS << 1 | BIT(0))
+#define WR_CACHE_4BITS				(WR_CACHE_3BITS << 1 | BIT(0))
+#define EIP197_MST_CTRL_RD_CACHE(n)		(((n) & 0xf) << 0)
+#define EIP197_MST_CTRL_WD_CACHE(n)		(((n) & 0xf) << 4)
+#define EIP197_MST_CTRL_BYTE_SWAP		BIT(24)
+#define EIP197_MST_CTRL_NO_BYTE_SWAP		BIT(25)
+
+/* EIP197_PE_IN_DBUF/TBUF_THRES */
+#define EIP197_PE_IN_xBUF_THRES_MIN(n)		((n) << 8)
+#define EIP197_PE_IN_xBUF_THRES_MAX(n)		((n) << 12)
+
+/* EIP197_PE_OUT_DBUF_THRES */
+#define EIP197_PE_OUT_DBUF_THRES_MIN(n)		((n) << 0)
+#define EIP197_PE_OUT_DBUF_THRES_MAX(n)		((n) << 4)
+
+/* EIP197_PE_ICE_SCRATCH_CTRL */
+#define EIP197_PE_ICE_SCRATCH_CTRL_CHANGE_TIMER		BIT(2)
+#define EIP197_PE_ICE_SCRATCH_CTRL_TIMER_EN		BIT(3)
+#define EIP197_PE_ICE_SCRATCH_CTRL_CHANGE_ACCESS	BIT(24)
+#define EIP197_PE_ICE_SCRATCH_CTRL_SCRATCH_ACCESS	BIT(25)
+
+/* EIP197_PE_ICE_SCRATCH_RAM */
+#define EIP197_NUM_OF_SCRATCH_BLOCKS		32
+
+/* EIP197_PE_ICE_PUE/FPP_CTRL */
+#define EIP197_PE_ICE_x_CTRL_SW_RESET			BIT(0)
+#define EIP197_PE_ICE_x_CTRL_CLR_ECC_NON_CORR		BIT(14)
+#define EIP197_PE_ICE_x_CTRL_CLR_ECC_CORR		BIT(15)
+
+/* EIP197_PE_ICE_RAM_CTRL */
+#define EIP197_PE_ICE_RAM_CTRL_PUE_PROG_EN	BIT(0)
+#define EIP197_PE_ICE_RAM_CTRL_FPP_PROG_EN	BIT(1)
+
+/* EIP197_PE_EIP96_FUNCTION_EN */
+#define EIP197_FUNCTION_RSVD			(BIT(6) | BIT(15) | BIT(20) | BIT(23))
+#define EIP197_PROTOCOL_HASH_ONLY		BIT(0)
+#define EIP197_PROTOCOL_ENCRYPT_ONLY		BIT(1)
+#define EIP197_PROTOCOL_HASH_ENCRYPT		BIT(2)
+#define EIP197_PROTOCOL_HASH_DECRYPT		BIT(3)
+#define EIP197_PROTOCOL_ENCRYPT_HASH		BIT(4)
+#define EIP197_PROTOCOL_DECRYPT_HASH		BIT(5)
+#define EIP197_ALG_ARC4				BIT(7)
+#define EIP197_ALG_AES_ECB			BIT(8)
+#define EIP197_ALG_AES_CBC			BIT(9)
+#define EIP197_ALG_AES_CTR_ICM			BIT(10)
+#define EIP197_ALG_AES_OFB			BIT(11)
+#define EIP197_ALG_AES_CFB			BIT(12)
+#define EIP197_ALG_DES_ECB			BIT(13)
+#define EIP197_ALG_DES_CBC			BIT(14)
+#define EIP197_ALG_DES_OFB			BIT(16)
+#define EIP197_ALG_DES_CFB			BIT(17)
+#define EIP197_ALG_3DES_ECB			BIT(18)
+#define EIP197_ALG_3DES_CBC			BIT(19)
+#define EIP197_ALG_3DES_OFB			BIT(21)
+#define EIP197_ALG_3DES_CFB			BIT(22)
+#define EIP197_ALG_MD5				BIT(24)
+#define EIP197_ALG_HMAC_MD5			BIT(25)
+#define EIP197_ALG_SHA1				BIT(26)
+#define EIP197_ALG_HMAC_SHA1			BIT(27)
+#define EIP197_ALG_SHA2				BIT(28)
+#define EIP197_ALG_HMAC_SHA2			BIT(29)
+#define EIP197_ALG_AES_XCBC_MAC			BIT(30)
+#define EIP197_ALG_GCM_HASH			BIT(31)
+
+/* EIP197_PE_EIP96_CONTEXT_CTRL */
+#define EIP197_CONTEXT_SIZE(n)			(n)
+#define EIP197_ADDRESS_MODE			BIT(8)
+#define EIP197_CONTROL_MODE			BIT(9)
+
+/* Context Control */
+struct safexcel_context_record {
+	u32 control0;
+	u32 control1;
+
+	__le32 data[12];
+} __packed;
+
+/* control0 */
+#define CONTEXT_CONTROL_TYPE_NULL_OUT		0x0
+#define CONTEXT_CONTROL_TYPE_NULL_IN		0x1
+#define CONTEXT_CONTROL_TYPE_HASH_OUT		0x2
+#define CONTEXT_CONTROL_TYPE_HASH_IN		0x3
+#define CONTEXT_CONTROL_TYPE_CRYPTO_OUT		0x4
+#define CONTEXT_CONTROL_TYPE_CRYPTO_IN		0x5
+#define CONTEXT_CONTROL_TYPE_ENCRYPT_HASH_OUT	0x6
+#define CONTEXT_CONTROL_TYPE_DECRYPT_HASH_IN	0x7
+#define CONTEXT_CONTROL_TYPE_HASH_ENCRYPT_OUT	0x14
+#define CONTEXT_CONTROL_TYPE_HASH_DECRYPT_OUT	0x15
+#define CONTEXT_CONTROL_RESTART_HASH		BIT(4)
+#define CONTEXT_CONTROL_NO_FINISH_HASH		BIT(5)
+#define CONTEXT_CONTROL_SIZE(n)			((n) << 8)
+#define CONTEXT_CONTROL_KEY_EN			BIT(16)
+#define CONTEXT_CONTROL_CRYPTO_ALG_AES128	(0x5 << 17)
+#define CONTEXT_CONTROL_CRYPTO_ALG_AES192	(0x6 << 17)
+#define CONTEXT_CONTROL_CRYPTO_ALG_AES256	(0x7 << 17)
+#define CONTEXT_CONTROL_DIGEST_PRECOMPUTED	(0x1 << 21)
+#define CONTEXT_CONTROL_DIGEST_HMAC		(0x3 << 21)
+#define CONTEXT_CONTROL_CRYPTO_ALG_SHA1		(0x2 << 23)
+#define CONTEXT_CONTROL_CRYPTO_ALG_SHA224	(0x4 << 23)
+#define CONTEXT_CONTROL_CRYPTO_ALG_SHA256	(0x3 << 23)
+#define CONTEXT_CONTROL_INV_FR			(0x5 << 24)
+#define CONTEXT_CONTROL_INV_TR			(0x6 << 24)
+
+/* control1 */
+#define CONTEXT_CONTROL_CRYPTO_MODE_ECB		(0 << 0)
+#define CONTEXT_CONTROL_CRYPTO_MODE_CBC		(1 << 0)
+#define CONTEXT_CONTROL_IV0			BIT(5)
+#define CONTEXT_CONTROL_IV1			BIT(6)
+#define CONTEXT_CONTROL_IV2			BIT(7)
+#define CONTEXT_CONTROL_IV3			BIT(8)
+#define CONTEXT_CONTROL_DIGEST_CNT		BIT(9)
+#define CONTEXT_CONTROL_COUNTER_MODE		BIT(10)
+#define CONTEXT_CONTROL_HASH_STORE		BIT(19)
+
+/* EIP197_CS_RAM_CTRL */
+#define EIP197_TRC_ENABLE_0			BIT(4)
+#define EIP197_TRC_ENABLE_1			BIT(5)
+#define EIP197_TRC_ENABLE_2			BIT(6)
+#define EIP197_TRC_ENABLE_MASK			GENMASK(6, 4)
+
+/* EIP197_TRC_PARAMS */
+#define EIP197_TRC_PARAMS_SW_RESET		BIT(0)
+#define EIP197_TRC_PARAMS_DATA_ACCESS		BIT(2)
+#define EIP197_TRC_PARAMS_HTABLE_SZ(x)		((x) << 4)
+#define EIP197_TRC_PARAMS_BLK_TIMER_SPEED(x)	((x) << 10)
+#define EIP197_TRC_PARAMS_RC_SZ_LARGE(n)	((n) << 18)
+
+/* EIP197_TRC_FREECHAIN */
+#define EIP197_TRC_FREECHAIN_HEAD_PTR(p)	(p)
+#define EIP197_TRC_FREECHAIN_TAIL_PTR(p)	((p) << 16)
+
+/* EIP197_TRC_PARAMS2 */
+#define EIP197_TRC_PARAMS2_HTABLE_PTR(p)	(p)
+#define EIP197_TRC_PARAMS2_RC_SZ_SMALL(n)	((n) << 18)
+
+/* Cache helpers */
+#define EIP197_CS_RC_MAX			52
+#define EIP197_CS_RC_SIZE			(4 * sizeof(u32))
+#define EIP197_CS_RC_NEXT(x)			(x)
+#define EIP197_CS_RC_PREV(x)			((x) << 10)
+#define EIP197_RC_NULL				0x3ff
+#define EIP197_CS_TRC_REC_WC			59
+#define EIP197_CS_TRC_LG_REC_WC			73
+
+/* Result data */
+struct result_data_desc {
+	u32 packet_length:17;
+	u32 error_code:15;
+
+	u8 bypass_length:4;
+	u8 e15:1;
+	u16 rsvd0;
+	u8 hash_bytes:1;
+	u8 hash_length:6;
+	u8 generic_bytes:1;
+	u8 checksum:1;
+	u8 next_header:1;
+	u8 length:1;
+
+	u16 application_id;
+	u16 rsvd1;
+
+	u32 rsvd2;
+} __packed;
+
+
+/* Basic Result Descriptor format */
+struct safexcel_result_desc {
+	u32 particle_size:17;
+	u8 rsvd0:3;
+	u8 descriptor_overflow:1;
+	u8 buffer_overflow:1;
+	u8 last_seg:1;
+	u8 first_seg:1;
+	u16 result_size:8;
+
+	u32 rsvd1;
+
+	u32 data_lo;
+	u32 data_hi;
+
+	struct result_data_desc result_data;
+} __packed;
+
+struct safexcel_token {
+	u32 packet_length:17;
+	u8 stat:2;
+	u16 instructions:9;
+	u8 opcode:4;
+} __packed;
+
+#define EIP197_TOKEN_STAT_LAST_HASH		BIT(0)
+#define EIP197_TOKEN_STAT_LAST_PACKET		BIT(1)
+#define EIP197_TOKEN_OPCODE_DIRECTION		0x0
+#define EIP197_TOKEN_OPCODE_INSERT		0x2
+#define EIP197_TOKEN_OPCODE_NOOP		EIP197_TOKEN_OPCODE_INSERT
+#define EIP197_TOKEN_OPCODE_BYPASS		GENMASK(3, 0)
+
+static inline void eip197_noop_token(struct safexcel_token *token)
+{
+	token->opcode = EIP197_TOKEN_OPCODE_NOOP;
+	token->packet_length = BIT(2);
+}
+
+/* Instructions */
+#define EIP197_TOKEN_INS_INSERT_HASH_DIGEST	0x1c
+#define EIP197_TOKEN_INS_TYPE_OUTPUT		BIT(5)
+#define EIP197_TOKEN_INS_TYPE_HASH		BIT(6)
+#define EIP197_TOKEN_INS_TYPE_CRYTO		BIT(7)
+#define EIP197_TOKEN_INS_LAST			BIT(8)
+
+/* Processing Engine Control Data  */
+struct safexcel_control_data_desc {
+	u32 packet_length:17;
+	u16 options:13;
+	u8 type:2;
+
+	u16 application_id;
+	u16 rsvd;
+
+	u8 refresh:2;
+	u32 context_lo:30;
+	u32 context_hi;
+
+	u32 control0;
+	u32 control1;
+
+	u32 token[EIP197_MAX_TOKENS];
+} __packed;
+
+#define EIP197_OPTION_MAGIC_VALUE	BIT(0)
+#define EIP197_OPTION_64BIT_CTX		BIT(1)
+#define EIP197_OPTION_CTX_CTRL_IN_CMD	BIT(8)
+#define EIP197_OPTION_4_TOKEN_IV_CMD	GENMASK(11, 9)
+
+#define EIP197_TYPE_EXTENDED		0x3
+
+/* Basic Command Descriptor format */
+struct safexcel_command_desc {
+	u32 particle_size:17;
+	u8 rsvd0:5;
+	u8 last_seg:1;
+	u8 first_seg:1;
+	u16 additional_cdata_size:8;
+
+	u32 rsvd1;
+
+	u32 data_lo;
+	u32 data_hi;
+
+	struct safexcel_control_data_desc control_data;
+} __packed;
+
+/*
+ * Internal structures & functions
+ */
+
+enum eip197_fw {
+	FW_IFPP = 0,
+	FW_IPUE,
+	FW_NB
+};
+
+struct safexcel_ring {
+	void *base;
+	void *base_end;
+	dma_addr_t base_dma;
+
+	/* write and read pointers */
+	void *write;
+	void *read;
+
+	/* number of elements used in the ring */
+	unsigned nr;
+	unsigned offset;
+};
+
+enum safexcel_alg_type {
+	SAFEXCEL_ALG_TYPE_SKCIPHER,
+	SAFEXCEL_ALG_TYPE_AHASH,
+};
+
+struct safexcel_request {
+	struct list_head list;
+	struct crypto_async_request *req;
+};
+
+struct safexcel_config {
+	u32 rings;
+
+	u32 cd_size;
+	u32 cd_offset;
+
+	u32 rd_size;
+	u32 rd_offset;
+};
+
+struct safexcel_work_data {
+	struct work_struct work;
+	struct safexcel_crypto_priv *priv;
+	int ring;
+};
+
+struct safexcel_crypto_priv {
+	void __iomem *base;
+	struct device *dev;
+	struct clk *clk;
+	struct safexcel_config config;
+
+	/* context DMA pool */
+	struct dma_pool *context_pool;
+
+	atomic_t ring_used;
+
+	struct {
+		spinlock_t lock;
+		spinlock_t egress_lock;
+
+		struct list_head list;
+		struct workqueue_struct *workqueue;
+		struct safexcel_work_data work_data;
+
+		/* command/result rings */
+		struct safexcel_ring cdr;
+		struct safexcel_ring rdr;
+
+		/* queue */
+		struct crypto_queue queue;
+		spinlock_t queue_lock;
+		bool need_dequeue;
+	} ring[EIP197_MAX_RINGS];
+};
+
+struct safexcel_context {
+	int (*send)(struct crypto_async_request *req, int ring,
+		    struct safexcel_request *request, int *commands,
+		    int *results);
+	int (*handle_result)(struct safexcel_crypto_priv *priv, int ring,
+			     struct crypto_async_request *req, bool *complete,
+			     int *ret);
+	struct safexcel_context_record *ctxr;
+	dma_addr_t ctxr_dma;
+
+	int ring;
+	bool needs_inv;
+	bool exit_inv;
+
+	/* Used for ahash requests */
+	dma_addr_t result_dma;
+	void *cache;
+	dma_addr_t cache_dma;
+	unsigned int cache_sz;
+};
+
+/*
+ * Template describing one algorithm so that it can be registered. It also
+ * carries a pointer to the driver's private structure, which lets the
+ * algorithm callbacks reach that structure without any global pointer.
+ */
+struct safexcel_alg_template {
+	struct safexcel_crypto_priv *priv;
+	enum safexcel_alg_type type;
+	union {
+		struct skcipher_alg skcipher;
+		struct ahash_alg ahash;
+	} alg;
+};
+
+struct safexcel_inv_result {
+	struct completion completion;
+	int error;
+};
+
+void safexcel_dequeue(struct safexcel_crypto_priv *priv, int ring);
+void safexcel_complete(struct safexcel_crypto_priv *priv, int ring);
+void safexcel_free_context(struct safexcel_crypto_priv *priv,
+				  struct crypto_async_request *req,
+				  int result_sz);
+int safexcel_invalidate_cache(struct crypto_async_request *async,
+			      struct safexcel_context *ctx,
+			      struct safexcel_crypto_priv *priv,
+			      dma_addr_t ctxr_dma, int ring,
+			      struct safexcel_request *request);
+int safexcel_init_ring_descriptors(struct safexcel_crypto_priv *priv,
+				   struct safexcel_ring *cdr,
+				   struct safexcel_ring *rdr);
+int safexcel_select_ring(struct safexcel_crypto_priv *priv);
+void *safexcel_ring_next_rptr(struct safexcel_crypto_priv *priv,
+			      struct safexcel_ring *ring);
+void safexcel_ring_rollback_wptr(struct safexcel_crypto_priv *priv,
+				 struct safexcel_ring *ring);
+struct safexcel_command_desc *safexcel_add_cdesc(struct safexcel_crypto_priv *priv,
+						 int ring_id,
+						 bool first, bool last,
+						 dma_addr_t data, u32 len,
+						 u32 full_data_len,
+						 dma_addr_t context);
+struct safexcel_result_desc *safexcel_add_rdesc(struct safexcel_crypto_priv *priv,
+						 int ring_id,
+						bool first, bool last,
+						dma_addr_t data, u32 len);
+void safexcel_inv_complete(struct crypto_async_request *req, int error);
+
+/* available algorithms */
+extern struct safexcel_alg_template safexcel_alg_ecb_aes;
+extern struct safexcel_alg_template safexcel_alg_cbc_aes;
+extern struct safexcel_alg_template safexcel_alg_sha1;
+extern struct safexcel_alg_template safexcel_alg_sha224;
+extern struct safexcel_alg_template safexcel_alg_sha256;
+extern struct safexcel_alg_template safexcel_alg_hmac_sha1;
+
+#endif
diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c
new file mode 100644
index 0000000..d2207ac
--- /dev/null
+++ b/drivers/crypto/inside-secure/safexcel_cipher.c
@@ -0,0 +1,561 @@
+/*
+ * Copyright (C) 2017 Marvell
+ *
+ * Antoine Tenart <antoine.tenart@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+
+#include <crypto/aes.h>
+#include <crypto/skcipher.h>
+
+#include "safexcel.h"
+
+enum safexcel_cipher_direction {
+	SAFEXCEL_ENCRYPT,
+	SAFEXCEL_DECRYPT,
+};
+
+struct safexcel_cipher_ctx {
+	struct safexcel_context base;
+	struct safexcel_crypto_priv *priv;
+
+	enum safexcel_cipher_direction direction;
+	u32 mode;
+
+	__le32 key[8];
+	unsigned int key_len;
+};
+
+/* Write the IV (CBC mode only) and the single cipher token into @cdesc. */
+static void safexcel_cipher_token(struct safexcel_cipher_ctx *ctx,
+				  struct crypto_async_request *async,
+				  struct safexcel_command_desc *cdesc,
+				  u32 length)
+{
+	struct skcipher_request *req = skcipher_request_cast(async);
+	u32 *words = cdesc->control_data.token;
+	struct safexcel_token *tok;
+
+	if (ctx->mode == CONTEXT_CONTROL_CRYPTO_MODE_CBC) {
+		/* The IV fills the first token words, the token follows it. */
+		memcpy(words, req->iv, AES_BLOCK_SIZE);
+		words += AES_BLOCK_SIZE / sizeof(u32);
+		cdesc->control_data.options |= EIP197_OPTION_4_TOKEN_IV_CMD;
+	}
+
+	tok = (struct safexcel_token *)words;
+	tok->opcode = EIP197_TOKEN_OPCODE_DIRECTION;
+	tok->packet_length = length;
+	tok->stat = EIP197_TOKEN_STAT_LAST_PACKET;
+	tok->instructions = EIP197_TOKEN_INS_TYPE_OUTPUT |
+			    EIP197_TOKEN_INS_TYPE_CRYTO |
+			    EIP197_TOKEN_INS_LAST;
+}
+
+/* Set the AES key; flag an existing context record stale if the key changed. */
+static int safexcel_aes_setkey(struct crypto_skcipher *ctfm, const u8 *key,
+			       unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(ctfm);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_aes_ctx aes;
+	int ret, i;
+
+	ret = crypto_aes_expand_key(&aes, key, len);
+	if (ret) {
+		crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return ret;
+	}
+
+	/* Without a context record there is nothing stale to invalidate. */
+	for (i = 0; i < len / sizeof(u32); i++) {
+		__le32 word = cpu_to_le32(aes.key_enc[i]);
+
+		if (ctx->base.ctxr && ctx->key[i] != word)
+			ctx->base.needs_inv = true;
+		ctx->key[i] = word;
+	}
+
+	ctx->key_len = len;
+
+	memzero_explicit(&aes, sizeof(aes));
+	return 0;
+}
+
+/* Fill the control words of @cdesc from the direction, mode and key length. */
+static int safexcel_context_control(struct safexcel_cipher_ctx *ctx,
+				    struct safexcel_command_desc *cdesc)
+{
+	struct safexcel_control_data_desc *cd = &cdesc->control_data;
+	u32 alg, words;
+
+	cd->control0 |= (ctx->direction == SAFEXCEL_ENCRYPT) ?
+			CONTEXT_CONTROL_TYPE_CRYPTO_OUT :
+			CONTEXT_CONTROL_TYPE_CRYPTO_IN;
+	cd->control0 |= CONTEXT_CONTROL_KEY_EN;
+	cd->control1 |= ctx->mode;
+
+	switch (ctx->key_len) {
+	case AES_KEYSIZE_128:
+		alg = CONTEXT_CONTROL_CRYPTO_ALG_AES128;
+		words = 4;
+		break;
+	case AES_KEYSIZE_192:
+		alg = CONTEXT_CONTROL_CRYPTO_ALG_AES192;
+		words = 6;
+		break;
+	case AES_KEYSIZE_256:
+		alg = CONTEXT_CONTROL_CRYPTO_ALG_AES256;
+		words = 8;
+		break;
+	default:
+		/* Note: control0/control1 keep the bits set above on error. */
+		dev_err(ctx->priv->dev, "aes keysize not supported: %u\n",
+			ctx->key_len);
+		return -EINVAL;
+	}
+	cd->control0 |= alg | CONTEXT_CONTROL_SIZE(words);
+
+	return 0;
+}
+
+static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
+				  struct crypto_async_request *async,
+				  bool *should_complete, int *ret)
+{
+	struct skcipher_request *req = skcipher_request_cast(async);
+	struct safexcel_result_desc *rdesc;
+	int ndesc = 0;
+
+	*ret = 0;
+	/* Cipher result handler: read descriptors up to the last segment. */
+	spin_lock_bh(&priv->ring[ring].egress_lock);
+	do {
+		rdesc = safexcel_ring_next_rptr(priv, &priv->ring[ring].rdr);
+		if (IS_ERR(rdesc)) {
+			dev_err(priv->dev,
+				"cipher: result: could not retrieve the result descriptor\n");
+			*ret = PTR_ERR(rdesc);
+			break;
+		}
+		/* An error code sets *ret to -EIO but does not stop the loop. */
+		if (rdesc->result_data.error_code) {
+			dev_err(priv->dev,
+				"cipher: result: result descriptor error (%d)\n",
+				rdesc->result_data.error_code);
+			*ret = -EIO;
+		}
+
+		ndesc++;
+	} while (!rdesc->last_seg);
+
+	safexcel_complete(priv, ring);
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
+	/* Unmap src/dst with the directions used by safexcel_aes_send(). */
+	if (req->src == req->dst) {
+		dma_unmap_sg(priv->dev, req->src,
+			     sg_nents_for_len(req->src, req->cryptlen),
+			     DMA_BIDIRECTIONAL);
+	} else {
+		dma_unmap_sg(priv->dev, req->src,
+			     sg_nents_for_len(req->src, req->cryptlen),
+			     DMA_TO_DEVICE);
+		dma_unmap_sg(priv->dev, req->dst,
+			     sg_nents_for_len(req->dst, req->cryptlen),
+			     DMA_FROM_DEVICE);
+	}
+
+	*should_complete = true;
+	/* The return value is the number of result descriptors read. */
+	return ndesc;
+}
+
+static int safexcel_aes_send(struct crypto_async_request *async,
+			     int ring, struct safexcel_request *request,
+			     int *commands, int *results)
+{
+	struct skcipher_request *req = skcipher_request_cast(async);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	struct safexcel_command_desc *cdesc;
+	struct safexcel_result_desc *rdesc;
+	struct scatterlist *sg;
+	int nr_src, nr_dst, n_cdesc = 0, n_rdesc = 0, queued = req->cryptlen;
+	int i, ret = 0;
+	/* Send hook: map the request, then fill the command and result rings. */
+	if (req->src == req->dst) {
+		nr_src = dma_map_sg(priv->dev, req->src,
+				    sg_nents_for_len(req->src, req->cryptlen),
+				    DMA_BIDIRECTIONAL);
+		nr_dst = nr_src;
+		if (!nr_src)
+			return -EINVAL;
+	} else {
+		nr_src = dma_map_sg(priv->dev, req->src,
+				    sg_nents_for_len(req->src, req->cryptlen),
+				    DMA_TO_DEVICE);
+		if (!nr_src)
+			return -EINVAL;
+
+		nr_dst = dma_map_sg(priv->dev, req->dst,
+				    sg_nents_for_len(req->dst, req->cryptlen),
+				    DMA_FROM_DEVICE);
+		if (!nr_dst) {
+			dma_unmap_sg(priv->dev, req->src,
+				     sg_nents_for_len(req->src, req->cryptlen),
+				     DMA_TO_DEVICE);
+			return -EINVAL;
+		}
+	}
+	/* Copy the key into the data area of the context record. */
+	memcpy(ctx->base.ctxr->data, ctx->key, ctx->key_len);
+
+	spin_lock_bh(&priv->ring[ring].egress_lock);
+
+	/* command descriptors: one per source segment, capped to cryptlen */
+	for_each_sg(req->src, sg, nr_src, i) {
+		int len = sg_dma_len(sg);
+
+		/* Do not overflow the request */
+		if (queued - len < 0)
+			len = queued;
+
+		cdesc = safexcel_add_cdesc(priv, ring, !n_cdesc, !(queued - len),
+					   sg_dma_address(sg), len, req->cryptlen,
+					   ctx->base.ctxr_dma);
+		if (IS_ERR(cdesc)) {
+			/* No space left in the command descriptor ring */
+			ret = PTR_ERR(cdesc);
+			goto cdesc_rollback;
+		}
+		n_cdesc++;
+		/* NOTE(review): safexcel_context_control() errors are ignored. */
+		if (n_cdesc == 1) {
+			safexcel_context_control(ctx, cdesc);
+			safexcel_cipher_token(ctx, async, cdesc, req->cryptlen);
+		}
+
+		queued -= len;
+		if (!queued)
+			break;
+	}
+
+	/* result descriptors: one per mapped destination segment */
+	for_each_sg(req->dst, sg, nr_dst, i) {
+		bool first = !i, last = (i == nr_dst - 1);
+		u32 len = sg_dma_len(sg);
+
+		rdesc = safexcel_add_rdesc(priv, ring, first, last,
+					   sg_dma_address(sg), len);
+		if (IS_ERR(rdesc)) {
+			/* No space left in the result descriptor ring */
+			ret = PTR_ERR(rdesc);
+			goto rdesc_rollback;
+		}
+		n_rdesc++;
+	}
+
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
+	/* Success: record the request and install its result handler. */
+	request->req = &req->base;
+	ctx->base.handle_result = safexcel_handle_result;
+
+	*commands = n_cdesc;
+	*results = n_rdesc;
+	return 0;
+
+rdesc_rollback:
+	for (i = 0; i < n_rdesc; i++)
+		safexcel_ring_rollback_wptr(priv, &priv->ring[ring].rdr);
+cdesc_rollback:
+	for (i = 0; i < n_cdesc; i++)
+		safexcel_ring_rollback_wptr(priv, &priv->ring[ring].cdr);
+
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
+	/* Drop the DMA mappings taken at the top of the function. */
+	if (req->src == req->dst) {
+		dma_unmap_sg(priv->dev, req->src,
+			     sg_nents_for_len(req->src, req->cryptlen),
+			     DMA_BIDIRECTIONAL);
+	} else {
+		dma_unmap_sg(priv->dev, req->src,
+			     sg_nents_for_len(req->src, req->cryptlen),
+			     DMA_TO_DEVICE);
+		dma_unmap_sg(priv->dev, req->dst,
+			     sg_nents_for_len(req->dst, req->cryptlen),
+			     DMA_FROM_DEVICE);
+	}
+
+	return ret;
+}
+
+static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
+				      int ring,
+				      struct crypto_async_request *async,
+				      bool *should_complete, int *ret)
+{
+	struct skcipher_request *req = skcipher_request_cast(async);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct safexcel_result_desc *rdesc;
+	int ndesc = 0, enq_ret;
+	/* Result handler for a context invalidation request. */
+	*ret = 0;
+
+	spin_lock_bh(&priv->ring[ring].egress_lock);
+	do {
+		rdesc = safexcel_ring_next_rptr(priv, &priv->ring[ring].rdr);
+		if (IS_ERR(rdesc)) {
+			dev_err(priv->dev,
+				"cipher: invalidate: could not retrieve the result descriptor\n");
+			*ret = PTR_ERR(rdesc);
+			break;
+		}
+
+		if (rdesc->result_data.error_code) {
+			dev_err(priv->dev, "cipher: invalidate: result descriptor error (%d)\n",
+				rdesc->result_data.error_code);
+			*ret = -EIO;
+		}
+
+		ndesc++;
+	} while (!rdesc->last_seg);
+
+	safexcel_complete(priv, ring);
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
+	/* Invalidation requested on tfm exit: free the record and complete. */
+	if (ctx->base.exit_inv) {
+		dma_pool_free(priv->context_pool, ctx->base.ctxr,
+			      ctx->base.ctxr_dma);
+
+		*should_complete = true;
+
+		return ndesc;
+	}
+	/* Otherwise queue the same request again, now as a regular AES send. */
+	ring = safexcel_select_ring(priv);
+	ctx->base.ring = ring;
+	ctx->base.needs_inv = false;
+	ctx->base.send = safexcel_aes_send;
+
+	spin_lock_bh(&priv->ring[ring].queue_lock);
+	enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async);
+	spin_unlock_bh(&priv->ring[ring].queue_lock);
+
+	if (enq_ret != -EINPROGRESS)
+		*ret = enq_ret;
+
+	if (!priv->ring[ring].need_dequeue)
+		safexcel_dequeue(priv, ring);
+	/* Not complete yet: the request was queued again above. */
+	*should_complete = false;
+
+	return ndesc;
+}
+
+/* Send hook that issues a context invalidation instead of a cipher request. */
+static int safexcel_cipher_send_inv(struct crypto_async_request *async,
+				    int ring, struct safexcel_request *request,
+				    int *commands, int *results)
+{
+	struct skcipher_request *req = skcipher_request_cast(async);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct safexcel_context *base = &ctx->base;
+	int err;
+
+	base->handle_result = safexcel_handle_inv_result;
+	err = safexcel_invalidate_cache(async, base, ctx->priv,
+					base->ctxr_dma, ring, request);
+	if (likely(!err)) {
+		/* One command and one result descriptor are reported. */
+		*commands = 1;
+		*results = 1;
+	}
+
+	return err;
+}
+
+static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	struct skcipher_request req;
+	struct safexcel_inv_result result = { 0 };
+	int ring = ctx->base.ring;
+	/* Queue an invalidation of the tfm's context record and wait for it. */
+	memset(&req, 0, sizeof(struct skcipher_request));
+
+	/* create invalidation request */
+	init_completion(&result.completion);
+	skcipher_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+					safexcel_inv_complete, &result);
+
+	skcipher_request_set_tfm(&req, __crypto_skcipher_cast(tfm));
+	ctx = crypto_tfm_ctx(req.base.tfm);
+	ctx->base.exit_inv = true;
+	ctx->base.send = safexcel_cipher_send_inv;
+
+	spin_lock_bh(&priv->ring[ring].queue_lock);
+	crypto_enqueue_request(&priv->ring[ring].queue, &req.base);
+	spin_unlock_bh(&priv->ring[ring].queue_lock);
+
+	if (!priv->ring[ring].need_dequeue)
+		safexcel_dequeue(priv, ring);
+	/* req and result are on this stack: never return before completion. */
+	wait_for_completion(&result.completion);
+
+	if (result.error) {
+		dev_warn(priv->dev,
+			"cipher: sync: invalidate: completion error %d\n",
+			 result.error);
+		return result.error;
+	}
+
+	return 0;
+}
+
+static int safexcel_aes(struct skcipher_request *req,
+			enum safexcel_cipher_direction dir, u32 mode)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	int ret, ring;
+	/* NOTE(review): dir/mode are stored in the tfm ctx, not per request. */
+	ctx->direction = dir;
+	ctx->mode = mode;
+	/* Reuse the context record if any, else pick a ring and allocate one. */
+	if (ctx->base.ctxr) {
+		if (ctx->base.needs_inv)
+			ctx->base.send = safexcel_cipher_send_inv;
+	} else {
+		ctx->base.ring = safexcel_select_ring(priv);
+		ctx->base.send = safexcel_aes_send;
+
+		ctx->base.ctxr = dma_pool_zalloc(priv->context_pool,
+						 EIP197_GFP_FLAGS(req->base),
+						 &ctx->base.ctxr_dma);
+		if (!ctx->base.ctxr)
+			return -ENOMEM;
+	}
+
+	ring = ctx->base.ring;
+
+	spin_lock_bh(&priv->ring[ring].queue_lock);
+	ret = crypto_enqueue_request(&priv->ring[ring].queue, &req->base);
+	spin_unlock_bh(&priv->ring[ring].queue_lock);
+
+	if (!priv->ring[ring].need_dequeue)
+		safexcel_dequeue(priv, ring);
+	/* Hand the status of crypto_enqueue_request() back to the caller. */
+	return ret;
+}
+
+static int safexcel_ecb_aes_encrypt(struct skcipher_request *req)
+{
+	return safexcel_aes(req, SAFEXCEL_ENCRYPT,
+			    CONTEXT_CONTROL_CRYPTO_MODE_ECB);
+}
+
+static int safexcel_ecb_aes_decrypt(struct skcipher_request *req)
+{
+	return safexcel_aes(req, SAFEXCEL_DECRYPT,
+			    CONTEXT_CONTROL_CRYPTO_MODE_ECB);
+}
+
+static int safexcel_skcipher_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_alg_template *tmpl =
+		container_of(tfm->__crt_alg, struct safexcel_alg_template,
+			     alg.skcipher.base);
+
+	ctx->priv = tmpl->priv;
+
+	return 0;
+}
+
+/* tfm exit hook: wipe the key material and invalidate the context record. */
+static void safexcel_skcipher_cra_exit(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	int err;
+
+	memzero_explicit(ctx->key, sizeof(ctx->key));
+	if (!ctx->base.ctxr)	/* no context record, nothing to invalidate */
+		return;
+
+	memzero_explicit(ctx->base.ctxr->data, 8 * sizeof(u32));
+
+	err = safexcel_cipher_exit_inv(tfm);
+	if (!err)
+		return;
+	dev_warn(priv->dev, "cipher: invalidation error %d\n", err);
+}
+
+struct safexcel_alg_template safexcel_alg_ecb_aes = {
+	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
+	.alg.skcipher = {
+		.setkey = safexcel_aes_setkey,
+		.encrypt = safexcel_ecb_aes_encrypt,
+		.decrypt = safexcel_ecb_aes_decrypt,
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.base = {
+			.cra_name = "ecb(aes)",
+			.cra_driver_name = "safexcel-ecb-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_skcipher_cra_init,
+			.cra_exit = safexcel_skcipher_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_cbc_aes_encrypt(struct skcipher_request *req)
+{
+	return safexcel_aes(req, SAFEXCEL_ENCRYPT,
+			    CONTEXT_CONTROL_CRYPTO_MODE_CBC);
+}
+
+static int safexcel_cbc_aes_decrypt(struct skcipher_request *req)
+{
+	return safexcel_aes(req, SAFEXCEL_DECRYPT,
+			    CONTEXT_CONTROL_CRYPTO_MODE_CBC);
+}
+
+struct safexcel_alg_template safexcel_alg_cbc_aes = {
+	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
+	.alg.skcipher = {
+		.setkey = safexcel_aes_setkey,
+		.encrypt = safexcel_cbc_aes_encrypt,
+		.decrypt = safexcel_cbc_aes_decrypt,
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.ivsize = AES_BLOCK_SIZE,
+		.base = {
+			.cra_name = "cbc(aes)",
+			.cra_driver_name = "safexcel-cbc-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_skcipher_cra_init,
+			.cra_exit = safexcel_skcipher_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
new file mode 100644
index 0000000..8527a58
--- /dev/null
+++ b/drivers/crypto/inside-secure/safexcel_hash.c
@@ -0,0 +1,1052 @@
+/*
+ * Copyright (C) 2017 Marvell
+ *
+ * Antoine Tenart <antoine.tenart@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <crypto/hmac.h>
+#include <crypto/sha.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+
+
+#include "safexcel.h"
+
+struct safexcel_ahash_ctx {
+	struct safexcel_context base;
+	struct safexcel_crypto_priv *priv;
+
+	u32 alg;
+	u32 digest;
+
+	u32 ipad[SHA1_DIGEST_SIZE / sizeof(u32)];
+	u32 opad[SHA1_DIGEST_SIZE / sizeof(u32)];
+};
+
+struct safexcel_ahash_req {
+	bool last_req;	/* set by final()/finup() and by the hmac iv setup */
+	bool finish;	/* set by final()/finup(); gates NO_FINISH_HASH */
+	bool hmac;	/* set by the hmac iv setup; update() then enqueues */
+
+	u8 state_sz;    /* expected state size, only set once */
+	u32 state[SHA256_DIGEST_SIZE / sizeof(u32)];
+
+	u64 len;	/* bytes accepted by update() so far */
+	u64 processed;	/* bytes already queued to the engine by send() */
+
+	u8 cache[SHA256_BLOCK_SIZE] __aligned(sizeof(u32));
+	u8 cache_next[SHA256_BLOCK_SIZE] __aligned(sizeof(u32));
+};
+
+struct safexcel_ahash_export_state {
+	u64 len;
+	u64 processed;
+
+	u32 state[SHA256_DIGEST_SIZE / sizeof(u32)];
+	u8 cache[SHA256_BLOCK_SIZE];
+};
+
+static void safexcel_hash_token(struct safexcel_command_desc *cdesc,
+				u32 input_length, u32 result_length)
+{
+	struct safexcel_token *token =
+		(struct safexcel_token *)cdesc->control_data.token;
+
+	token[0].opcode = EIP197_TOKEN_OPCODE_DIRECTION;
+	token[0].packet_length = input_length;
+	token[0].stat = EIP197_TOKEN_STAT_LAST_HASH;
+	token[0].instructions = EIP197_TOKEN_INS_TYPE_HASH;
+
+	token[1].opcode = EIP197_TOKEN_OPCODE_INSERT;
+	token[1].packet_length = result_length;
+	token[1].stat = EIP197_TOKEN_STAT_LAST_HASH |
+			EIP197_TOKEN_STAT_LAST_PACKET;
+	token[1].instructions = EIP197_TOKEN_INS_TYPE_OUTPUT |
+				EIP197_TOKEN_INS_INSERT_HASH_DIGEST;
+}
+
+static void safexcel_context_control(struct safexcel_ahash_ctx *ctx,
+				     struct safexcel_ahash_req *req,
+				     struct safexcel_command_desc *cdesc,
+				     unsigned int digestsize,
+				     unsigned int blocksize)
+{
+	int i;
+
+	cdesc->control_data.control0 |= CONTEXT_CONTROL_TYPE_HASH_OUT;
+	cdesc->control_data.control0 |= ctx->alg;
+	cdesc->control_data.control0 |= ctx->digest;
+
+	if (ctx->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED) {
+		if (req->processed) {
+			if (ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SHA1)
+				cdesc->control_data.control0 |= CONTEXT_CONTROL_SIZE(6);
+			else if (ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SHA224 ||
+				 ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SHA256)
+				cdesc->control_data.control0 |= CONTEXT_CONTROL_SIZE(9);
+
+			cdesc->control_data.control1 |= CONTEXT_CONTROL_DIGEST_CNT;
+		} else {
+			cdesc->control_data.control0 |= CONTEXT_CONTROL_RESTART_HASH;
+		}
+
+		if (!req->finish)
+			cdesc->control_data.control0 |= CONTEXT_CONTROL_NO_FINISH_HASH;
+
+		/*
+		 * Copy the input digest if needed, and setup the context
+		 * fields. Do this now as we need it to setup the first command
+		 * descriptor.
+		 */
+		if (req->processed) {
+			for (i = 0; i < digestsize / sizeof(u32); i++)
+				ctx->base.ctxr->data[i] = cpu_to_le32(req->state[i]);
+
+			if (req->finish)
+				ctx->base.ctxr->data[i] = cpu_to_le32(req->processed / blocksize);
+		}
+	} else if (ctx->digest == CONTEXT_CONTROL_DIGEST_HMAC) {
+		cdesc->control_data.control0 |= CONTEXT_CONTROL_SIZE(10);
+
+		memcpy(ctx->base.ctxr->data, ctx->ipad, digestsize);
+		memcpy(ctx->base.ctxr->data + digestsize / sizeof(u32),
+		       ctx->opad, digestsize);
+	}
+}
+
+static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
+				  struct crypto_async_request *async,
+				  bool *should_complete, int *ret)
+{
+	struct safexcel_result_desc *rdesc;
+	struct ahash_request *areq = ahash_request_cast(async);
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
+	struct safexcel_ahash_req *sreq = ahash_request_ctx(areq);
+	int cache_len, result_sz = sreq->state_sz;
+
+	*ret = 0;
+
+	spin_lock_bh(&priv->ring[ring].egress_lock);
+	rdesc = safexcel_ring_next_rptr(priv, &priv->ring[ring].rdr);
+	if (IS_ERR(rdesc)) {
+		dev_err(priv->dev,
+			"hash: result: could not retrieve the result descriptor\n");
+		*ret = PTR_ERR(rdesc);
+	} else if (rdesc->result_data.error_code) {
+		dev_err(priv->dev,
+			"hash: result: result descriptor error (%d)\n",
+			rdesc->result_data.error_code);
+		*ret = -EINVAL;
+	}
+
+	safexcel_complete(priv, ring);
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
+
+	if (sreq->finish)
+		result_sz = crypto_ahash_digestsize(ahash);
+	memcpy(sreq->state, areq->result, result_sz);
+
+	dma_unmap_sg(priv->dev, areq->src,
+		     sg_nents_for_len(areq->src, areq->nbytes), DMA_TO_DEVICE);
+
+	safexcel_free_context(priv, async, sreq->state_sz);
+
+	cache_len = sreq->len - sreq->processed;
+	if (cache_len)
+		memcpy(sreq->cache, sreq->cache_next, cache_len);
+
+	*should_complete = true;
+
+	return 1;
+}
+
+static int safexcel_ahash_send(struct crypto_async_request *async, int ring,
+			       struct safexcel_request *request, int *commands,
+			       int *results)
+{
+	struct ahash_request *areq = ahash_request_cast(async);
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	struct safexcel_command_desc *cdesc, *first_cdesc = NULL;
+	struct safexcel_result_desc *rdesc;
+	struct scatterlist *sg;
+	int i, nents, queued, len, cache_len, extra, n_cdesc = 0, ret = 0;
+
+	queued = len = req->len - req->processed;
+	if (queued < crypto_ahash_blocksize(ahash))
+		cache_len = queued;
+	else
+		cache_len = queued - areq->nbytes;
+
+	/*
+	 * If this is not the last request and the queued data does not fit
+	 * into full blocks, cache it for the next send() call.
+	 */
+	extra = queued & (crypto_ahash_blocksize(ahash) - 1);
+	if (!req->last_req && extra) {
+		sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
+				   req->cache_next, extra, areq->nbytes - extra);
+
+		queued -= extra;
+		len -= extra;
+	}
+
+	spin_lock_bh(&priv->ring[ring].egress_lock);
+
+	/* Add a command descriptor for the cached data, if any */
+	if (cache_len) {
+		ctx->base.cache = kzalloc(cache_len, EIP197_GFP_FLAGS(*async));
+		if (!ctx->base.cache) {
+			ret = -ENOMEM;
+			goto unlock;
+		}
+		memcpy(ctx->base.cache, req->cache, cache_len);
+		ctx->base.cache_dma = dma_map_single(priv->dev, ctx->base.cache,
+						     cache_len, DMA_TO_DEVICE);
+		if (dma_mapping_error(priv->dev, ctx->base.cache_dma)) {
+			ret = -EINVAL;
+			goto free_cache;
+		}
+
+		ctx->base.cache_sz = cache_len;
+		first_cdesc = safexcel_add_cdesc(priv, ring, 1,
+						 (cache_len == len),
+						 ctx->base.cache_dma,
+						 cache_len, len,
+						 ctx->base.ctxr_dma);
+		if (IS_ERR(first_cdesc)) {
+			ret = PTR_ERR(first_cdesc);
+			goto unmap_cache;
+		}
+		n_cdesc++;
+
+		queued -= cache_len;
+		if (!queued)
+			goto send_command;
+	}
+
+	/* Now handle the current ahash request buffer(s) */
+	nents = dma_map_sg(priv->dev, areq->src,
+		       sg_nents_for_len(areq->src, areq->nbytes),
+		       DMA_TO_DEVICE);
+	if (!nents) {
+		ret = -ENOMEM;
+		goto cdesc_rollback;
+	}
+
+	for_each_sg(areq->src, sg, nents, i) {
+		int sglen = sg_dma_len(sg);
+
+		/* Do not overflow the request */
+		if (queued - sglen < 0)
+			sglen = queued;
+
+		cdesc = safexcel_add_cdesc(priv, ring, !n_cdesc,
+					   !(queued - sglen), sg_dma_address(sg),
+					   sglen, len, ctx->base.ctxr_dma);
+		if (IS_ERR(cdesc)) {
+			ret = PTR_ERR(cdesc);
+			goto cdesc_rollback;
+		}
+		n_cdesc++;
+
+		if (n_cdesc == 1)
+			first_cdesc = cdesc;
+
+		queued -= sglen;
+		if (!queued)
+			break;
+	}
+
+send_command:
+	/* Setup the context options */
+	safexcel_context_control(ctx, req, first_cdesc, req->state_sz,
+				 crypto_ahash_blocksize(ahash));
+
+	/* Add the token */
+	safexcel_hash_token(first_cdesc, len, req->state_sz);
+
+	ctx->base.result_dma = dma_map_single(priv->dev, areq->result,
+					      req->state_sz, DMA_FROM_DEVICE);
+	if (dma_mapping_error(priv->dev, ctx->base.result_dma)) {
+		ret = -EINVAL;
+		goto cdesc_rollback;
+	}
+
+	/* Add a result descriptor */
+	rdesc = safexcel_add_rdesc(priv, ring, 1, 1, ctx->base.result_dma,
+				   req->state_sz);
+	if (IS_ERR(rdesc)) {
+		ret = PTR_ERR(rdesc);
+		goto cdesc_rollback;
+	}
+
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
+
+	req->processed += len;
+	request->req = &areq->base;
+	ctx->base.handle_result = safexcel_handle_result;
+
+	*commands = n_cdesc;
+	*results = 1;
+	return 0;
+
+cdesc_rollback:
+	for (i = 0; i < n_cdesc; i++)
+		safexcel_ring_rollback_wptr(priv, &priv->ring[ring].cdr);
+unmap_cache:
+	if (ctx->base.cache_dma) {
+		dma_unmap_single(priv->dev, ctx->base.cache_dma,
+				 ctx->base.cache_sz, DMA_TO_DEVICE);
+		ctx->base.cache_sz = 0;
+	}
+free_cache:
+	if (ctx->base.cache) {
+		kfree(ctx->base.cache);
+		ctx->base.cache = NULL;
+	}
+
+unlock:
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
+	return ret;
+}
+
+static inline bool safexcel_ahash_needs_inv_get(struct ahash_request *areq)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
+	unsigned int state_w_sz = req->state_sz / sizeof(u32);
+	int i;
+
+	for (i = 0; i < state_w_sz; i++)
+		if (ctx->base.ctxr->data[i] != cpu_to_le32(req->state[i]))
+			return true;
+
+	if (ctx->base.ctxr->data[state_w_sz] !=
+	    cpu_to_le32(req->processed / crypto_ahash_blocksize(ahash)))
+		return true;
+
+	return false;
+}
+
+static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
+				      int ring,
+				      struct crypto_async_request *async,
+				      bool *should_complete, int *ret)
+{
+	struct safexcel_result_desc *rdesc;
+	struct ahash_request *areq = ahash_request_cast(async);
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(ahash);
+	int enq_ret;
+
+	*ret = 0;
+
+	spin_lock_bh(&priv->ring[ring].egress_lock);
+	rdesc = safexcel_ring_next_rptr(priv, &priv->ring[ring].rdr);
+	if (IS_ERR(rdesc)) {
+		dev_err(priv->dev,
+			"hash: invalidate: could not retrieve the result descriptor\n");
+		*ret = PTR_ERR(rdesc);
+	} else if (rdesc->result_data.error_code) {
+		dev_err(priv->dev,
+			"hash: invalidate: result descriptor error (%d)\n",
+			rdesc->result_data.error_code);
+		*ret = -EINVAL;
+	}
+
+	safexcel_complete(priv, ring);
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
+
+	if (ctx->base.exit_inv) {
+		dma_pool_free(priv->context_pool, ctx->base.ctxr,
+			      ctx->base.ctxr_dma);
+
+		*should_complete = true;
+		return 1;
+	}
+
+	ring = safexcel_select_ring(priv);
+	ctx->base.ring = ring;
+	ctx->base.needs_inv = false;
+	ctx->base.send = safexcel_ahash_send;
+
+	spin_lock_bh(&priv->ring[ring].queue_lock);
+	enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async);
+	spin_unlock_bh(&priv->ring[ring].queue_lock);
+
+	if (enq_ret != -EINPROGRESS)
+		*ret = enq_ret;
+
+	if (!priv->ring[ring].need_dequeue)
+		safexcel_dequeue(priv, ring);
+
+	*should_complete = false;
+
+	return 1;
+}
+
+static int safexcel_ahash_send_inv(struct crypto_async_request *async,
+				   int ring, struct safexcel_request *request,
+				   int *commands, int *results)
+{
+	struct ahash_request *areq = ahash_request_cast(async);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	int ret;
+
+	ctx->base.handle_result = safexcel_handle_inv_result;
+	ret = safexcel_invalidate_cache(async, &ctx->base, ctx->priv,
+					ctx->base.ctxr_dma, ring, request);
+	if (unlikely(ret))
+		return ret;
+
+	*commands = 1;
+	*results = 1;
+
+	return 0;
+}
+
+static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	struct ahash_request req;
+	struct safexcel_inv_result result = { 0 };
+	int ring = ctx->base.ring;
+
+	memset(&req, 0, sizeof(struct ahash_request));
+
+	/* create invalidation request (req and result live on this stack) */
+	init_completion(&result.completion);
+	ahash_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				   safexcel_inv_complete, &result);
+
+	ahash_request_set_tfm(&req, __crypto_ahash_cast(tfm));
+	ctx = crypto_tfm_ctx(req.base.tfm);
+	ctx->base.exit_inv = true;
+	ctx->base.send = safexcel_ahash_send_inv;
+
+	spin_lock_bh(&priv->ring[ring].queue_lock);
+	crypto_enqueue_request(&priv->ring[ring].queue, &req.base);
+	spin_unlock_bh(&priv->ring[ring].queue_lock);
+
+	if (!priv->ring[ring].need_dequeue)
+		safexcel_dequeue(priv, ring);
+
+	wait_for_completion(&result.completion);
+
+	if (result.error) {
+		dev_warn(priv->dev, "hash: completion error (%d)\n",
+			 result.error);
+		return result.error;
+	}
+
+	return 0;
+}
+
+static int safexcel_ahash_cache(struct ahash_request *areq)
+{
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
+	int cache_len;
+
+	/* Bytes accepted by earlier calls that were not sent to the engine */
+	cache_len = req->len - areq->nbytes - req->processed;
+
+	/*
+	 * In case there isn't enough bytes to proceed (less than a
+	 * block size), cache the data until we have enough.
+	 */
+	if (cache_len + areq->nbytes <= crypto_ahash_blocksize(ahash)) {
+		sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
+				   req->cache + cache_len,
+				   areq->nbytes, 0);
+		return areq->nbytes;
+	}
+
+	/* We couldn't cache all the data */
+	return -E2BIG;
+}
+
+static int safexcel_ahash_enqueue(struct ahash_request *areq)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	int ret, ring;
+
+	ctx->base.send = safexcel_ahash_send;
+
+	if (req->processed && ctx->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED)
+		ctx->base.needs_inv = safexcel_ahash_needs_inv_get(areq);
+
+	if (ctx->base.ctxr) {
+		if (ctx->base.needs_inv)
+			ctx->base.send = safexcel_ahash_send_inv;
+	} else {
+		ctx->base.ring = safexcel_select_ring(priv);
+		ctx->base.ctxr = dma_pool_zalloc(priv->context_pool,
+						 EIP197_GFP_FLAGS(areq->base),
+						 &ctx->base.ctxr_dma);
+		if (!ctx->base.ctxr)
+			return -ENOMEM;
+	}
+
+	ring = ctx->base.ring;
+
+	spin_lock_bh(&priv->ring[ring].queue_lock);
+	ret = crypto_enqueue_request(&priv->ring[ring].queue, &areq->base);
+	spin_unlock_bh(&priv->ring[ring].queue_lock);
+
+	if (!priv->ring[ring].need_dequeue)
+		safexcel_dequeue(priv, ring);
+
+	return ret;
+}
+
+static int safexcel_ahash_update(struct ahash_request *areq)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
+
+	/* If the request is 0 length, do nothing */
+	if (!areq->nbytes)
+		return 0;
+
+	req->len += areq->nbytes;
+
+	safexcel_ahash_cache(areq);
+
+	/*
+	 * We're not doing partial updates when performing an hmac request.
+	 * Everything will be handled by the final() call.
+	 */
+	if (ctx->digest == CONTEXT_CONTROL_DIGEST_HMAC)
+		return 0;
+
+	if (req->hmac)
+		return safexcel_ahash_enqueue(areq);
+
+	if (!req->last_req &&
+	    req->len - req->processed > crypto_ahash_blocksize(ahash))
+		return safexcel_ahash_enqueue(areq);
+
+	return 0;
+}
+
+static int safexcel_ahash_final(struct ahash_request *areq)
+{
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+
+	req->last_req = true;
+	req->finish = true;
+
+	/* If we have an overall 0 length request */
+	if (!(req->len + areq->nbytes)) {
+		if (ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SHA1)
+			memcpy(areq->result, sha1_zero_message_hash,
+			       SHA1_DIGEST_SIZE);
+		else if (ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SHA224)
+			memcpy(areq->result, sha224_zero_message_hash,
+			       SHA224_DIGEST_SIZE);
+		else if (ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SHA256)
+			memcpy(areq->result, sha256_zero_message_hash,
+			       SHA256_DIGEST_SIZE);
+
+		return 0;
+	}
+
+	return safexcel_ahash_enqueue(areq);
+}
+
+static int safexcel_ahash_finup(struct ahash_request *areq)
+{
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	req->last_req = true;
+	req->finish = true;
+
+	safexcel_ahash_update(areq);
+	return safexcel_ahash_final(areq);
+}
+
+static int safexcel_ahash_export(struct ahash_request *areq, void *out)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_export_state *export = out;
+
+	export->len = req->len;
+	export->processed = req->processed;
+
+	memcpy(export->state, req->state, req->state_sz);
+	/* Export a whole block of cache, whatever part of it is in use */
+	memcpy(export->cache, req->cache, crypto_ahash_blocksize(ahash));
+
+	return 0;
+}
+
+static int safexcel_ahash_import(struct ahash_request *areq, const void *in)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	const struct safexcel_ahash_export_state *export = in;
+	int ret;
+
+	ret = crypto_ahash_init(areq);
+	if (ret)
+		return ret;
+
+	req->len = export->len;
+	req->processed = export->processed;
+
+	memcpy(req->cache, export->cache, crypto_ahash_blocksize(ahash));
+	memcpy(req->state, export->state, req->state_sz);
+
+	return 0;
+}
+
+static int safexcel_ahash_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_alg_template *tmpl =
+		container_of(__crypto_ahash_alg(tfm->__crt_alg),
+			     struct safexcel_alg_template, alg.ahash);
+
+	ctx->priv = tmpl->priv;
+
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct safexcel_ahash_req));
+	return 0;
+}
+
+static int safexcel_sha1_init(struct ahash_request *areq)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	memset(req, 0, sizeof(*req));
+
+	req->state[0] = SHA1_H0;
+	req->state[1] = SHA1_H1;
+	req->state[2] = SHA1_H2;
+	req->state[3] = SHA1_H3;
+	req->state[4] = SHA1_H4;
+
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA1;
+	ctx->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
+	req->state_sz = SHA1_DIGEST_SIZE;
+
+	return 0;
+}
+
+static int safexcel_sha1_digest(struct ahash_request *areq)
+{
+	int ret = safexcel_sha1_init(areq);
+
+	if (ret)
+		return ret;
+
+	return safexcel_ahash_finup(areq);
+}
+
+static void safexcel_ahash_cra_exit(struct crypto_tfm *tfm)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	int ret;
+
+	/* context not allocated, skip invalidation */
+	if (!ctx->base.ctxr)
+		return;
+
+	ret = safexcel_ahash_exit_inv(tfm);
+	if (ret)
+		dev_warn(priv->dev, "hash: invalidation error %d\n", ret);
+}
+
+struct safexcel_alg_template safexcel_alg_sha1 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.alg.ahash = {
+		.init = safexcel_sha1_init,
+		.update = safexcel_ahash_update,
+		.final = safexcel_ahash_final,
+		.finup = safexcel_ahash_finup,
+		.digest = safexcel_sha1_digest,
+		.export = safexcel_ahash_export,
+		.import = safexcel_ahash_import,
+		.halg = {
+			.digestsize = SHA1_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "sha1",
+				.cra_driver_name = "safexcel-sha1",
+				.cra_priority = 300,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA1_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_ahash_cra_init,
+				.cra_exit = safexcel_ahash_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_hmac_sha1_init(struct ahash_request *areq)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+
+	safexcel_sha1_init(areq);
+	ctx->digest = CONTEXT_CONTROL_DIGEST_HMAC;
+	return 0;
+}
+
+static int safexcel_hmac_sha1_digest(struct ahash_request *areq)
+{
+	int ret = safexcel_hmac_sha1_init(areq);
+
+	if (ret)
+		return ret;
+
+	return safexcel_ahash_finup(areq);
+}
+
+struct safexcel_ahash_result {
+	struct completion completion;
+	int error;
+};
+
+static void safexcel_ahash_complete(struct crypto_async_request *req, int error)
+{
+	struct safexcel_ahash_result *result = req->data;
+
+	if (error == -EINPROGRESS)
+		return;
+
+	result->error = error;
+	complete(&result->completion);
+}
+
+static int safexcel_hmac_init_pad(struct ahash_request *areq,
+				  unsigned int blocksize, const u8 *key,
+				  unsigned int keylen, u8 *ipad, u8 *opad)
+{
+	struct safexcel_ahash_result result;
+	struct scatterlist sg;
+	int ret, i;
+	u8 *keydup;
+
+	if (keylen <= blocksize) {
+		memcpy(ipad, key, keylen);
+	} else {
+		keydup = kmemdup(key, keylen, GFP_KERNEL);
+		if (!keydup)
+			return -ENOMEM;
+
+		ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_BACKLOG,
+					   safexcel_ahash_complete, &result);
+		sg_init_one(&sg, keydup, keylen);
+		ahash_request_set_crypt(areq, &sg, ipad, keylen);
+		init_completion(&result.completion);
+		/* -EBUSY means backlogged: the request is still pending */
+		ret = crypto_ahash_digest(areq);
+		if (ret == -EINPROGRESS || ret == -EBUSY) {
+			wait_for_completion_interruptible(&result.completion);
+			ret = result.error;
+		}
+
+		/* Avoid leaking */
+		memzero_explicit(keydup, keylen);
+		kfree(keydup);
+
+		if (ret)
+			return ret;
+
+		keylen = crypto_ahash_digestsize(crypto_ahash_reqtfm(areq));
+	}
+
+	memset(ipad + keylen, 0, blocksize - keylen);
+	memcpy(opad, ipad, blocksize);
+
+	for (i = 0; i < blocksize; i++) {
+		ipad[i] ^= HMAC_IPAD_VALUE;
+		opad[i] ^= HMAC_OPAD_VALUE;
+	}
+
+	return 0;
+}
+
+static int safexcel_hmac_init_iv(struct ahash_request *areq,
+				 unsigned int blocksize, u8 *pad, void *state)
+{
+	struct safexcel_ahash_result result;
+	struct safexcel_ahash_req *req;
+	struct scatterlist sg;
+	int ret;
+
+	ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				   safexcel_ahash_complete, &result);
+	sg_init_one(&sg, pad, blocksize);
+	ahash_request_set_crypt(areq, &sg, pad, blocksize);
+	init_completion(&result.completion);
+
+	ret = crypto_ahash_init(areq);
+	if (ret)
+		return ret;
+
+	req = ahash_request_ctx(areq);
+	req->hmac = true;
+	req->last_req = true;
+	/* -EBUSY means the request was backlogged, so wait for it as well */
+	ret = crypto_ahash_update(areq);
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY)
+		return ret;
+
+	wait_for_completion_interruptible(&result.completion);
+	if (result.error)
+		return result.error;
+
+	return crypto_ahash_export(areq, state);
+}
+
+static int safexcel_hmac_setkey(const char *alg, const u8 *key,
+				unsigned int keylen, void *istate, void *ostate)
+{
+	struct ahash_request *areq;
+	struct crypto_ahash *tfm;
+	unsigned int blocksize;
+	u8 *ipad, *opad;
+	int ret;
+
+	tfm = crypto_alloc_ahash(alg, CRYPTO_ALG_TYPE_AHASH,
+				 CRYPTO_ALG_TYPE_AHASH_MASK);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	areq = ahash_request_alloc(tfm, GFP_KERNEL);
+	if (!areq) {
+		ret = -ENOMEM;
+		goto free_ahash;
+	}
+
+	crypto_ahash_clear_flags(tfm, ~0);
+	blocksize = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
+
+	ipad = kzalloc(2 * blocksize, GFP_KERNEL);
+	if (!ipad) {
+		ret = -ENOMEM;
+		goto free_request;
+	}
+
+	opad = ipad + blocksize;
+
+	ret = safexcel_hmac_init_pad(areq, blocksize, key, keylen, ipad, opad);
+	if (ret)
+		goto free_ipad;
+
+	ret = safexcel_hmac_init_iv(areq, blocksize, ipad, istate);
+	if (ret)
+		goto free_ipad;
+
+	ret = safexcel_hmac_init_iv(areq, blocksize, opad, ostate);
+
+free_ipad:
+	kfree(ipad);
+free_request:
+	ahash_request_free(areq);
+free_ahash:
+	crypto_free_ahash(tfm);
+
+	return ret;
+}
+
+static int safexcel_hmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key,
+				     unsigned int keylen)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+	struct safexcel_ahash_export_state istate, ostate;
+	int ret, i;
+
+	ret = safexcel_hmac_setkey("safexcel-sha1", key, keylen, &istate, &ostate);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(ctx->ipad); i++) {
+		if (ctx->ipad[i] != le32_to_cpu(istate.state[i]) ||
+		    ctx->opad[i] != le32_to_cpu(ostate.state[i])) {
+			ctx->base.needs_inv = true;
+			break;
+		}
+	}
+	/* Store the new pads only after comparing them with the old ones */
+	memcpy(ctx->ipad, &istate.state, SHA1_DIGEST_SIZE);
+	memcpy(ctx->opad, &ostate.state, SHA1_DIGEST_SIZE);
+
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_hmac_sha1 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.alg.ahash = {
+		.init = safexcel_hmac_sha1_init,
+		.update = safexcel_ahash_update,
+		.final = safexcel_ahash_final,
+		.finup = safexcel_ahash_finup,
+		.digest = safexcel_hmac_sha1_digest,
+		.setkey = safexcel_hmac_sha1_setkey,
+		.export = safexcel_ahash_export,
+		.import = safexcel_ahash_import,
+		.halg = {
+			.digestsize = SHA1_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "hmac(sha1)",
+				.cra_driver_name = "safexcel-hmac-sha1",
+				.cra_priority = 300,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA1_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_ahash_cra_init,
+				.cra_exit = safexcel_ahash_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_sha256_init(struct ahash_request *areq)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	memset(req, 0, sizeof(*req));
+
+	req->state[0] = SHA256_H0;
+	req->state[1] = SHA256_H1;
+	req->state[2] = SHA256_H2;
+	req->state[3] = SHA256_H3;
+	req->state[4] = SHA256_H4;
+	req->state[5] = SHA256_H5;
+	req->state[6] = SHA256_H6;
+	req->state[7] = SHA256_H7;
+
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA256;
+	ctx->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
+	req->state_sz = SHA256_DIGEST_SIZE;
+
+	return 0;
+}
+
+static int safexcel_sha256_digest(struct ahash_request *areq)
+{
+	int ret = safexcel_sha256_init(areq);
+
+	if (ret)
+		return ret;
+
+	return safexcel_ahash_finup(areq);
+}
+
+struct safexcel_alg_template safexcel_alg_sha256 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.alg.ahash = {
+		.init = safexcel_sha256_init,
+		.update = safexcel_ahash_update,
+		.final = safexcel_ahash_final,
+		.finup = safexcel_ahash_finup,
+		.digest = safexcel_sha256_digest,
+		.export = safexcel_ahash_export,
+		.import = safexcel_ahash_import,
+		.halg = {
+			.digestsize = SHA256_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "sha256",
+				.cra_driver_name = "safexcel-sha256",
+				.cra_priority = 300,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA256_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_ahash_cra_init,
+				.cra_exit = safexcel_ahash_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_sha224_init(struct ahash_request *areq)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	memset(req, 0, sizeof(*req));
+
+	req->state[0] = SHA224_H0;
+	req->state[1] = SHA224_H1;
+	req->state[2] = SHA224_H2;
+	req->state[3] = SHA224_H3;
+	req->state[4] = SHA224_H4;
+	req->state[5] = SHA224_H5;
+	req->state[6] = SHA224_H6;
+	req->state[7] = SHA224_H7;
+
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA224;
+	ctx->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
+	req->state_sz = SHA256_DIGEST_SIZE;
+
+	return 0;
+}
+
+static int safexcel_sha224_digest(struct ahash_request *areq)
+{
+	int ret = safexcel_sha224_init(areq);
+
+	if (ret)
+		return ret;
+
+	return safexcel_ahash_finup(areq);
+}
+
+struct safexcel_alg_template safexcel_alg_sha224 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.alg.ahash = {
+		.init = safexcel_sha224_init,
+		.update = safexcel_ahash_update,
+		.final = safexcel_ahash_final,
+		.finup = safexcel_ahash_finup,
+		.digest = safexcel_sha224_digest,
+		.export = safexcel_ahash_export,
+		.import = safexcel_ahash_import,
+		.halg = {
+			.digestsize = SHA224_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "sha224",
+				.cra_driver_name = "safexcel-sha224",
+				.cra_priority = 300,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA224_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_ahash_cra_init,
+				.cra_exit = safexcel_ahash_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
diff --git a/drivers/crypto/inside-secure/safexcel_ring.c b/drivers/crypto/inside-secure/safexcel_ring.c
new file mode 100644
index 0000000..c9d2a87
--- /dev/null
+++ b/drivers/crypto/inside-secure/safexcel_ring.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2017 Marvell
+ *
+ * Antoine Tenart <antoine.tenart@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+
+#include "safexcel.h"
+
+int safexcel_init_ring_descriptors(struct safexcel_crypto_priv *priv,
+				   struct safexcel_ring *cdr,
+				   struct safexcel_ring *rdr)
+{
+	cdr->offset = sizeof(u32) * priv->config.cd_offset;
+	cdr->base = dmam_alloc_coherent(priv->dev,
+					cdr->offset * EIP197_DEFAULT_RING_SIZE,
+					&cdr->base_dma, GFP_KERNEL);
+	if (!cdr->base)
+		return -ENOMEM;
+	cdr->write = cdr->base;
+	cdr->base_end = cdr->base + cdr->offset * EIP197_DEFAULT_RING_SIZE;
+	cdr->read = cdr->base;
+
+	rdr->offset = sizeof(u32) * priv->config.rd_offset;
+	rdr->base = dmam_alloc_coherent(priv->dev,
+					rdr->offset * EIP197_DEFAULT_RING_SIZE,
+					&rdr->base_dma, GFP_KERNEL);
+	if (!rdr->base)
+		return -ENOMEM;
+	rdr->write = rdr->base;
+	rdr->base_end = rdr->base + rdr->offset * EIP197_DEFAULT_RING_SIZE;
+	rdr->read = rdr->base;
+
+	return 0;
+}
+
+inline int safexcel_select_ring(struct safexcel_crypto_priv *priv)
+{
+	return (atomic_inc_return(&priv->ring_used) % priv->config.rings);
+}
+
+static void *safexcel_ring_next_wptr(struct safexcel_crypto_priv *priv,
+				     struct safexcel_ring *ring)
+{
+	void *ptr = ring->write;
+
+	if (ring->nr == EIP197_DEFAULT_RING_SIZE - 1)
+		return ERR_PTR(-ENOMEM);
+
+	ring->write += ring->offset;
+	if (ring->write == ring->base_end)
+		ring->write = ring->base;
+
+	ring->nr++;
+	return ptr;
+}
+
+void *safexcel_ring_next_rptr(struct safexcel_crypto_priv *priv,
+			      struct safexcel_ring *ring)
+{
+	void *ptr = ring->read;
+
+	if (!ring->nr)
+		return ERR_PTR(-ENOENT);
+
+	ring->read += ring->offset;
+	if (ring->read == ring->base_end)
+		ring->read = ring->base;
+
+	ring->nr--;
+	return ptr;
+}
+
+void safexcel_ring_rollback_wptr(struct safexcel_crypto_priv *priv,
+				 struct safexcel_ring *ring)
+{
+	if (!ring->nr)
+		return;
+
+	if (ring->write == ring->base)
+		ring->write = ring->base_end - ring->offset;
+	else
+		ring->write -= ring->offset;
+
+	ring->nr--;
+}
+
+struct safexcel_command_desc *safexcel_add_cdesc(struct safexcel_crypto_priv *priv,
+						 int ring_id,
+						 bool first, bool last,
+						 dma_addr_t data, u32 data_len,
+						 u32 full_data_len,
+						 dma_addr_t context) {
+	struct safexcel_command_desc *cdesc;
+	int i;
+
+	cdesc = safexcel_ring_next_wptr(priv, &priv->ring[ring_id].cdr);
+	if (IS_ERR(cdesc))
+		return cdesc;
+
+	memset(cdesc, 0, sizeof(struct safexcel_command_desc));
+
+	cdesc->first_seg = first;
+	cdesc->last_seg = last;
+	cdesc->particle_size = data_len;
+	cdesc->data_lo = lower_32_bits(data);
+	cdesc->data_hi = upper_32_bits(data);
+
+	if (first && context) {
+		struct safexcel_token *token =
+			(struct safexcel_token *)cdesc->control_data.token;
+
+		cdesc->control_data.packet_length = full_data_len;
+		cdesc->control_data.options = EIP197_OPTION_MAGIC_VALUE |
+					      EIP197_OPTION_64BIT_CTX |
+					      EIP197_OPTION_CTX_CTRL_IN_CMD;
+		cdesc->control_data.context_lo =
+			(lower_32_bits(context) & GENMASK(31, 2)) >> 2;
+		cdesc->control_data.context_hi = upper_32_bits(context);
+
+		/* TODO: large xform HMAC with SHA-384/512 uses refresh = 3 */
+		cdesc->control_data.refresh = 2;
+
+		for (i = 0; i < EIP197_MAX_TOKENS; i++)
+			eip197_noop_token(&token[i]);
+	}
+
+	return cdesc;
+}
+
+struct safexcel_result_desc *safexcel_add_rdesc(struct safexcel_crypto_priv *priv,
+						int ring_id,
+						bool first, bool last,
+						dma_addr_t data, u32 len)
+{
+	struct safexcel_result_desc *rdesc;
+
+	rdesc = safexcel_ring_next_wptr(priv, &priv->ring[ring_id].rdr);
+	if (IS_ERR(rdesc))
+		return rdesc;
+
+	memset(rdesc, 0, sizeof(struct safexcel_result_desc));
+
+	rdesc->first_seg = first;
+	rdesc->last_seg = last;
+	rdesc->particle_size = len;
+	rdesc->data_lo = lower_32_bits(data);
+	rdesc->data_hi = upper_32_bits(data);
+
+	return rdesc;
+}
diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index 771dd26..427cbe0 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -23,6 +23,7 @@
 #include <crypto/ctr.h>
 #include <crypto/des.h>
 #include <crypto/aes.h>
+#include <crypto/hmac.h>
 #include <crypto/sha.h>
 #include <crypto/algapi.h>
 #include <crypto/internal/aead.h>
@@ -90,8 +91,6 @@
 #define CTL_FLAG_PERFORM_AEAD	0x0008
 #define CTL_FLAG_MASK		0x000f
 
-#define HMAC_IPAD_VALUE   0x36
-#define HMAC_OPAD_VALUE   0x5C
 #define HMAC_PAD_BLOCKLEN SHA1_BLOCK_SIZE
 
 #define MD5_DIGEST_SIZE   16
diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c
index 77c0fb9..e61b085 100644
--- a/drivers/crypto/marvell/hash.c
+++ b/drivers/crypto/marvell/hash.c
@@ -12,6 +12,7 @@
  * by the Free Software Foundation.
  */
 
+#include <crypto/hmac.h>
 #include <crypto/md5.h>
 #include <crypto/sha.h>
 
@@ -1164,8 +1165,8 @@ static int mv_cesa_ahmac_pad_init(struct ahash_request *req,
 	memcpy(opad, ipad, blocksize);
 
 	for (i = 0; i < blocksize; i++) {
-		ipad[i] ^= 0x36;
-		opad[i] ^= 0x5c;
+		ipad[i] ^= HMAC_IPAD_VALUE;
+		opad[i] ^= HMAC_OPAD_VALUE;
 	}
 
 	return 0;
diff --git a/drivers/crypto/mediatek/mtk-platform.c b/drivers/crypto/mediatek/mtk-platform.c
index b6ecc28..000b650 100644
--- a/drivers/crypto/mediatek/mtk-platform.c
+++ b/drivers/crypto/mediatek/mtk-platform.c
@@ -504,19 +504,14 @@ static int mtk_crypto_probe(struct platform_device *pdev)
 		}
 	}
 
-	cryp->clk_ethif = devm_clk_get(&pdev->dev, "ethif");
 	cryp->clk_cryp = devm_clk_get(&pdev->dev, "cryp");
-	if (IS_ERR(cryp->clk_ethif) || IS_ERR(cryp->clk_cryp))
+	if (IS_ERR(cryp->clk_cryp))
 		return -EPROBE_DEFER;
 
 	cryp->dev = &pdev->dev;
 	pm_runtime_enable(cryp->dev);
 	pm_runtime_get_sync(cryp->dev);
 
-	err = clk_prepare_enable(cryp->clk_ethif);
-	if (err)
-		goto err_clk_ethif;
-
 	err = clk_prepare_enable(cryp->clk_cryp);
 	if (err)
 		goto err_clk_cryp;
@@ -559,8 +554,6 @@ static int mtk_crypto_probe(struct platform_device *pdev)
 err_resource:
 	clk_disable_unprepare(cryp->clk_cryp);
 err_clk_cryp:
-	clk_disable_unprepare(cryp->clk_ethif);
-err_clk_ethif:
 	pm_runtime_put_sync(cryp->dev);
 	pm_runtime_disable(cryp->dev);
 
@@ -576,7 +569,6 @@ static int mtk_crypto_remove(struct platform_device *pdev)
 	mtk_desc_dma_free(cryp);
 
 	clk_disable_unprepare(cryp->clk_cryp);
-	clk_disable_unprepare(cryp->clk_ethif);
 
 	pm_runtime_put_sync(cryp->dev);
 	pm_runtime_disable(cryp->dev);
@@ -596,7 +588,6 @@ static struct platform_driver mtk_crypto_driver = {
 	.remove = mtk_crypto_remove,
 	.driver = {
 		   .name = "mtk-crypto",
-		   .owner = THIS_MODULE,
 		   .of_match_table = of_crypto_id,
 	},
 };
diff --git a/drivers/crypto/mediatek/mtk-platform.h b/drivers/crypto/mediatek/mtk-platform.h
index 303c152..f0831f1 100644
--- a/drivers/crypto/mediatek/mtk-platform.h
+++ b/drivers/crypto/mediatek/mtk-platform.h
@@ -200,7 +200,6 @@ struct mtk_sha_rec {
  * struct mtk_cryp - Cryptographic device
  * @base:	pointer to mapped register I/O base
  * @dev:	pointer to device
- * @clk_ethif:	pointer to ethif clock
  * @clk_cryp:	pointer to crypto clock
  * @irq:	global system and rings IRQ
  * @ring:	pointer to descriptor rings
@@ -215,7 +214,6 @@ struct mtk_sha_rec {
 struct mtk_cryp {
 	void __iomem *base;
 	struct device *dev;
-	struct clk *clk_ethif;
 	struct clk *clk_cryp;
 	int irq[MTK_IRQ_NUM];
 
diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c
index 2226f12..5f4f845 100644
--- a/drivers/crypto/mediatek/mtk-sha.c
+++ b/drivers/crypto/mediatek/mtk-sha.c
@@ -12,6 +12,7 @@
  * Some ideas are from atmel-sha.c and omap-sham.c drivers.
  */
 
+#include <crypto/hmac.h>
 #include <crypto/sha.h>
 #include "mtk-platform.h"
 
@@ -825,8 +826,8 @@ static int mtk_sha_setkey(struct crypto_ahash *tfm, const u8 *key,
 	memcpy(bctx->opad, bctx->ipad, bs);
 
 	for (i = 0; i < bs; i++) {
-		bctx->ipad[i] ^= 0x36;
-		bctx->opad[i] ^= 0x5c;
+		bctx->ipad[i] ^= HMAC_IPAD_VALUE;
+		bctx->opad[i] ^= HMAC_OPAD_VALUE;
 	}
 
 	return 0;
diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c
index 451fa18..bf25f41 100644
--- a/drivers/crypto/mv_cesa.c
+++ b/drivers/crypto/mv_cesa.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/clk.h>
+#include <crypto/hmac.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
 #include <linux/of.h>
@@ -822,8 +823,8 @@ static int mv_hash_setkey(struct crypto_ahash *tfm, const u8 * key,
 		memcpy(opad, ipad, bs);
 
 		for (i = 0; i < bs; i++) {
-			ipad[i] ^= 0x36;
-			opad[i] ^= 0x5c;
+			ipad[i] ^= HMAC_IPAD_VALUE;
+			opad[i] ^= HMAC_OPAD_VALUE;
 		}
 
 		rc = crypto_shash_init(shash) ? :
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index 4ecb77a..2694513 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -2169,7 +2169,7 @@ static int n2_mau_remove(struct platform_device *dev)
 	return 0;
 }
 
-static struct of_device_id n2_crypto_match[] = {
+static const struct of_device_id n2_crypto_match[] = {
 	{
 		.name = "n2cp",
 		.compatible = "SUNW,n2-cwq",
@@ -2196,7 +2196,7 @@ static struct platform_driver n2_crypto_driver = {
 	.remove		=	n2_crypto_remove,
 };
 
-static struct of_device_id n2_mau_match[] = {
+static const struct of_device_id n2_mau_match[] = {
 	{
 		.name = "ncp",
 		.compatible = "SUNW,n2-mau",
diff --git a/drivers/crypto/omap-aes-gcm.c b/drivers/crypto/omap-aes-gcm.c
new file mode 100644
index 0000000..7d4f8a4
--- /dev/null
+++ b/drivers/crypto/omap-aes-gcm.c
@@ -0,0 +1,408 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for OMAP AES GCM HW acceleration.
+ *
+ * Copyright (c) 2016 Texas Instruments Incorporated
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/omap-dma.h>
+#include <linux/interrupt.h>
+#include <crypto/aes.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/skcipher.h>
+#include <crypto/internal/aead.h>
+
+#include "omap-crypto.h"
+#include "omap-aes.h"
+
+static int omap_aes_gcm_handle_queue(struct omap_aes_dev *dd,
+				     struct aead_request *req);
+
+static void omap_aes_gcm_finish_req(struct omap_aes_dev *dd, int ret)
+{
+	struct aead_request *req = dd->aead_req;
+
+	dd->flags &= ~FLAGS_BUSY;
+	dd->in_sg = NULL;
+	dd->out_sg = NULL;
+
+	req->base.complete(&req->base, ret);
+}
+
+/* GCM completion: unmap DMA, then store (encrypt) or check (decrypt) tag. */
+static void omap_aes_gcm_done_task(struct omap_aes_dev *dd)
+{
+	u8 *tag, diff = 0;
+	int alen, clen, i, ret = 0, nsg;
+	struct omap_aes_reqctx *rctx;
+
+	alen = ALIGN(dd->assoc_len, AES_BLOCK_SIZE);
+	clen = ALIGN(dd->total, AES_BLOCK_SIZE);
+	rctx = aead_request_ctx(dd->aead_req);
+	nsg = !!(dd->assoc_len && dd->total);
+
+	dma_sync_sg_for_device(dd->dev, dd->out_sg, dd->out_sg_len,
+			       DMA_FROM_DEVICE);
+	dma_unmap_sg(dd->dev, dd->in_sg, dd->in_sg_len, DMA_TO_DEVICE);
+	dma_unmap_sg(dd->dev, dd->out_sg, dd->out_sg_len, DMA_FROM_DEVICE);
+	omap_aes_crypt_dma_stop(dd);
+	omap_crypto_cleanup(dd->out_sg, dd->orig_out,
+			    dd->aead_req->assoclen, dd->total,
+			    FLAGS_OUT_DATA_ST_SHIFT, dd->flags);
+
+	if (dd->flags & FLAGS_ENCRYPT)
+		scatterwalk_map_and_copy(rctx->auth_tag,
+					 dd->aead_req->dst,
+					 dd->total + dd->aead_req->assoclen,
+					 dd->authsize, 1);
+
+	omap_crypto_cleanup(&dd->in_sgl[0], NULL, 0, alen,
+			    FLAGS_ASSOC_DATA_ST_SHIFT, dd->flags);
+	omap_crypto_cleanup(&dd->in_sgl[nsg], NULL, 0, clen,
+			    FLAGS_IN_DATA_ST_SHIFT, dd->flags);
+
+	/*
+	 * On decrypt auth_tag is computed tag ^ received tag, so it must be
+	 * all zero. Fold every byte and stay silent: a bad tag is routine.
+	 */
+	if (!(dd->flags & FLAGS_ENCRYPT)) {
+		tag = (u8 *)rctx->auth_tag;
+		for (i = 0; i < dd->authsize; i++)
+			diff |= tag[i];
+		if (diff)
+			ret = -EBADMSG;
+	}
+
+	omap_aes_gcm_finish_req(dd, ret);
+	omap_aes_gcm_handle_queue(dd, NULL);
+}
+
+/*
+ * Prepare dd's DMA scatterlists for @req with omap_crypto_align_sg() and
+ * record the request lengths. Returns 0 or the first align_sg error.
+ */
+static int omap_aes_gcm_copy_buffers(struct omap_aes_dev *dd,
+				     struct aead_request *req)
+{
+	int alen, clen, cryptlen, assoclen, ret, nsg;
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	unsigned int authlen = crypto_aead_authsize(aead);
+	struct scatterlist *tmp, sg_arr[2];
+	u16 flags = 0;
+
+	assoclen = req->assoclen;
+	cryptlen = req->cryptlen;
+
+	if (dd->flags & FLAGS_RFC4106_GCM)
+		assoclen -= 8;
+	if (!(dd->flags & FLAGS_ENCRYPT))
+		cryptlen -= authlen;
+
+	alen = ALIGN(assoclen, AES_BLOCK_SIZE);
+	clen = ALIGN(cryptlen, AES_BLOCK_SIZE);
+	nsg = !!(assoclen && cryptlen);
+
+	omap_aes_clear_copy_flags(dd);
+	sg_init_table(dd->in_sgl, nsg + 1);
+	if (assoclen) {
+		tmp = req->src;
+		ret = omap_crypto_align_sg(&tmp, assoclen,
+					   AES_BLOCK_SIZE, dd->in_sgl,
+					   OMAP_CRYPTO_COPY_DATA |
+					   OMAP_CRYPTO_ZERO_BUF |
+					   OMAP_CRYPTO_FORCE_SINGLE_ENTRY,
+					   FLAGS_ASSOC_DATA_ST_SHIFT,
+					   &dd->flags);
+		if (ret)
+			return ret;
+	}
+
+	if (cryptlen) {
+		tmp = scatterwalk_ffwd(sg_arr, req->src, req->assoclen);
+		ret = omap_crypto_align_sg(&tmp, cryptlen,
+					   AES_BLOCK_SIZE, &dd->in_sgl[nsg],
+					   OMAP_CRYPTO_COPY_DATA |
+					   OMAP_CRYPTO_ZERO_BUF |
+					   OMAP_CRYPTO_FORCE_SINGLE_ENTRY,
+					   FLAGS_IN_DATA_ST_SHIFT,
+					   &dd->flags);
+		if (ret)
+			return ret;
+	}
+
+	dd->in_sg = dd->in_sgl;
+	dd->total = cryptlen;
+	dd->assoc_len = assoclen;
+	dd->authsize = authlen;
+	dd->orig_out = req->dst;
+
+	/* Skip all of dst's AAD (req->assoclen), matching the src offset. */
+	dd->out_sg = scatterwalk_ffwd(sg_arr, req->dst, req->assoclen);
+	if (req->src == req->dst || dd->out_sg == sg_arr)
+		flags |= OMAP_CRYPTO_FORCE_COPY;
+
+	ret = omap_crypto_align_sg(&dd->out_sg, cryptlen,
+				   AES_BLOCK_SIZE, &dd->out_sgl,
+				   flags,
+				   FLAGS_OUT_DATA_ST_SHIFT, &dd->flags);
+	if (ret)
+		return ret;
+
+	dd->in_sg_len = sg_nents_for_len(dd->in_sg, alen + clen);
+	dd->out_sg_len = sg_nents_for_len(dd->out_sg, clen);
+
+	return 0;
+}
+
+static void omap_aes_gcm_complete(struct crypto_async_request *req, int err)
+{
+	struct omap_aes_gcm_result *res = req->data;
+
+	if (err == -EINPROGRESS)
+		return;
+
+	res->err = err;
+	complete(&res->completion);
+}
+
+/*
+ * Encrypt the single block @iv into @tag using ctx->ctr, waiting for an
+ * asynchronous cipher to complete. Returns 0 or a negative errno.
+ */
+static int do_encrypt_iv(struct aead_request *req, u32 *tag, u32 *iv)
+{
+	struct scatterlist iv_sg, tag_sg;
+	struct skcipher_request *sk_req;
+	struct omap_aes_gcm_result result;
+	struct omap_aes_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	int ret;
+
+	ret = crypto_skcipher_setkey(ctx->ctr, (u8 *)ctx->key, ctx->keylen);
+	if (ret)
+		return ret;
+
+	sk_req = skcipher_request_alloc(ctx->ctr, GFP_KERNEL);
+	if (!sk_req) {
+		pr_err("skcipher: Failed to allocate request\n");
+		return -ENOMEM;
+	}
+
+	init_completion(&result.completion);
+
+	sg_init_one(&iv_sg, iv, AES_BLOCK_SIZE);
+	sg_init_one(&tag_sg, tag, AES_BLOCK_SIZE);
+	skcipher_request_set_callback(sk_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      omap_aes_gcm_complete, &result);
+	skcipher_request_set_crypt(sk_req, &iv_sg, &tag_sg, AES_BLOCK_SIZE,
+				   NULL);
+
+	ret = crypto_skcipher_encrypt(sk_req);
+	if (ret == -EINPROGRESS || ret == -EBUSY) {
+		/*
+		 * Must not be interruptible: the callback writes to the
+		 * on-stack result, so it has to run before we return.
+		 */
+		wait_for_completion(&result.completion);
+		ret = result.err;
+	}
+	if (ret)
+		pr_err("Encryption of IV failed for GCM mode\n");
+
+	skcipher_request_free(sk_req);
+	return ret;
+}
+
+/*
+ * DMA-out completion callback. XORs the hardware tag registers and, when
+ * decrypting, the tag received with the message into rctx->auth_tag, then
+ * runs the done task. tag[] is zeroed so unused words contribute nothing.
+ */
+void omap_aes_gcm_dma_out_callback(void *data)
+{
+	struct omap_aes_dev *dd = data;
+	struct omap_aes_reqctx *rctx = aead_request_ctx(dd->aead_req);
+	u32 *auth_tag = (u32 *)rctx->auth_tag;
+	u32 tag[4] = { 0 };
+	int i;
+
+	if (!(dd->flags & FLAGS_ENCRYPT))
+		scatterwalk_map_and_copy(tag, dd->aead_req->src,
+					 dd->total + dd->aead_req->assoclen,
+					 dd->authsize, 0);
+
+	for (i = 0; i < 4; i++)
+		auth_tag[i] ^= omap_aes_read(dd, AES_REG_TAG_N(dd, i)) ^ tag[i];
+
+	omap_aes_gcm_done_task(dd);
+}
+
+/*
+ * Enqueue @req (may be NULL) and start the next queued request if the device
+ * is idle. A request that cannot be started is completed with its error so
+ * FLAGS_BUSY is always released. Returns the enqueue status, 0 for NULL.
+ */
+static int omap_aes_gcm_handle_queue(struct omap_aes_dev *dd,
+				     struct aead_request *req)
+{
+	struct omap_aes_ctx *ctx;
+	struct aead_request *backlog;
+	struct omap_aes_reqctx *rctx;
+	unsigned long flags;
+	int err, ret = 0;
+
+	spin_lock_irqsave(&dd->lock, flags);
+	if (req)
+		ret = aead_enqueue_request(&dd->aead_queue, req);
+	if (dd->flags & FLAGS_BUSY) {
+		spin_unlock_irqrestore(&dd->lock, flags);
+		return ret;
+	}
+
+	backlog = aead_get_backlog(&dd->aead_queue);
+	req = aead_dequeue_request(&dd->aead_queue);
+	if (req)
+		dd->flags |= FLAGS_BUSY;
+	spin_unlock_irqrestore(&dd->lock, flags);
+
+	if (!req)
+		return ret;
+	if (backlog)
+		backlog->base.complete(&backlog->base, -EINPROGRESS);
+
+	ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	rctx = aead_request_ctx(req);
+	dd->ctx = ctx;
+	rctx->dd = dd;
+	dd->aead_req = req;
+
+	rctx->mode &= FLAGS_MODE_MASK;
+	dd->flags = (dd->flags & ~FLAGS_MODE_MASK) | rctx->mode;
+
+	err = omap_aes_gcm_copy_buffers(dd, req);
+	if (!err)
+		err = omap_aes_write_ctrl(dd);
+	if (!err)
+		err = omap_aes_crypt_dma_start(dd);
+	if (err) {
+		omap_aes_gcm_finish_req(dd, err);
+		omap_aes_gcm_handle_queue(dd, NULL);
+	}
+
+	return ret;
+}
+
+/* Shared GCM entry: encrypt IV||1 into rctx->auth_tag, then queue @req. */
+static int omap_aes_gcm_crypt(struct aead_request *req, unsigned long mode)
+{
+	struct omap_aes_reqctx *rctx = aead_request_ctx(req);
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	unsigned int authlen = crypto_aead_authsize(aead);
+	struct omap_aes_dev *dd;
+	__be32 counter = cpu_to_be32(1);
+	int err, assoclen = req->assoclen;
+
+	memset(rctx->auth_tag, 0, sizeof(rctx->auth_tag));
+	memcpy(rctx->iv + 12, &counter, 4);
+
+	err = do_encrypt_iv(req, (u32 *)rctx->auth_tag, (u32 *)rctx->iv);
+	if (err)
+		return err;
+
+	if (mode & FLAGS_RFC4106_GCM)
+		assoclen -= 8;
+	if (assoclen + req->cryptlen == 0) {
+		/* Nothing to process: the tag goes right after dst's AAD. */
+		scatterwalk_map_and_copy(rctx->auth_tag, req->dst,
+					 req->assoclen, authlen, 1);
+		return 0;
+	}
+
+	dd = omap_aes_find_dev(rctx);
+	if (!dd)
+		return -ENODEV;
+	rctx->mode = mode;
+
+	return omap_aes_gcm_handle_queue(dd, req);
+}
+
+int omap_aes_gcm_encrypt(struct aead_request *req)
+{
+	struct omap_aes_reqctx *rctx = aead_request_ctx(req);
+
+	memcpy(rctx->iv, req->iv, 12);
+	return omap_aes_gcm_crypt(req, FLAGS_ENCRYPT | FLAGS_GCM);
+}
+
+int omap_aes_gcm_decrypt(struct aead_request *req)
+{
+	struct omap_aes_reqctx *rctx = aead_request_ctx(req);
+
+	memcpy(rctx->iv, req->iv, 12);
+	return omap_aes_gcm_crypt(req, FLAGS_GCM);
+}
+
+int omap_aes_4106gcm_encrypt(struct aead_request *req)
+{
+	struct omap_aes_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct omap_aes_reqctx *rctx = aead_request_ctx(req);
+
+	memcpy(rctx->iv, ctx->nonce, 4);
+	memcpy(rctx->iv + 4, req->iv, 8);
+	return omap_aes_gcm_crypt(req, FLAGS_ENCRYPT | FLAGS_GCM |
+				  FLAGS_RFC4106_GCM);
+}
+
+int omap_aes_4106gcm_decrypt(struct aead_request *req)
+{
+	struct omap_aes_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct omap_aes_reqctx *rctx = aead_request_ctx(req);
+
+	memcpy(rctx->iv, ctx->nonce, 4);
+	memcpy(rctx->iv + 4, req->iv, 8);
+	return omap_aes_gcm_crypt(req, FLAGS_GCM | FLAGS_RFC4106_GCM);
+}
+
+int omap_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key,
+			unsigned int keylen)
+{
+	struct omap_aes_ctx *ctx = crypto_aead_ctx(tfm);
+
+	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 &&
+	    keylen != AES_KEYSIZE_256)
+		return -EINVAL;
+
+	memcpy(ctx->key, key, keylen);
+	ctx->keylen = keylen;
+
+	return 0;
+}
+
+int omap_aes_4106gcm_setkey(struct crypto_aead *tfm, const u8 *key,
+			    unsigned int keylen)
+{
+	struct omap_aes_ctx *ctx = crypto_aead_ctx(tfm);
+
+	if (keylen < 4)
+		return -EINVAL;
+
+	keylen -= 4;
+	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 &&
+	    keylen != AES_KEYSIZE_256)
+		return -EINVAL;
+
+	memcpy(ctx->key, key, keylen);
+	memcpy(ctx->nonce, key + keylen, 4);
+	ctx->keylen = keylen;
+
+	return 0;
+}
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index fe32dd9..5120a17 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -37,155 +37,10 @@
 #include <crypto/aes.h>
 #include <crypto/engine.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/internal/aead.h>
 
-#define DST_MAXBURST			4
-#define DMA_MIN				(DST_MAXBURST * sizeof(u32))
-
-#define _calc_walked(inout) (dd->inout##_walk.offset - dd->inout##_sg->offset)
-
-/* OMAP TRM gives bitfields as start:end, where start is the higher bit
-   number. For example 7:0 */
-#define FLD_MASK(start, end)	(((1 << ((start) - (end) + 1)) - 1) << (end))
-#define FLD_VAL(val, start, end) (((val) << (end)) & FLD_MASK(start, end))
-
-#define AES_REG_KEY(dd, x)		((dd)->pdata->key_ofs - \
-						((x ^ 0x01) * 0x04))
-#define AES_REG_IV(dd, x)		((dd)->pdata->iv_ofs + ((x) * 0x04))
-
-#define AES_REG_CTRL(dd)		((dd)->pdata->ctrl_ofs)
-#define AES_REG_CTRL_CTR_WIDTH_MASK	GENMASK(8, 7)
-#define AES_REG_CTRL_CTR_WIDTH_32	0
-#define AES_REG_CTRL_CTR_WIDTH_64	BIT(7)
-#define AES_REG_CTRL_CTR_WIDTH_96	BIT(8)
-#define AES_REG_CTRL_CTR_WIDTH_128	GENMASK(8, 7)
-#define AES_REG_CTRL_CTR		BIT(6)
-#define AES_REG_CTRL_CBC		BIT(5)
-#define AES_REG_CTRL_KEY_SIZE		GENMASK(4, 3)
-#define AES_REG_CTRL_DIRECTION		BIT(2)
-#define AES_REG_CTRL_INPUT_READY	BIT(1)
-#define AES_REG_CTRL_OUTPUT_READY	BIT(0)
-#define AES_REG_CTRL_MASK		GENMASK(24, 2)
-
-#define AES_REG_DATA_N(dd, x)		((dd)->pdata->data_ofs + ((x) * 0x04))
-
-#define AES_REG_REV(dd)			((dd)->pdata->rev_ofs)
-
-#define AES_REG_MASK(dd)		((dd)->pdata->mask_ofs)
-#define AES_REG_MASK_SIDLE		BIT(6)
-#define AES_REG_MASK_START		BIT(5)
-#define AES_REG_MASK_DMA_OUT_EN		BIT(3)
-#define AES_REG_MASK_DMA_IN_EN		BIT(2)
-#define AES_REG_MASK_SOFTRESET		BIT(1)
-#define AES_REG_AUTOIDLE		BIT(0)
-
-#define AES_REG_LENGTH_N(x)		(0x54 + ((x) * 0x04))
-
-#define AES_REG_IRQ_STATUS(dd)         ((dd)->pdata->irq_status_ofs)
-#define AES_REG_IRQ_ENABLE(dd)         ((dd)->pdata->irq_enable_ofs)
-#define AES_REG_IRQ_DATA_IN            BIT(1)
-#define AES_REG_IRQ_DATA_OUT           BIT(2)
-#define DEFAULT_TIMEOUT		(5*HZ)
-
-#define DEFAULT_AUTOSUSPEND_DELAY	1000
-
-#define FLAGS_MODE_MASK		0x000f
-#define FLAGS_ENCRYPT		BIT(0)
-#define FLAGS_CBC		BIT(1)
-#define FLAGS_GIV		BIT(2)
-#define FLAGS_CTR		BIT(3)
-
-#define FLAGS_INIT		BIT(4)
-#define FLAGS_FAST		BIT(5)
-#define FLAGS_BUSY		BIT(6)
-
-#define AES_BLOCK_WORDS		(AES_BLOCK_SIZE >> 2)
-
-struct omap_aes_ctx {
-	struct omap_aes_dev *dd;
-
-	int		keylen;
-	u32		key[AES_KEYSIZE_256 / sizeof(u32)];
-	unsigned long	flags;
-	struct crypto_skcipher	*fallback;
-};
-
-struct omap_aes_reqctx {
-	unsigned long mode;
-};
-
-#define OMAP_AES_QUEUE_LENGTH	1
-#define OMAP_AES_CACHE_SIZE	0
-
-struct omap_aes_algs_info {
-	struct crypto_alg	*algs_list;
-	unsigned int		size;
-	unsigned int		registered;
-};
-
-struct omap_aes_pdata {
-	struct omap_aes_algs_info	*algs_info;
-	unsigned int	algs_info_size;
-
-	void		(*trigger)(struct omap_aes_dev *dd, int length);
-
-	u32		key_ofs;
-	u32		iv_ofs;
-	u32		ctrl_ofs;
-	u32		data_ofs;
-	u32		rev_ofs;
-	u32		mask_ofs;
-	u32             irq_enable_ofs;
-	u32             irq_status_ofs;
-
-	u32		dma_enable_in;
-	u32		dma_enable_out;
-	u32		dma_start;
-
-	u32		major_mask;
-	u32		major_shift;
-	u32		minor_mask;
-	u32		minor_shift;
-};
-
-struct omap_aes_dev {
-	struct list_head	list;
-	unsigned long		phys_base;
-	void __iomem		*io_base;
-	struct omap_aes_ctx	*ctx;
-	struct device		*dev;
-	unsigned long		flags;
-	int			err;
-
-	struct tasklet_struct	done_task;
-
-	struct ablkcipher_request	*req;
-	struct crypto_engine		*engine;
-
-	/*
-	 * total is used by PIO mode for book keeping so introduce
-	 * variable total_save as need it to calc page_order
-	 */
-	size_t				total;
-	size_t				total_save;
-
-	struct scatterlist		*in_sg;
-	struct scatterlist		*out_sg;
-
-	/* Buffers for copying for unaligned cases */
-	struct scatterlist		in_sgl;
-	struct scatterlist		out_sgl;
-	struct scatterlist		*orig_out;
-	int				sgs_copied;
-
-	struct scatter_walk		in_walk;
-	struct scatter_walk		out_walk;
-	struct dma_chan		*dma_lch_in;
-	struct dma_chan		*dma_lch_out;
-	int			in_sg_len;
-	int			out_sg_len;
-	int			pio_only;
-	const struct omap_aes_pdata	*pdata;
-};
+#include "omap-crypto.h"
+#include "omap-aes.h"
 
 /* keep registered devices data here */
 static LIST_HEAD(dev_list);
@@ -201,7 +56,7 @@ static DEFINE_SPINLOCK(list_lock);
 	_read_ret;						\
 })
 #else
-static inline u32 omap_aes_read(struct omap_aes_dev *dd, u32 offset)
+inline u32 omap_aes_read(struct omap_aes_dev *dd, u32 offset)
 {
 	return __raw_readl(dd->io_base + offset);
 }
@@ -215,7 +70,7 @@ static inline u32 omap_aes_read(struct omap_aes_dev *dd, u32 offset)
 		__raw_writel(value, dd->io_base + offset);		\
 	} while (0)
 #else
-static inline void omap_aes_write(struct omap_aes_dev *dd, u32 offset,
+inline void omap_aes_write(struct omap_aes_dev *dd, u32 offset,
 				  u32 value)
 {
 	__raw_writel(value, dd->io_base + offset);
@@ -258,8 +113,16 @@ static int omap_aes_hw_init(struct omap_aes_dev *dd)
 	return 0;
 }
 
-static int omap_aes_write_ctrl(struct omap_aes_dev *dd)
+void omap_aes_clear_copy_flags(struct omap_aes_dev *dd)
 {
+	dd->flags &= ~(OMAP_CRYPTO_COPY_MASK << FLAGS_IN_DATA_ST_SHIFT);
+	dd->flags &= ~(OMAP_CRYPTO_COPY_MASK << FLAGS_OUT_DATA_ST_SHIFT);
+	dd->flags &= ~(OMAP_CRYPTO_COPY_MASK << FLAGS_ASSOC_DATA_ST_SHIFT);
+}
+
+int omap_aes_write_ctrl(struct omap_aes_dev *dd)
+{
+	struct omap_aes_reqctx *rctx;
 	unsigned int key32;
 	int i, err;
 	u32 val;
@@ -270,7 +133,11 @@ static int omap_aes_write_ctrl(struct omap_aes_dev *dd)
 
 	key32 = dd->ctx->keylen / sizeof(u32);
 
-	/* it seems a key should always be set even if it has not changed */
+	/* RESET the key as previous HASH keys should not get affected*/
+	if (dd->flags & FLAGS_GCM)
+		for (i = 0; i < 0x40; i = i + 4)
+			omap_aes_write(dd, i, 0x0);
+
 	for (i = 0; i < key32; i++) {
 		omap_aes_write(dd, AES_REG_KEY(dd, i),
 			__le32_to_cpu(dd->ctx->key[i]));
@@ -279,12 +146,21 @@ static int omap_aes_write_ctrl(struct omap_aes_dev *dd)
 	if ((dd->flags & (FLAGS_CBC | FLAGS_CTR)) && dd->req->info)
 		omap_aes_write_n(dd, AES_REG_IV(dd, 0), dd->req->info, 4);
 
+	if ((dd->flags & (FLAGS_GCM)) && dd->aead_req->iv) {
+		rctx = aead_request_ctx(dd->aead_req);
+		omap_aes_write_n(dd, AES_REG_IV(dd, 0), (u32 *)rctx->iv, 4);
+	}
+
 	val = FLD_VAL(((dd->ctx->keylen >> 3) - 1), 4, 3);
 	if (dd->flags & FLAGS_CBC)
 		val |= AES_REG_CTRL_CBC;
-	if (dd->flags & FLAGS_CTR)
+
+	if (dd->flags & (FLAGS_CTR | FLAGS_GCM))
 		val |= AES_REG_CTRL_CTR | AES_REG_CTRL_CTR_WIDTH_128;
 
+	if (dd->flags & FLAGS_GCM)
+		val |= AES_REG_CTRL_GCM;
+
 	if (dd->flags & FLAGS_ENCRYPT)
 		val |= AES_REG_CTRL_DIRECTION;
 
@@ -315,6 +191,8 @@ static void omap_aes_dma_trigger_omap4(struct omap_aes_dev *dd, int length)
 {
 	omap_aes_write(dd, AES_REG_LENGTH_N(0), length);
 	omap_aes_write(dd, AES_REG_LENGTH_N(1), 0);
+	if (dd->flags & FLAGS_GCM)
+		omap_aes_write(dd, AES_REG_A_LEN, dd->assoc_len);
 
 	omap_aes_dma_trigger_omap2(dd, length);
 }
@@ -329,14 +207,14 @@ static void omap_aes_dma_stop(struct omap_aes_dev *dd)
 	omap_aes_write_mask(dd, AES_REG_MASK(dd), 0, mask);
 }
 
-static struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_ctx *ctx)
+struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_reqctx *rctx)
 {
 	struct omap_aes_dev *dd;
 
 	spin_lock_bh(&list_lock);
 	dd = list_first_entry(&dev_list, struct omap_aes_dev, list);
 	list_move_tail(&dd->list, &dev_list);
-	ctx->dd = dd;
+	rctx->dd = dd;
 	spin_unlock_bh(&list_lock);
 
 	return dd;
@@ -387,26 +265,11 @@ static void omap_aes_dma_cleanup(struct omap_aes_dev *dd)
 	dma_release_channel(dd->dma_lch_in);
 }
 
-static void sg_copy_buf(void *buf, struct scatterlist *sg,
-			      unsigned int start, unsigned int nbytes, int out)
+static int omap_aes_crypt_dma(struct omap_aes_dev *dd,
+			      struct scatterlist *in_sg,
+			      struct scatterlist *out_sg,
+			      int in_sg_len, int out_sg_len)
 {
-	struct scatter_walk walk;
-
-	if (!nbytes)
-		return;
-
-	scatterwalk_start(&walk, sg);
-	scatterwalk_advance(&walk, start);
-	scatterwalk_copychunks(buf, &walk, nbytes, out);
-	scatterwalk_done(&walk, out, 0);
-}
-
-static int omap_aes_crypt_dma(struct crypto_tfm *tfm,
-		struct scatterlist *in_sg, struct scatterlist *out_sg,
-		int in_sg_len, int out_sg_len)
-{
-	struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct omap_aes_dev *dd = ctx->dd;
 	struct dma_async_tx_descriptor *tx_in, *tx_out;
 	struct dma_slave_config cfg;
 	int ret;
@@ -467,7 +330,10 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm,
 		return -EINVAL;
 	}
 
-	tx_out->callback = omap_aes_dma_out_callback;
+	if (dd->flags & FLAGS_GCM)
+		tx_out->callback = omap_aes_gcm_dma_out_callback;
+	else
+		tx_out->callback = omap_aes_dma_out_callback;
 	tx_out->callback_param = dd;
 
 	dmaengine_submit(tx_in);
@@ -482,10 +348,8 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm,
 	return 0;
 }
 
-static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
+int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
 {
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(
-					crypto_ablkcipher_reqtfm(dd->req));
 	int err;
 
 	pr_debug("total: %d\n", dd->total);
@@ -506,7 +370,7 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
 		}
 	}
 
-	err = omap_aes_crypt_dma(tfm, dd->in_sg, dd->out_sg, dd->in_sg_len,
+	err = omap_aes_crypt_dma(dd, dd->in_sg, dd->out_sg, dd->in_sg_len,
 				 dd->out_sg_len);
 	if (err && !dd->pio_only) {
 		dma_unmap_sg(dd->dev, dd->in_sg, dd->in_sg_len, DMA_TO_DEVICE);
@@ -529,7 +393,7 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err)
 	pm_runtime_put_autosuspend(dd->dev);
 }
 
-static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
+int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
 {
 	pr_debug("total: %d\n", dd->total);
 
@@ -539,62 +403,6 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
 	return 0;
 }
 
-static int omap_aes_check_aligned(struct scatterlist *sg, int total)
-{
-	int len = 0;
-
-	if (!IS_ALIGNED(total, AES_BLOCK_SIZE))
-		return -EINVAL;
-
-	while (sg) {
-		if (!IS_ALIGNED(sg->offset, 4))
-			return -1;
-		if (!IS_ALIGNED(sg->length, AES_BLOCK_SIZE))
-			return -1;
-
-		len += sg->length;
-		sg = sg_next(sg);
-	}
-
-	if (len != total)
-		return -1;
-
-	return 0;
-}
-
-static int omap_aes_copy_sgs(struct omap_aes_dev *dd)
-{
-	void *buf_in, *buf_out;
-	int pages, total;
-
-	total = ALIGN(dd->total, AES_BLOCK_SIZE);
-	pages = get_order(total);
-
-	buf_in = (void *)__get_free_pages(GFP_ATOMIC, pages);
-	buf_out = (void *)__get_free_pages(GFP_ATOMIC, pages);
-
-	if (!buf_in || !buf_out) {
-		pr_err("Couldn't allocated pages for unaligned cases.\n");
-		return -1;
-	}
-
-	dd->orig_out = dd->out_sg;
-
-	sg_copy_buf(buf_in, dd->in_sg, 0, dd->total, 0);
-
-	sg_init_table(&dd->in_sgl, 1);
-	sg_set_buf(&dd->in_sgl, buf_in, total);
-	dd->in_sg = &dd->in_sgl;
-	dd->in_sg_len = 1;
-
-	sg_init_table(&dd->out_sgl, 1);
-	sg_set_buf(&dd->out_sgl, buf_out, total);
-	dd->out_sg = &dd->out_sgl;
-	dd->out_sg_len = 1;
-
-	return 0;
-}
-
 static int omap_aes_handle_queue(struct omap_aes_dev *dd,
 				 struct ablkcipher_request *req)
 {
@@ -609,8 +417,10 @@ static int omap_aes_prepare_req(struct crypto_engine *engine,
 {
 	struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(
 			crypto_ablkcipher_reqtfm(req));
-	struct omap_aes_dev *dd = ctx->dd;
-	struct omap_aes_reqctx *rctx;
+	struct omap_aes_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct omap_aes_dev *dd = rctx->dd;
+	int ret;
+	u16 flags;
 
 	if (!dd)
 		return -ENODEV;
@@ -621,6 +431,23 @@ static int omap_aes_prepare_req(struct crypto_engine *engine,
 	dd->total_save = req->nbytes;
 	dd->in_sg = req->src;
 	dd->out_sg = req->dst;
+	dd->orig_out = req->dst;
+
+	flags = OMAP_CRYPTO_COPY_DATA;
+	if (req->src == req->dst)
+		flags |= OMAP_CRYPTO_FORCE_COPY;
+
+	ret = omap_crypto_align_sg(&dd->in_sg, dd->total, AES_BLOCK_SIZE,
+				   dd->in_sgl, flags,
+				   FLAGS_IN_DATA_ST_SHIFT, &dd->flags);
+	if (ret)
+		return ret;
+
+	ret = omap_crypto_align_sg(&dd->out_sg, dd->total, AES_BLOCK_SIZE,
+				   &dd->out_sgl, 0,
+				   FLAGS_OUT_DATA_ST_SHIFT, &dd->flags);
+	if (ret)
+		return ret;
 
 	dd->in_sg_len = sg_nents_for_len(dd->in_sg, dd->total);
 	if (dd->in_sg_len < 0)
@@ -630,22 +457,11 @@ static int omap_aes_prepare_req(struct crypto_engine *engine,
 	if (dd->out_sg_len < 0)
 		return dd->out_sg_len;
 
-	if (omap_aes_check_aligned(dd->in_sg, dd->total) ||
-	    omap_aes_check_aligned(dd->out_sg, dd->total)) {
-		if (omap_aes_copy_sgs(dd))
-			pr_err("Failed to copy SGs for unaligned cases\n");
-		dd->sgs_copied = 1;
-	} else {
-		dd->sgs_copied = 0;
-	}
-
-	rctx = ablkcipher_request_ctx(req);
-	ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req));
 	rctx->mode &= FLAGS_MODE_MASK;
 	dd->flags = (dd->flags & ~FLAGS_MODE_MASK) | rctx->mode;
 
 	dd->ctx = ctx;
-	ctx->dd = dd;
+	rctx->dd = dd;
 
 	return omap_aes_write_ctrl(dd);
 }
@@ -653,9 +469,8 @@ static int omap_aes_prepare_req(struct crypto_engine *engine,
 static int omap_aes_crypt_req(struct crypto_engine *engine,
 			      struct ablkcipher_request *req)
 {
-	struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(
-			crypto_ablkcipher_reqtfm(req));
-	struct omap_aes_dev *dd = ctx->dd;
+	struct omap_aes_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct omap_aes_dev *dd = rctx->dd;
 
 	if (!dd)
 		return -ENODEV;
@@ -666,8 +481,6 @@ static int omap_aes_crypt_req(struct crypto_engine *engine,
 static void omap_aes_done_task(unsigned long data)
 {
 	struct omap_aes_dev *dd = (struct omap_aes_dev *)data;
-	void *buf_in, *buf_out;
-	int pages, len;
 
 	pr_debug("enter done_task\n");
 
@@ -680,17 +493,11 @@ static void omap_aes_done_task(unsigned long data)
 		omap_aes_crypt_dma_stop(dd);
 	}
 
-	if (dd->sgs_copied) {
-		buf_in = sg_virt(&dd->in_sgl);
-		buf_out = sg_virt(&dd->out_sgl);
+	omap_crypto_cleanup(dd->in_sgl, NULL, 0, dd->total_save,
+			    FLAGS_IN_DATA_ST_SHIFT, dd->flags);
 
-		sg_copy_buf(buf_out, dd->orig_out, 0, dd->total_save, 1);
-
-		len = ALIGN(dd->total_save, AES_BLOCK_SIZE);
-		pages = get_order(len);
-		free_pages((unsigned long)buf_in, pages);
-		free_pages((unsigned long)buf_out, pages);
-	}
+	omap_crypto_cleanup(&dd->out_sgl, dd->orig_out, 0, dd->total_save,
+			    FLAGS_OUT_DATA_ST_SHIFT, dd->flags);
 
 	omap_aes_finish_req(dd, 0);
 
@@ -726,7 +533,7 @@ static int omap_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
 		skcipher_request_zero(subreq);
 		return ret;
 	}
-	dd = omap_aes_find_dev(ctx);
+	dd = omap_aes_find_dev(rctx);
 	if (!dd)
 		return -ENODEV;
 
@@ -811,6 +618,36 @@ static int omap_aes_cra_init(struct crypto_tfm *tfm)
 	return 0;
 }
 
+static int omap_aes_gcm_cra_init(struct crypto_aead *tfm)
+{
+	struct omap_aes_dev *dd = NULL;
+	struct omap_aes_ctx *ctx = crypto_aead_ctx(tfm);
+	int err;
+
+	/* Find AES device, currently picks the first device */
+	spin_lock_bh(&list_lock);
+	list_for_each_entry(dd, &dev_list, list) {
+		break;
+	}
+	spin_unlock_bh(&list_lock);
+
+	err = pm_runtime_get_sync(dd->dev);
+	if (err < 0) {
+		dev_err(dd->dev, "%s: failed to get_sync(%d)\n",
+			__func__, err);
+		return err;
+	}
+
+	tfm->reqsize = sizeof(struct omap_aes_reqctx);
+	ctx->ctr = crypto_alloc_skcipher("ecb(aes)", 0, 0);
+	if (IS_ERR(ctx->ctr)) {
+		pr_warn("could not load aes driver for encrypting IV\n");
+		return PTR_ERR(ctx->ctr);
+	}
+
+	return 0;
+}
+
 static void omap_aes_cra_exit(struct crypto_tfm *tfm)
 {
 	struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -821,6 +658,16 @@ static void omap_aes_cra_exit(struct crypto_tfm *tfm)
 	ctx->fallback = NULL;
 }
 
+static void omap_aes_gcm_cra_exit(struct crypto_aead *tfm)
+{
+	struct omap_aes_ctx *ctx = crypto_aead_ctx(tfm);
+
+	omap_aes_cra_exit(crypto_aead_tfm(tfm));
+
+	if (ctx->ctr)
+		crypto_free_skcipher(ctx->ctr);
+}
+
 /* ********************** ALGS ************************************ */
 
 static struct crypto_alg algs_ecb_cbc[] = {
@@ -905,6 +752,54 @@ static struct omap_aes_algs_info omap_aes_algs_info_ecb_cbc[] = {
 	},
 };
 
+static struct aead_alg algs_aead_gcm[] = {
+{
+	.base = {
+		.cra_name		= "gcm(aes)",
+		.cra_driver_name	= "gcm-aes-omap",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_ASYNC |
+					  CRYPTO_ALG_KERN_DRIVER_ONLY,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct omap_aes_ctx),
+		.cra_alignmask		= 0xf,
+		.cra_module		= THIS_MODULE,
+	},
+	.init		= omap_aes_gcm_cra_init,
+	.exit		= omap_aes_gcm_cra_exit,
+	.ivsize		= 12,
+	.maxauthsize	= AES_BLOCK_SIZE,
+	.setkey		= omap_aes_gcm_setkey,
+	.encrypt	= omap_aes_gcm_encrypt,
+	.decrypt	= omap_aes_gcm_decrypt,
+},
+{
+	.base = {
+		.cra_name		= "rfc4106(gcm(aes))",
+		.cra_driver_name	= "rfc4106-gcm-aes-omap",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_ASYNC |
+					  CRYPTO_ALG_KERN_DRIVER_ONLY,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct omap_aes_ctx),
+		.cra_alignmask		= 0xf,
+		.cra_module		= THIS_MODULE,
+	},
+	.init		= omap_aes_gcm_cra_init,
+	.exit		= omap_aes_gcm_cra_exit,
+	.maxauthsize	= AES_BLOCK_SIZE,
+	.ivsize		= 8,
+	.setkey		= omap_aes_4106gcm_setkey,
+	.encrypt	= omap_aes_4106gcm_encrypt,
+	.decrypt	= omap_aes_4106gcm_decrypt,
+},
+};
+
+static struct omap_aes_aead_algs omap_aes_aead_info = {
+	.algs_list	=	algs_aead_gcm,
+	.size		=	ARRAY_SIZE(algs_aead_gcm),
+};
+
 static const struct omap_aes_pdata omap_aes_pdata_omap2 = {
 	.algs_info	= omap_aes_algs_info_ecb_cbc,
 	.algs_info_size	= ARRAY_SIZE(omap_aes_algs_info_ecb_cbc),
@@ -958,6 +853,7 @@ static const struct omap_aes_pdata omap_aes_pdata_omap3 = {
 static const struct omap_aes_pdata omap_aes_pdata_omap4 = {
 	.algs_info	= omap_aes_algs_info_ecb_cbc_ctr,
 	.algs_info_size	= ARRAY_SIZE(omap_aes_algs_info_ecb_cbc_ctr),
+	.aead_algs_info	= &omap_aes_aead_info,
 	.trigger	= omap_aes_dma_trigger_omap4,
 	.key_ofs	= 0x3c,
 	.iv_ofs		= 0x40,
@@ -1140,6 +1036,7 @@ static int omap_aes_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct omap_aes_dev *dd;
 	struct crypto_alg *algp;
+	struct aead_alg *aalg;
 	struct resource res;
 	int err = -ENOMEM, i, j, irq = -1;
 	u32 reg;
@@ -1152,6 +1049,8 @@ static int omap_aes_probe(struct platform_device *pdev)
 	dd->dev = dev;
 	platform_set_drvdata(pdev, dd);
 
+	aead_init_queue(&dd->aead_queue, OMAP_AES_QUEUE_LENGTH);
+
 	err = (dev->of_node) ? omap_aes_get_res_of(dd, dev, &res) :
 			       omap_aes_get_res_pdev(dd, pdev, &res);
 	if (err)
@@ -1207,6 +1106,7 @@ static int omap_aes_probe(struct platform_device *pdev)
 		}
 	}
 
+	spin_lock_init(&dd->lock);
 
 	INIT_LIST_HEAD(&dd->list);
 	spin_lock(&list_lock);
@@ -1243,7 +1143,29 @@ static int omap_aes_probe(struct platform_device *pdev)
 		}
 	}
 
+	if (dd->pdata->aead_algs_info &&
+	    !dd->pdata->aead_algs_info->registered) {
+		for (i = 0; i < dd->pdata->aead_algs_info->size; i++) {
+			aalg = &dd->pdata->aead_algs_info->algs_list[i];
+			algp = &aalg->base;
+
+			pr_debug("reg alg: %s\n", algp->cra_name);
+			INIT_LIST_HEAD(&algp->cra_list);
+
+			err = crypto_register_aead(aalg);
+			if (err)
+				goto err_aead_algs;
+
+			dd->pdata->aead_algs_info->registered++;
+		}
+	}
+
 	return 0;
+err_aead_algs:
+	for (i = dd->pdata->aead_algs_info->registered - 1; i >= 0; i--) {
+		aalg = &dd->pdata->aead_algs_info->algs_list[i];
+		crypto_unregister_aead(aalg);
+	}
 err_algs:
 	for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
 		for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
@@ -1268,6 +1190,7 @@ static int omap_aes_probe(struct platform_device *pdev)
 static int omap_aes_remove(struct platform_device *pdev)
 {
 	struct omap_aes_dev *dd = platform_get_drvdata(pdev);
+	struct aead_alg *aalg;
 	int i, j;
 
 	if (!dd)
@@ -1282,7 +1205,13 @@ static int omap_aes_remove(struct platform_device *pdev)
 			crypto_unregister_alg(
 					&dd->pdata->algs_info[i].algs_list[j]);
 
+	for (i = dd->pdata->aead_algs_info->size - 1; i >= 0; i--) {
+		aalg = &dd->pdata->aead_algs_info->algs_list[i];
+		crypto_unregister_aead(aalg);
+	}
+
 	crypto_engine_exit(dd->engine);
+
 	tasklet_kill(&dd->done_task);
 	omap_aes_dma_cleanup(dd);
 	pm_runtime_disable(dd->dev);
diff --git a/drivers/crypto/omap-aes.h b/drivers/crypto/omap-aes.h
new file mode 100644
index 0000000..8906342
--- /dev/null
+++ b/drivers/crypto/omap-aes.h
@@ -0,0 +1,214 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for OMAP AES HW ACCELERATOR defines
+ *
+ * Copyright (c) 2015 Texas Instruments Incorporated
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ */
+#ifndef __OMAP_AES_H__
+#define __OMAP_AES_H__
+
+#define DST_MAXBURST			4
+#define DMA_MIN				(DST_MAXBURST * sizeof(u32))
+
+#define _calc_walked(inout) (dd->inout##_walk.offset - dd->inout##_sg->offset)
+
+/*
+ * OMAP TRM gives bitfields as start:end, where start is the higher bit
+ * number. For example 7:0
+ */
+#define FLD_MASK(start, end)	(((1 << ((start) - (end) + 1)) - 1) << (end))
+#define FLD_VAL(val, start, end) (((val) << (end)) & FLD_MASK(start, end))
+
+#define AES_REG_KEY(dd, x)		((dd)->pdata->key_ofs - \
+						(((x) ^ 0x01) * 0x04))
+#define AES_REG_IV(dd, x)		((dd)->pdata->iv_ofs + ((x) * 0x04))
+
+#define AES_REG_CTRL(dd)		((dd)->pdata->ctrl_ofs)
+#define AES_REG_CTRL_CONTEXT_READY	BIT(31)
+#define AES_REG_CTRL_CTR_WIDTH_MASK	GENMASK(8, 7)
+#define AES_REG_CTRL_CTR_WIDTH_32	0
+#define AES_REG_CTRL_CTR_WIDTH_64	BIT(7)
+#define AES_REG_CTRL_CTR_WIDTH_96	BIT(8)
+#define AES_REG_CTRL_CTR_WIDTH_128	GENMASK(8, 7)
+#define AES_REG_CTRL_GCM		GENMASK(17, 16)
+#define AES_REG_CTRL_CTR		BIT(6)
+#define AES_REG_CTRL_CBC		BIT(5)
+#define AES_REG_CTRL_KEY_SIZE		GENMASK(4, 3)
+#define AES_REG_CTRL_DIRECTION		BIT(2)
+#define AES_REG_CTRL_INPUT_READY	BIT(1)
+#define AES_REG_CTRL_OUTPUT_READY	BIT(0)
+#define AES_REG_CTRL_MASK		GENMASK(24, 2)
+
+#define AES_REG_C_LEN_0			0x54
+#define AES_REG_C_LEN_1			0x58
+#define AES_REG_A_LEN			0x5C
+
+#define AES_REG_DATA_N(dd, x)		((dd)->pdata->data_ofs + ((x) * 0x04))
+#define AES_REG_TAG_N(dd, x)		(0x70 + ((x) * 0x04))
+
+#define AES_REG_REV(dd)			((dd)->pdata->rev_ofs)
+
+#define AES_REG_MASK(dd)		((dd)->pdata->mask_ofs)
+#define AES_REG_MASK_SIDLE		BIT(6)
+#define AES_REG_MASK_START		BIT(5)
+#define AES_REG_MASK_DMA_OUT_EN		BIT(3)
+#define AES_REG_MASK_DMA_IN_EN		BIT(2)
+#define AES_REG_MASK_SOFTRESET		BIT(1)
+#define AES_REG_AUTOIDLE		BIT(0)
+
+#define AES_REG_LENGTH_N(x)		(0x54 + ((x) * 0x04))
+
+#define AES_REG_IRQ_STATUS(dd)         ((dd)->pdata->irq_status_ofs)
+#define AES_REG_IRQ_ENABLE(dd)         ((dd)->pdata->irq_enable_ofs)
+#define AES_REG_IRQ_DATA_IN            BIT(1)
+#define AES_REG_IRQ_DATA_OUT           BIT(2)
+#define DEFAULT_TIMEOUT		(5 * HZ)
+
+#define DEFAULT_AUTOSUSPEND_DELAY	1000
+
+#define FLAGS_MODE_MASK		0x001f
+#define FLAGS_ENCRYPT		BIT(0)
+#define FLAGS_CBC		BIT(1)
+#define FLAGS_CTR		BIT(2)
+#define FLAGS_GCM		BIT(3)
+#define FLAGS_RFC4106_GCM	BIT(4)
+
+#define FLAGS_INIT		BIT(5)
+#define FLAGS_FAST		BIT(6)
+#define FLAGS_BUSY		BIT(7)
+
+#define FLAGS_IN_DATA_ST_SHIFT	8
+#define FLAGS_OUT_DATA_ST_SHIFT	10
+#define FLAGS_ASSOC_DATA_ST_SHIFT	12
+
+#define AES_BLOCK_WORDS		(AES_BLOCK_SIZE >> 2)
+
+struct omap_aes_gcm_result {
+	struct completion completion;
+	int err;
+};
+
+struct omap_aes_ctx {
+	int		keylen;
+	u32		key[AES_KEYSIZE_256 / sizeof(u32)];
+	u8		nonce[4];
+	struct crypto_skcipher	*fallback;
+	struct crypto_skcipher	*ctr;
+};
+
+struct omap_aes_reqctx {
+	struct omap_aes_dev *dd;
+	unsigned long mode;
+	u8 iv[AES_BLOCK_SIZE];
+	u32 auth_tag[AES_BLOCK_SIZE / sizeof(u32)];
+};
+
+#define OMAP_AES_QUEUE_LENGTH	1
+#define OMAP_AES_CACHE_SIZE	0
+
+struct omap_aes_algs_info {
+	struct crypto_alg	*algs_list;
+	unsigned int		size;
+	unsigned int		registered;
+};
+
+struct omap_aes_aead_algs {
+	struct aead_alg	*algs_list;
+	unsigned int	size;
+	unsigned int	registered;
+};
+
+struct omap_aes_pdata {
+	struct omap_aes_algs_info	*algs_info;
+	unsigned int	algs_info_size;
+	struct omap_aes_aead_algs	*aead_algs_info;
+
+	void		(*trigger)(struct omap_aes_dev *dd, int length);
+
+	u32		key_ofs;
+	u32		iv_ofs;
+	u32		ctrl_ofs;
+	u32		data_ofs;
+	u32		rev_ofs;
+	u32		mask_ofs;
+	u32             irq_enable_ofs;
+	u32             irq_status_ofs;
+
+	u32		dma_enable_in;
+	u32		dma_enable_out;
+	u32		dma_start;
+
+	u32		major_mask;
+	u32		major_shift;
+	u32		minor_mask;
+	u32		minor_shift;
+};
+
+struct omap_aes_dev {
+	struct list_head	list;	/* entry on the driver's dev_list */
+	unsigned long		phys_base;
+	void __iomem		*io_base;
+	struct omap_aes_ctx	*ctx;
+	struct device		*dev;	/* &pdev->dev, set in probe */
+	unsigned long		flags;	/* FLAGS_* bits and copy states */
+	int			err;
+
+	struct tasklet_struct	done_task;
+	struct aead_queue	aead_queue;	/* initialised in probe */
+	spinlock_t		lock;
+
+	struct ablkcipher_request	*req;
+	struct aead_request		*aead_req;
+	struct crypto_engine		*engine;
+
+	/*
+	 * total is consumed by PIO mode for bookkeeping, so total_save keeps
+	 * the original length, which is needed to compute the page order
+	 */
+	size_t				total;
+	size_t				total_save;
+	size_t				assoc_len;
+	size_t				authsize;
+
+	struct scatterlist		*in_sg;
+	struct scatterlist		*out_sg;
+
+	/* Buffers for copying for unaligned cases */
+	struct scatterlist		in_sgl[2];
+	struct scatterlist		out_sgl;
+	struct scatterlist		*orig_out; /* copy-back target in cleanup */
+
+	struct scatter_walk		in_walk;
+	struct scatter_walk		out_walk;
+	struct dma_chan		*dma_lch_in;
+	struct dma_chan		*dma_lch_out;
+	int			in_sg_len;
+	int			out_sg_len;
+	int			pio_only;
+	const struct omap_aes_pdata	*pdata;
+};
+
+u32 omap_aes_read(struct omap_aes_dev *dd, u32 offset);
+void omap_aes_write(struct omap_aes_dev *dd, u32 offset, u32 value);
+struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_reqctx *rctx);
+int omap_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key,
+			unsigned int keylen);
+int omap_aes_4106gcm_setkey(struct crypto_aead *tfm, const u8 *key,
+			    unsigned int keylen);
+int omap_aes_gcm_encrypt(struct aead_request *req);
+int omap_aes_gcm_decrypt(struct aead_request *req);
+int omap_aes_4106gcm_encrypt(struct aead_request *req);
+int omap_aes_4106gcm_decrypt(struct aead_request *req);
+int omap_aes_write_ctrl(struct omap_aes_dev *dd);
+int omap_aes_crypt_dma_start(struct omap_aes_dev *dd);
+int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd);
+void omap_aes_gcm_dma_out_callback(void *data);
+void omap_aes_clear_copy_flags(struct omap_aes_dev *dd);
+
+#endif
diff --git a/drivers/crypto/omap-crypto.c b/drivers/crypto/omap-crypto.c
new file mode 100644
index 0000000..23e3777
--- /dev/null
+++ b/drivers/crypto/omap-crypto.c
@@ -0,0 +1,184 @@
+/*
+ * OMAP Crypto driver common support routines.
+ *
+ * Copyright (c) 2017 Texas Instruments Incorporated
+ *   Tero Kristo <t-kristo@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/scatterlist.h>
+#include <crypto/scatterwalk.h>
+
+#include "omap-crypto.h"
+
+/* Build a copy of *sg trimmed to 'total' bytes and point *sg at the copy. */
+static int omap_crypto_copy_sg_lists(int total, int bs,
+				     struct scatterlist **sg,
+				     struct scatterlist *new_sg, u16 flags)
+{
+	int n = sg_nents(*sg);
+	struct scatterlist *tmp;
+
+	if (!(flags & OMAP_CRYPTO_FORCE_SINGLE_ENTRY)) {
+		/* Size by the entry type: *sg is only a pointer to an entry */
+		new_sg = kmalloc_array(n, sizeof(*new_sg), GFP_KERNEL);
+		if (!new_sg)
+			return -ENOMEM;
+
+		sg_init_table(new_sg, n);
+	}
+
+	tmp = new_sg;
+	while (*sg && total) {
+		int len = (*sg)->length;
+
+		if (total < len)
+			len = total;
+
+		if (len > 0) {
+			total -= len;
+			sg_set_page(tmp, sg_page(*sg), len, (*sg)->offset);
+			if (total <= 0)
+				sg_mark_end(tmp);
+			tmp = sg_next(tmp);
+		}
+
+		*sg = sg_next(*sg);
+	}
+
+	*sg = new_sg;
+	return 0;
+}
+
+/*
+ * Point *sg at a single-entry list over freshly allocated pages sized to
+ * total rounded up to bs; source data is copied in only on request.
+ */
+static int omap_crypto_copy_sgs(int total, int bs, struct scatterlist **sg,
+				struct scatterlist *new_sg, u16 flags)
+{
+	const bool copy_in = flags & OMAP_CRYPTO_COPY_DATA;
+	int padded = ALIGN(total, bs);
+	void *bounce;
+
+	bounce = (void *)__get_free_pages(GFP_ATOMIC, get_order(padded));
+	if (!bounce) {
+		pr_err("%s: Couldn't allocate pages for unaligned cases.\n",
+		       __func__);
+		return -ENOMEM;
+	}
+
+	if (copy_in)
+		scatterwalk_map_and_copy(bounce, *sg, 0, total, 0);
+	/* The alignment padding is cleared only together with a copy */
+	if (copy_in && (flags & OMAP_CRYPTO_ZERO_BUF))
+		memset(bounce + total, 0, padded - total);
+
+	if (!(flags & OMAP_CRYPTO_FORCE_SINGLE_ENTRY))
+		sg_init_table(new_sg, 1);
+
+	sg_set_buf(new_sg, bounce, padded);
+	*sg = new_sg;
+
+	return 0;
+}
+
+/*
+ * Classify sg for direct use: 0 when it can be used as is,
+ * OMAP_CRYPTO_NOT_ALIGNED when it has to be bounced and
+ * OMAP_CRYPTO_BAD_DATA_LENGTH when it does not cover exactly total bytes.
+ */
+static int omap_crypto_check_sg(struct scatterlist *sg, int total, int bs,
+				u16 flags)
+{
+	int entries = 0, covered = 0;
+
+	if (!IS_ALIGNED(total, bs))
+		return OMAP_CRYPTO_NOT_ALIGNED;
+
+	/* Walk entries until total bytes are covered or the list ends */
+	for (; sg; sg = sg_next(sg)) {
+		entries++;
+
+		if (!IS_ALIGNED(sg->offset, 4))
+			return OMAP_CRYPTO_NOT_ALIGNED;
+		if (!IS_ALIGNED(sg->length, bs))
+			return OMAP_CRYPTO_NOT_ALIGNED;
+
+		covered += sg->length;
+		if (covered >= total)
+			break;
+	}
+
+	if (entries > 1 && (flags & OMAP_CRYPTO_FORCE_SINGLE_ENTRY))
+		return OMAP_CRYPTO_NOT_ALIGNED;
+
+	return covered == total ? 0 : OMAP_CRYPTO_BAD_DATA_LENGTH;
+}
+
+/* Validate *sg, swap in a copy if needed and record that in *dd_flags. */
+int omap_crypto_align_sg(struct scatterlist **sg, int total, int bs,
+			 struct scatterlist *new_sg, u16 flags,
+			 u8 flags_shift, unsigned long *dd_flags)
+{
+	int err, state = OMAP_CRYPTO_NOT_ALIGNED;
+
+	*dd_flags &= ~(OMAP_CRYPTO_COPY_MASK << flags_shift);
+	if (!(flags & OMAP_CRYPTO_FORCE_COPY))
+		state = omap_crypto_check_sg(*sg, total, bs, flags);
+	switch (state) {
+	case OMAP_CRYPTO_NOT_ALIGNED:
+		err = omap_crypto_copy_sgs(total, bs, sg, new_sg, flags);
+		if (err)
+			return err;
+		*dd_flags |= OMAP_CRYPTO_DATA_COPIED << flags_shift;
+		break;
+	case OMAP_CRYPTO_BAD_DATA_LENGTH:
+		err = omap_crypto_copy_sg_lists(total, bs, sg, new_sg, flags);
+		if (err)
+			return err;
+		if (!(flags & OMAP_CRYPTO_FORCE_SINGLE_ENTRY))
+			*dd_flags |= OMAP_CRYPTO_SG_COPIED << flags_shift;
+		break;
+	default:
+		if (flags & OMAP_CRYPTO_FORCE_SINGLE_ENTRY)
+			sg_set_buf(new_sg, sg_virt(*sg), (*sg)->length);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(omap_crypto_align_sg);
+
+/* Counterpart of omap_crypto_align_sg(): copy back to orig, free the copy. */
+void omap_crypto_cleanup(struct scatterlist *sg, struct scatterlist *orig,
+			 int offset, int len, u8 flags_shift,
+			 unsigned long flags)
+{
+	unsigned long state = (flags >> flags_shift) & OMAP_CRYPTO_COPY_MASK;
+	void *bounce;
+
+	if (!state)
+		return;
+
+	bounce = sg_virt(sg);
+	if (orig)
+		scatterwalk_map_and_copy(bounce, orig, offset, len, 1);
+
+	/*
+	 * NOTE(review): the callers in view pass an embedded scatterlist as
+	 * sg; confirm kfree(sg) really gets the kmalloc'ed list.
+	 */
+	if (state & OMAP_CRYPTO_DATA_COPIED)
+		free_pages((unsigned long)bounce, get_order(len));
+	else if (state & OMAP_CRYPTO_SG_COPIED)
+		kfree(sg);
+}
+EXPORT_SYMBOL_GPL(omap_crypto_cleanup);
+
+MODULE_DESCRIPTION("OMAP crypto support library.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Tero Kristo <t-kristo@ti.com>");
diff --git a/drivers/crypto/omap-crypto.h b/drivers/crypto/omap-crypto.h
new file mode 100644
index 0000000..36a230e
--- /dev/null
+++ b/drivers/crypto/omap-crypto.h
@@ -0,0 +1,37 @@
+/*
+ * OMAP Crypto driver common support routines.
+ *
+ * Copyright (c) 2017 Texas Instruments Incorporated
+ *   Tero Kristo <t-kristo@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#ifndef __CRYPTO_OMAP_CRYPTO_H
+#define __CRYPTO_OMAP_CRYPTO_H
+
+enum {
+	OMAP_CRYPTO_NOT_ALIGNED = 1,
+	OMAP_CRYPTO_BAD_DATA_LENGTH,
+};
+
+#define OMAP_CRYPTO_DATA_COPIED		BIT(0)
+#define OMAP_CRYPTO_SG_COPIED		BIT(1)
+
+#define OMAP_CRYPTO_COPY_MASK		0x3
+
+#define OMAP_CRYPTO_COPY_DATA		BIT(0)
+#define OMAP_CRYPTO_FORCE_COPY		BIT(1)
+#define OMAP_CRYPTO_ZERO_BUF		BIT(2)
+#define OMAP_CRYPTO_FORCE_SINGLE_ENTRY	BIT(3)
+
+int omap_crypto_align_sg(struct scatterlist **sg, int total, int bs,
+			 struct scatterlist *new_sg, u16 flags,
+			 u8 flags_shift, unsigned long *dd_flags);
+void omap_crypto_cleanup(struct scatterlist *sg, struct scatterlist *orig,
+			 int offset, int len, u8 flags_shift,
+			 unsigned long flags);
+
+#endif
diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c
index a6f6553..0bcab00 100644
--- a/drivers/crypto/omap-des.c
+++ b/drivers/crypto/omap-des.c
@@ -41,6 +41,8 @@
 #include <crypto/algapi.h>
 #include <crypto/engine.h>
 
+#include "omap-crypto.h"
+
 #define DST_MAXBURST			2
 
 #define DES_BLOCK_WORDS		(DES_BLOCK_SIZE >> 2)
@@ -78,6 +80,11 @@
 #define FLAGS_INIT		BIT(4)
 #define FLAGS_BUSY		BIT(6)
 
+#define DEFAULT_AUTOSUSPEND_DELAY	1000
+
+#define FLAGS_IN_DATA_ST_SHIFT	8
+#define FLAGS_OUT_DATA_ST_SHIFT	10
+
 struct omap_des_ctx {
 	struct omap_des_dev *dd;
 
@@ -151,7 +158,6 @@ struct omap_des_dev {
 	struct scatterlist		in_sgl;
 	struct scatterlist		out_sgl;
 	struct scatterlist		*orig_out;
-	int				sgs_copied;
 
 	struct scatter_walk		in_walk;
 	struct scatter_walk		out_walk;
@@ -370,20 +376,6 @@ static void omap_des_dma_cleanup(struct omap_des_dev *dd)
 	dma_release_channel(dd->dma_lch_in);
 }
 
-static void sg_copy_buf(void *buf, struct scatterlist *sg,
-			      unsigned int start, unsigned int nbytes, int out)
-{
-	struct scatter_walk walk;
-
-	if (!nbytes)
-		return;
-
-	scatterwalk_start(&walk, sg);
-	scatterwalk_advance(&walk, start);
-	scatterwalk_copychunks(buf, &walk, nbytes, out);
-	scatterwalk_done(&walk, out, 0);
-}
-
 static int omap_des_crypt_dma(struct crypto_tfm *tfm,
 		struct scatterlist *in_sg, struct scatterlist *out_sg,
 		int in_sg_len, int out_sg_len)
@@ -506,8 +498,10 @@ static void omap_des_finish_req(struct omap_des_dev *dd, int err)
 
 	pr_debug("err: %d\n", err);
 
-	pm_runtime_put(dd->dev);
 	crypto_finalize_cipher_request(dd->engine, req, err);
+
+	pm_runtime_mark_last_busy(dd->dev);
+	pm_runtime_put_autosuspend(dd->dev);
 }
 
 static int omap_des_crypt_dma_stop(struct omap_des_dev *dd)
@@ -522,55 +516,6 @@ static int omap_des_crypt_dma_stop(struct omap_des_dev *dd)
 	return 0;
 }
 
-static int omap_des_copy_needed(struct scatterlist *sg)
-{
-	while (sg) {
-		if (!IS_ALIGNED(sg->offset, 4))
-			return -1;
-		if (!IS_ALIGNED(sg->length, DES_BLOCK_SIZE))
-			return -1;
-		sg = sg_next(sg);
-	}
-	return 0;
-}
-
-static int omap_des_copy_sgs(struct omap_des_dev *dd)
-{
-	void *buf_in, *buf_out;
-	int pages;
-
-	pages = dd->total >> PAGE_SHIFT;
-
-	if (dd->total & (PAGE_SIZE-1))
-		pages++;
-
-	BUG_ON(!pages);
-
-	buf_in = (void *)__get_free_pages(GFP_ATOMIC, pages);
-	buf_out = (void *)__get_free_pages(GFP_ATOMIC, pages);
-
-	if (!buf_in || !buf_out) {
-		pr_err("Couldn't allocated pages for unaligned cases.\n");
-		return -1;
-	}
-
-	dd->orig_out = dd->out_sg;
-
-	sg_copy_buf(buf_in, dd->in_sg, 0, dd->total, 0);
-
-	sg_init_table(&dd->in_sgl, 1);
-	sg_set_buf(&dd->in_sgl, buf_in, dd->total);
-	dd->in_sg = &dd->in_sgl;
-	dd->in_sg_len = 1;
-
-	sg_init_table(&dd->out_sgl, 1);
-	sg_set_buf(&dd->out_sgl, buf_out, dd->total);
-	dd->out_sg = &dd->out_sgl;
-	dd->out_sg_len = 1;
-
-	return 0;
-}
-
 static int omap_des_handle_queue(struct omap_des_dev *dd,
 				 struct ablkcipher_request *req)
 {
@@ -587,6 +532,8 @@ static int omap_des_prepare_req(struct crypto_engine *engine,
 			crypto_ablkcipher_reqtfm(req));
 	struct omap_des_dev *dd = omap_des_find_dev(ctx);
 	struct omap_des_reqctx *rctx;
+	int ret;
+	u16 flags;
 
 	if (!dd)
 		return -ENODEV;
@@ -597,6 +544,23 @@ static int omap_des_prepare_req(struct crypto_engine *engine,
 	dd->total_save = req->nbytes;
 	dd->in_sg = req->src;
 	dd->out_sg = req->dst;
+	dd->orig_out = req->dst;
+
+	flags = OMAP_CRYPTO_COPY_DATA;
+	if (req->src == req->dst)
+		flags |= OMAP_CRYPTO_FORCE_COPY;
+
+	ret = omap_crypto_align_sg(&dd->in_sg, dd->total, DES_BLOCK_SIZE,
+				   &dd->in_sgl, flags,
+				   FLAGS_IN_DATA_ST_SHIFT, &dd->flags);
+	if (ret)
+		return ret;
+
+	ret = omap_crypto_align_sg(&dd->out_sg, dd->total, DES_BLOCK_SIZE,
+				   &dd->out_sgl, 0,
+				   FLAGS_OUT_DATA_ST_SHIFT, &dd->flags);
+	if (ret)
+		return ret;
 
 	dd->in_sg_len = sg_nents_for_len(dd->in_sg, dd->total);
 	if (dd->in_sg_len < 0)
@@ -606,15 +570,6 @@ static int omap_des_prepare_req(struct crypto_engine *engine,
 	if (dd->out_sg_len < 0)
 		return dd->out_sg_len;
 
-	if (omap_des_copy_needed(dd->in_sg) ||
-	    omap_des_copy_needed(dd->out_sg)) {
-		if (omap_des_copy_sgs(dd))
-			pr_err("Failed to copy SGs for unaligned cases\n");
-		dd->sgs_copied = 1;
-	} else {
-		dd->sgs_copied = 0;
-	}
-
 	rctx = ablkcipher_request_ctx(req);
 	ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req));
 	rctx->mode &= FLAGS_MODE_MASK;
@@ -642,8 +597,6 @@ static int omap_des_crypt_req(struct crypto_engine *engine,
 static void omap_des_done_task(unsigned long data)
 {
 	struct omap_des_dev *dd = (struct omap_des_dev *)data;
-	void *buf_in, *buf_out;
-	int pages;
 
 	pr_debug("enter done_task\n");
 
@@ -656,16 +609,11 @@ static void omap_des_done_task(unsigned long data)
 		omap_des_crypt_dma_stop(dd);
 	}
 
-	if (dd->sgs_copied) {
-		buf_in = sg_virt(&dd->in_sgl);
-		buf_out = sg_virt(&dd->out_sgl);
+	omap_crypto_cleanup(&dd->in_sgl, NULL, 0, dd->total_save,
+			    FLAGS_IN_DATA_ST_SHIFT, dd->flags);
 
-		sg_copy_buf(buf_out, dd->orig_out, 0, dd->total_save, 1);
-
-		pages = get_order(dd->total_save);
-		free_pages((unsigned long)buf_in, pages);
-		free_pages((unsigned long)buf_out, pages);
-	}
+	omap_crypto_cleanup(&dd->out_sgl, dd->orig_out, 0, dd->total_save,
+			    FLAGS_OUT_DATA_ST_SHIFT, dd->flags);
 
 	omap_des_finish_req(dd, 0);
 
@@ -699,16 +647,28 @@ static int omap_des_crypt(struct ablkcipher_request *req, unsigned long mode)
 
 /* ********************** ALG API ************************************ */
 
-static int omap_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int omap_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 			   unsigned int keylen)
 {
-	struct omap_des_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct omap_des_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
 
 	if (keylen != DES_KEY_SIZE && keylen != (3*DES_KEY_SIZE))
 		return -EINVAL;
 
 	pr_debug("enter, keylen: %d\n", keylen);
 
+	/* Do we need to test against weak key? */
+	if (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY) {
+		u32 tmp[DES_EXPKEY_WORDS];
+		int ret = des_ekey(tmp, key);
+
+		if (!ret) {
+			tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
+			return -EINVAL;
+		}
+	}
+
 	memcpy(ctx->key, key, keylen);
 	ctx->keylen = keylen;
 
@@ -1032,8 +992,10 @@ static int omap_des_probe(struct platform_device *pdev)
 	}
 	dd->phys_base = res->start;
 
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, DEFAULT_AUTOSUSPEND_DELAY);
+
 	pm_runtime_enable(dev);
-	pm_runtime_irq_safe(dev);
 	err = pm_runtime_get_sync(dev);
 	if (err < 0) {
 		pm_runtime_put_noidle(dev);
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index d0b16e5..9ad9d39 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -41,6 +41,7 @@
 #include <crypto/algapi.h>
 #include <crypto/sha.h>
 #include <crypto/hash.h>
+#include <crypto/hmac.h>
 #include <crypto/internal/hash.h>
 
 #define MD5_DIGEST_SIZE			16
@@ -225,7 +226,7 @@ struct omap_sham_dev {
 	struct dma_chan		*dma_lch;
 	struct tasklet_struct	done_task;
 	u8			polling_mode;
-	u8			xmit_buf[BUFLEN];
+	u8			xmit_buf[BUFLEN] OMAP_ALIGNED;
 
 	unsigned long		flags;
 	struct crypto_queue	queue;
@@ -750,7 +751,10 @@ static int omap_sham_align_sgs(struct scatterlist *sg,
 	if (final)
 		new_len = DIV_ROUND_UP(new_len, bs) * bs;
 	else
-		new_len = new_len / bs * bs;
+		new_len = (new_len - 1) / bs * bs;
+
+	if (nbytes != new_len)
+		list_ok = false;
 
 	while (nbytes > 0 && sg_tmp) {
 		n++;
@@ -846,6 +850,8 @@ static int omap_sham_prepare_request(struct ahash_request *req, bool update)
 			xmit_len = DIV_ROUND_UP(xmit_len, bs) * bs;
 		else
 			xmit_len = xmit_len / bs * bs;
+	} else if (!final) {
+		xmit_len -= bs;
 	}
 
 	hash_later = rctx->total - xmit_len;
@@ -873,14 +879,21 @@ static int omap_sham_prepare_request(struct ahash_request *req, bool update)
 	}
 
 	if (hash_later) {
-		if (req->nbytes) {
-			scatterwalk_map_and_copy(rctx->buffer, req->src,
-						 req->nbytes - hash_later,
-						 hash_later, 0);
-		} else {
+		int offset = 0;
+
+		if (hash_later > req->nbytes) {
 			memcpy(rctx->buffer, rctx->buffer + xmit_len,
-			       hash_later);
+			       hash_later - req->nbytes);
+			offset = hash_later - req->nbytes;
 		}
+
+		if (req->nbytes) {
+			scatterwalk_map_and_copy(rctx->buffer + offset,
+						 req->src,
+						 offset + req->nbytes -
+						 hash_later, hash_later, 0);
+		}
+
 		rctx->bufcnt = hash_later;
 	} else {
 		rctx->bufcnt = 0;
@@ -1130,7 +1143,7 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd,
 	ctx = ahash_request_ctx(req);
 
 	err = omap_sham_prepare_request(req, ctx->op == OP_UPDATE);
-	if (err)
+	if (err || !ctx->total)
 		goto err1;
 
 	dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n",
@@ -1189,11 +1202,10 @@ static int omap_sham_update(struct ahash_request *req)
 	if (!req->nbytes)
 		return 0;
 
-	if (ctx->total + req->nbytes < ctx->buflen) {
+	if (ctx->bufcnt + req->nbytes <= ctx->buflen) {
 		scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, req->src,
 					 0, req->nbytes, 0);
 		ctx->bufcnt += req->nbytes;
-		ctx->total += req->nbytes;
 		return 0;
 	}
 
@@ -1326,8 +1338,8 @@ static int omap_sham_setkey(struct crypto_ahash *tfm, const u8 *key,
 		memcpy(bctx->opad, bctx->ipad, bs);
 
 		for (i = 0; i < bs; i++) {
-			bctx->ipad[i] ^= 0x36;
-			bctx->opad[i] ^= 0x5c;
+			bctx->ipad[i] ^= HMAC_IPAD_VALUE;
+			bctx->opad[i] ^= HMAC_OPAD_VALUE;
 		}
 	}
 
diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c
index 2839fcc..d3e25c3 100644
--- a/drivers/crypto/qat/qat_common/adf_aer.c
+++ b/drivers/crypto/qat/qat_common/adf_aer.c
@@ -109,20 +109,7 @@ EXPORT_SYMBOL_GPL(adf_reset_sbr);
 
 void adf_reset_flr(struct adf_accel_dev *accel_dev)
 {
-	struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
-	u16 control = 0;
-	int pos = 0;
-
-	dev_info(&GET_DEV(accel_dev), "Function level reset\n");
-	pos = pci_pcie_cap(pdev);
-	if (!pos) {
-		dev_err(&GET_DEV(accel_dev), "Restart device failed\n");
-		return;
-	}
-	pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &control);
-	control |= PCI_EXP_DEVCTL_BCR_FLR;
-	pci_write_config_word(pdev, pos + PCI_EXP_DEVCTL, control);
-	msleep(100);
+	pcie_flr(accel_to_pci_dev(accel_dev));
 }
 EXPORT_SYMBOL_GPL(adf_reset_flr);
 
diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index 20f35df..5b5efcc 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -51,6 +51,7 @@
 #include <crypto/aes.h>
 #include <crypto/sha.h>
 #include <crypto/hash.h>
+#include <crypto/hmac.h>
 #include <crypto/algapi.h>
 #include <crypto/authenc.h>
 #include <linux/dma-mapping.h>
@@ -178,8 +179,8 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash,
 	for (i = 0; i < block_size; i++) {
 		char *ipad_ptr = ipad + i;
 		char *opad_ptr = opad + i;
-		*ipad_ptr ^= 0x36;
-		*opad_ptr ^= 0x5C;
+		*ipad_ptr ^= HMAC_IPAD_VALUE;
+		*opad_ptr ^= HMAC_OPAD_VALUE;
 	}
 
 	if (crypto_shash_init(shash))
diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c
index 2aab80b..6f5dd68 100644
--- a/drivers/crypto/qat/qat_common/qat_asym_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c
@@ -521,11 +521,11 @@ static int qat_dh_set_secret(struct crypto_kpp *tfm, const void *buf,
 	return 0;
 }
 
-static int qat_dh_max_size(struct crypto_kpp *tfm)
+static unsigned int qat_dh_max_size(struct crypto_kpp *tfm)
 {
 	struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm);
 
-	return ctx->p ? ctx->p_size : -EINVAL;
+	return ctx->p_size;
 }
 
 static int qat_dh_init_tfm(struct crypto_kpp *tfm)
@@ -1256,11 +1256,11 @@ static int qat_rsa_setprivkey(struct crypto_akcipher *tfm, const void *key,
 	return qat_rsa_setkey(tfm, key, keylen, true);
 }
 
-static int qat_rsa_max_size(struct crypto_akcipher *tfm)
+static unsigned int qat_rsa_max_size(struct crypto_akcipher *tfm)
 {
 	struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
 
-	return (ctx->n) ? ctx->key_sz : -EINVAL;
+	return ctx->key_sz;
 }
 
 static int qat_rsa_init_tfm(struct crypto_akcipher *tfm)
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
index 90efd10..5cf6474 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
+++ b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
@@ -16,13 +16,13 @@
  */
 #include "sun4i-ss.h"
 
-static int sun4i_ss_opti_poll(struct ablkcipher_request *areq)
+static int sun4i_ss_opti_poll(struct skcipher_request *areq)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
-	struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
 	struct sun4i_ss_ctx *ss = op->ss;
-	unsigned int ivsize = crypto_ablkcipher_ivsize(tfm);
-	struct sun4i_cipher_req_ctx *ctx = ablkcipher_request_ctx(areq);
+	unsigned int ivsize = crypto_skcipher_ivsize(tfm);
+	struct sun4i_cipher_req_ctx *ctx = skcipher_request_ctx(areq);
 	u32 mode = ctx->mode;
 	/* when activating SS, the default FIFO space is SS_RX_DEFAULT(32) */
 	u32 rx_cnt = SS_RX_DEFAULT;
@@ -31,17 +31,17 @@ static int sun4i_ss_opti_poll(struct ablkcipher_request *areq)
 	u32 v;
 	int err = 0;
 	unsigned int i;
-	unsigned int ileft = areq->nbytes;
-	unsigned int oleft = areq->nbytes;
+	unsigned int ileft = areq->cryptlen;
+	unsigned int oleft = areq->cryptlen;
 	unsigned int todo;
 	struct sg_mapping_iter mi, mo;
 	unsigned int oi, oo; /* offset for in and out */
 	unsigned long flags;
 
-	if (areq->nbytes == 0)
+	if (!areq->cryptlen)
 		return 0;
 
-	if (!areq->info) {
+	if (!areq->iv) {
 		dev_err_ratelimited(ss->dev, "ERROR: Empty IV\n");
 		return -EINVAL;
 	}
@@ -56,9 +56,9 @@ static int sun4i_ss_opti_poll(struct ablkcipher_request *areq)
 	for (i = 0; i < op->keylen; i += 4)
 		writel(*(op->key + i / 4), ss->base + SS_KEY0 + i);
 
-	if (areq->info) {
+	if (areq->iv) {
 		for (i = 0; i < 4 && i < ivsize / 4; i++) {
-			v = *(u32 *)(areq->info + i * 4);
+			v = *(u32 *)(areq->iv + i * 4);
 			writel(v, ss->base + SS_IV0 + i * 4);
 		}
 	}
@@ -76,13 +76,13 @@ static int sun4i_ss_opti_poll(struct ablkcipher_request *areq)
 		goto release_ss;
 	}
 
-	ileft = areq->nbytes / 4;
-	oleft = areq->nbytes / 4;
+	ileft = areq->cryptlen / 4;
+	oleft = areq->cryptlen / 4;
 	oi = 0;
 	oo = 0;
 	do {
 		todo = min3(rx_cnt, ileft, (mi.length - oi) / 4);
-		if (todo > 0) {
+		if (todo) {
 			ileft -= todo;
 			writesl(ss->base + SS_RXFIFO, mi.addr + oi, todo);
 			oi += todo * 4;
@@ -97,7 +97,7 @@ static int sun4i_ss_opti_poll(struct ablkcipher_request *areq)
 		tx_cnt = SS_TXFIFO_SPACES(spaces);
 
 		todo = min3(tx_cnt, oleft, (mo.length - oo) / 4);
-		if (todo > 0) {
+		if (todo) {
 			oleft -= todo;
 			readsl(ss->base + SS_TXFIFO, mo.addr + oo, todo);
 			oo += todo * 4;
@@ -106,12 +106,12 @@ static int sun4i_ss_opti_poll(struct ablkcipher_request *areq)
 			sg_miter_next(&mo);
 			oo = 0;
 		}
-	} while (oleft > 0);
+	} while (oleft);
 
-	if (areq->info) {
+	if (areq->iv) {
 		for (i = 0; i < 4 && i < ivsize / 4; i++) {
 			v = readl(ss->base + SS_IV0 + i * 4);
-			*(u32 *)(areq->info + i * 4) = v;
+			*(u32 *)(areq->iv + i * 4) = v;
 		}
 	}
 
@@ -124,16 +124,16 @@ static int sun4i_ss_opti_poll(struct ablkcipher_request *areq)
 }
 
 /* Generic function that support SG with size not multiple of 4 */
-static int sun4i_ss_cipher_poll(struct ablkcipher_request *areq)
+static int sun4i_ss_cipher_poll(struct skcipher_request *areq)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
-	struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
 	struct sun4i_ss_ctx *ss = op->ss;
 	int no_chunk = 1;
 	struct scatterlist *in_sg = areq->src;
 	struct scatterlist *out_sg = areq->dst;
-	unsigned int ivsize = crypto_ablkcipher_ivsize(tfm);
-	struct sun4i_cipher_req_ctx *ctx = ablkcipher_request_ctx(areq);
+	unsigned int ivsize = crypto_skcipher_ivsize(tfm);
+	struct sun4i_cipher_req_ctx *ctx = skcipher_request_ctx(areq);
 	u32 mode = ctx->mode;
 	/* when activating SS, the default FIFO space is SS_RX_DEFAULT(32) */
 	u32 rx_cnt = SS_RX_DEFAULT;
@@ -142,8 +142,8 @@ static int sun4i_ss_cipher_poll(struct ablkcipher_request *areq)
 	u32 spaces;
 	int err = 0;
 	unsigned int i;
-	unsigned int ileft = areq->nbytes;
-	unsigned int oleft = areq->nbytes;
+	unsigned int ileft = areq->cryptlen;
+	unsigned int oleft = areq->cryptlen;
 	unsigned int todo;
 	struct sg_mapping_iter mi, mo;
 	unsigned int oi, oo;	/* offset for in and out */
@@ -154,10 +154,10 @@ static int sun4i_ss_cipher_poll(struct ablkcipher_request *areq)
 	unsigned int obl = 0;	/* length of data in bufo */
 	unsigned long flags;
 
-	if (areq->nbytes == 0)
+	if (!areq->cryptlen)
 		return 0;
 
-	if (!areq->info) {
+	if (!areq->iv) {
 		dev_err_ratelimited(ss->dev, "ERROR: Empty IV\n");
 		return -EINVAL;
 	}
@@ -172,12 +172,12 @@ static int sun4i_ss_cipher_poll(struct ablkcipher_request *areq)
 	 * we can use the SS optimized function
 	 */
 	while (in_sg && no_chunk == 1) {
-		if ((in_sg->length % 4) != 0)
+		if (in_sg->length % 4)
 			no_chunk = 0;
 		in_sg = sg_next(in_sg);
 	}
 	while (out_sg && no_chunk == 1) {
-		if ((out_sg->length % 4) != 0)
+		if (out_sg->length % 4)
 			no_chunk = 0;
 		out_sg = sg_next(out_sg);
 	}
@@ -190,9 +190,9 @@ static int sun4i_ss_cipher_poll(struct ablkcipher_request *areq)
 	for (i = 0; i < op->keylen; i += 4)
 		writel(*(op->key + i / 4), ss->base + SS_KEY0 + i);
 
-	if (areq->info) {
+	if (areq->iv) {
 		for (i = 0; i < 4 && i < ivsize / 4; i++) {
-			v = *(u32 *)(areq->info + i * 4);
+			v = *(u32 *)(areq->iv + i * 4);
 			writel(v, ss->base + SS_IV0 + i * 4);
 		}
 	}
@@ -209,19 +209,19 @@ static int sun4i_ss_cipher_poll(struct ablkcipher_request *areq)
 		err = -EINVAL;
 		goto release_ss;
 	}
-	ileft = areq->nbytes;
-	oleft = areq->nbytes;
+	ileft = areq->cryptlen;
+	oleft = areq->cryptlen;
 	oi = 0;
 	oo = 0;
 
-	while (oleft > 0) {
-		if (ileft > 0) {
+	while (oleft) {
+		if (ileft) {
 			/*
 			 * todo is the number of consecutive 4byte word that we
 			 * can read from current SG
 			 */
 			todo = min3(rx_cnt, ileft / 4, (mi.length - oi) / 4);
-			if (todo > 0 && ob == 0) {
+			if (todo && !ob) {
 				writesl(ss->base + SS_RXFIFO, mi.addr + oi,
 					todo);
 				ileft -= todo * 4;
@@ -240,7 +240,7 @@ static int sun4i_ss_cipher_poll(struct ablkcipher_request *areq)
 				ileft -= todo;
 				oi += todo;
 				ob += todo;
-				if (ob % 4 == 0) {
+				if (!(ob % 4)) {
 					writesl(ss->base + SS_RXFIFO, buf,
 						ob / 4);
 					ob = 0;
@@ -257,14 +257,14 @@ static int sun4i_ss_cipher_poll(struct ablkcipher_request *areq)
 		tx_cnt = SS_TXFIFO_SPACES(spaces);
 		dev_dbg(ss->dev, "%x %u/%u %u/%u cnt=%u %u/%u %u/%u cnt=%u %u\n",
 			mode,
-			oi, mi.length, ileft, areq->nbytes, rx_cnt,
-			oo, mo.length, oleft, areq->nbytes, tx_cnt, ob);
+			oi, mi.length, ileft, areq->cryptlen, rx_cnt,
+			oo, mo.length, oleft, areq->cryptlen, tx_cnt, ob);
 
-		if (tx_cnt == 0)
+		if (!tx_cnt)
 			continue;
 		/* todo in 4bytes word */
 		todo = min3(tx_cnt, oleft / 4, (mo.length - oo) / 4);
-		if (todo > 0) {
+		if (todo) {
 			readsl(ss->base + SS_TXFIFO, mo.addr + oo, todo);
 			oleft -= todo * 4;
 			oo += todo * 4;
@@ -300,10 +300,10 @@ static int sun4i_ss_cipher_poll(struct ablkcipher_request *areq)
 			/* bufo must be fully used here */
 		}
 	}
-	if (areq->info) {
+	if (areq->iv) {
 		for (i = 0; i < 4 && i < ivsize / 4; i++) {
 			v = readl(ss->base + SS_IV0 + i * 4);
-			*(u32 *)(areq->info + i * 4) = v;
+			*(u32 *)(areq->iv + i * 4) = v;
 		}
 	}
 
@@ -317,22 +317,22 @@ static int sun4i_ss_cipher_poll(struct ablkcipher_request *areq)
 }
 
 /* CBC AES */
-int sun4i_ss_cbc_aes_encrypt(struct ablkcipher_request *areq)
+int sun4i_ss_cbc_aes_encrypt(struct skcipher_request *areq)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
-	struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
-	struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun4i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
 
 	rctx->mode = SS_OP_AES | SS_CBC | SS_ENABLED | SS_ENCRYPTION |
 		op->keymode;
 	return sun4i_ss_cipher_poll(areq);
 }
 
-int sun4i_ss_cbc_aes_decrypt(struct ablkcipher_request *areq)
+int sun4i_ss_cbc_aes_decrypt(struct skcipher_request *areq)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
-	struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
-	struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun4i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
 
 	rctx->mode = SS_OP_AES | SS_CBC | SS_ENABLED | SS_DECRYPTION |
 		op->keymode;
@@ -340,22 +340,22 @@ int sun4i_ss_cbc_aes_decrypt(struct ablkcipher_request *areq)
 }
 
 /* ECB AES */
-int sun4i_ss_ecb_aes_encrypt(struct ablkcipher_request *areq)
+int sun4i_ss_ecb_aes_encrypt(struct skcipher_request *areq)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
-	struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
-	struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun4i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
 
 	rctx->mode = SS_OP_AES | SS_ECB | SS_ENABLED | SS_ENCRYPTION |
 		op->keymode;
 	return sun4i_ss_cipher_poll(areq);
 }
 
-int sun4i_ss_ecb_aes_decrypt(struct ablkcipher_request *areq)
+int sun4i_ss_ecb_aes_decrypt(struct skcipher_request *areq)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
-	struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
-	struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun4i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
 
 	rctx->mode = SS_OP_AES | SS_ECB | SS_ENABLED | SS_DECRYPTION |
 		op->keymode;
@@ -363,22 +363,22 @@ int sun4i_ss_ecb_aes_decrypt(struct ablkcipher_request *areq)
 }
 
 /* CBC DES */
-int sun4i_ss_cbc_des_encrypt(struct ablkcipher_request *areq)
+int sun4i_ss_cbc_des_encrypt(struct skcipher_request *areq)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
-	struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
-	struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun4i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
 
 	rctx->mode = SS_OP_DES | SS_CBC | SS_ENABLED | SS_ENCRYPTION |
 		op->keymode;
 	return sun4i_ss_cipher_poll(areq);
 }
 
-int sun4i_ss_cbc_des_decrypt(struct ablkcipher_request *areq)
+int sun4i_ss_cbc_des_decrypt(struct skcipher_request *areq)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
-	struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
-	struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun4i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
 
 	rctx->mode = SS_OP_DES | SS_CBC | SS_ENABLED | SS_DECRYPTION |
 		op->keymode;
@@ -386,22 +386,22 @@ int sun4i_ss_cbc_des_decrypt(struct ablkcipher_request *areq)
 }
 
 /* ECB DES */
-int sun4i_ss_ecb_des_encrypt(struct ablkcipher_request *areq)
+int sun4i_ss_ecb_des_encrypt(struct skcipher_request *areq)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
-	struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
-	struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun4i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
 
 	rctx->mode = SS_OP_DES | SS_ECB | SS_ENABLED | SS_ENCRYPTION |
 		op->keymode;
 	return sun4i_ss_cipher_poll(areq);
 }
 
-int sun4i_ss_ecb_des_decrypt(struct ablkcipher_request *areq)
+int sun4i_ss_ecb_des_decrypt(struct skcipher_request *areq)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
-	struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
-	struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun4i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
 
 	rctx->mode = SS_OP_DES | SS_ECB | SS_ENABLED | SS_DECRYPTION |
 		op->keymode;
@@ -409,22 +409,22 @@ int sun4i_ss_ecb_des_decrypt(struct ablkcipher_request *areq)
 }
 
 /* CBC 3DES */
-int sun4i_ss_cbc_des3_encrypt(struct ablkcipher_request *areq)
+int sun4i_ss_cbc_des3_encrypt(struct skcipher_request *areq)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
-	struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
-	struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun4i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
 
 	rctx->mode = SS_OP_3DES | SS_CBC | SS_ENABLED | SS_ENCRYPTION |
 		op->keymode;
 	return sun4i_ss_cipher_poll(areq);
 }
 
-int sun4i_ss_cbc_des3_decrypt(struct ablkcipher_request *areq)
+int sun4i_ss_cbc_des3_decrypt(struct skcipher_request *areq)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
-	struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
-	struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun4i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
 
 	rctx->mode = SS_OP_3DES | SS_CBC | SS_ENABLED | SS_DECRYPTION |
 		op->keymode;
@@ -432,22 +432,22 @@ int sun4i_ss_cbc_des3_decrypt(struct ablkcipher_request *areq)
 }
 
 /* ECB 3DES */
-int sun4i_ss_ecb_des3_encrypt(struct ablkcipher_request *areq)
+int sun4i_ss_ecb_des3_encrypt(struct skcipher_request *areq)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
-	struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
-	struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun4i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
 
 	rctx->mode = SS_OP_3DES | SS_ECB | SS_ENABLED | SS_ENCRYPTION |
 		op->keymode;
 	return sun4i_ss_cipher_poll(areq);
 }
 
-int sun4i_ss_ecb_des3_decrypt(struct ablkcipher_request *areq)
+int sun4i_ss_ecb_des3_decrypt(struct skcipher_request *areq)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
-	struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
-	struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sun4i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
 
 	rctx->mode = SS_OP_3DES | SS_ECB | SS_ENABLED | SS_DECRYPTION |
 		op->keymode;
@@ -457,24 +457,25 @@ int sun4i_ss_ecb_des3_decrypt(struct ablkcipher_request *areq)
 int sun4i_ss_cipher_init(struct crypto_tfm *tfm)
 {
 	struct sun4i_tfm_ctx *op = crypto_tfm_ctx(tfm);
-	struct crypto_alg *alg = tfm->__crt_alg;
 	struct sun4i_ss_alg_template *algt;
 
 	memset(op, 0, sizeof(struct sun4i_tfm_ctx));
 
-	algt = container_of(alg, struct sun4i_ss_alg_template, alg.crypto);
+	algt = container_of(tfm->__crt_alg, struct sun4i_ss_alg_template,
+			    alg.crypto.base);
 	op->ss = algt->ss;
 
-	tfm->crt_ablkcipher.reqsize = sizeof(struct sun4i_cipher_req_ctx);
+	crypto_skcipher_set_reqsize(__crypto_skcipher_cast(tfm),
+				    sizeof(struct sun4i_cipher_req_ctx));
 
 	return 0;
 }
 
 /* check and set the AES key, prepare the mode to be used */
-int sun4i_ss_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+int sun4i_ss_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			unsigned int keylen)
 {
-	struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+	struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
 	struct sun4i_ss_ctx *ss = op->ss;
 
 	switch (keylen) {
@@ -489,7 +490,7 @@ int sun4i_ss_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 		break;
 	default:
 		dev_err(ss->dev, "ERROR: Invalid keylen %u\n", keylen);
-		crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 	op->keylen = keylen;
@@ -498,10 +499,10 @@ int sun4i_ss_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 }
 
 /* check and set the DES key, prepare the mode to be used */
-int sun4i_ss_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+int sun4i_ss_des_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			unsigned int keylen)
 {
-	struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+	struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
 	struct sun4i_ss_ctx *ss = op->ss;
 	u32 flags;
 	u32 tmp[DES_EXPKEY_WORDS];
@@ -509,15 +510,15 @@ int sun4i_ss_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 
 	if (unlikely(keylen != DES_KEY_SIZE)) {
 		dev_err(ss->dev, "Invalid keylen %u\n", keylen);
-		crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 
-	flags = crypto_ablkcipher_get_flags(tfm);
+	flags = crypto_skcipher_get_flags(tfm);
 
 	ret = des_ekey(tmp, key);
-	if (unlikely(ret == 0) && (flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
-		crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
+	if (unlikely(!ret) && (flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
 		dev_dbg(ss->dev, "Weak key %u\n", keylen);
 		return -EINVAL;
 	}
@@ -528,15 +529,15 @@ int sun4i_ss_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 }
 
 /* check and set the 3DES key, prepare the mode to be used */
-int sun4i_ss_des3_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+int sun4i_ss_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			 unsigned int keylen)
 {
-	struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+	struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
 	struct sun4i_ss_ctx *ss = op->ss;
 
 	if (unlikely(keylen != 3 * DES_KEY_SIZE)) {
 		dev_err(ss->dev, "Invalid keylen %u\n", keylen);
-		crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 	op->keylen = keylen;
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-core.c b/drivers/crypto/sunxi-ss/sun4i-ss-core.c
index 3ac6c6c..02ad825 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-core.c
+++ b/drivers/crypto/sunxi-ss/sun4i-ss-core.c
@@ -83,134 +83,133 @@ static struct sun4i_ss_alg_template ss_algs[] = {
 		}
 	}
 },
-{       .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+{       .type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.alg.crypto = {
-		.cra_name = "cbc(aes)",
-		.cra_driver_name = "cbc-aes-sun4i-ss",
-		.cra_priority = 300,
-		.cra_blocksize = AES_BLOCK_SIZE,
-		.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
-		.cra_ctxsize = sizeof(struct sun4i_tfm_ctx),
-		.cra_module = THIS_MODULE,
-		.cra_alignmask = 3,
-		.cra_type = &crypto_ablkcipher_type,
-		.cra_init = sun4i_ss_cipher_init,
-		.cra_ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey         = sun4i_ss_aes_setkey,
-			.encrypt        = sun4i_ss_cbc_aes_encrypt,
-			.decrypt        = sun4i_ss_cbc_aes_decrypt,
+		.setkey         = sun4i_ss_aes_setkey,
+		.encrypt        = sun4i_ss_cbc_aes_encrypt,
+		.decrypt        = sun4i_ss_cbc_aes_decrypt,
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.base = {
+			.cra_name = "cbc(aes)",
+			.cra_driver_name = "cbc-aes-sun4i-ss",
+			.cra_priority = 300,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_ctxsize = sizeof(struct sun4i_tfm_ctx),
+			.cra_module = THIS_MODULE,
+			.cra_alignmask = 3,
+			.cra_init = sun4i_ss_cipher_init,
 		}
 	}
 },
-{       .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+{       .type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.alg.crypto = {
-		.cra_name = "ecb(aes)",
-		.cra_driver_name = "ecb-aes-sun4i-ss",
-		.cra_priority = 300,
-		.cra_blocksize = AES_BLOCK_SIZE,
-		.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
-		.cra_ctxsize = sizeof(struct sun4i_tfm_ctx),
-		.cra_module = THIS_MODULE,
-		.cra_alignmask = 3,
-		.cra_type = &crypto_ablkcipher_type,
-		.cra_init = sun4i_ss_cipher_init,
-		.cra_ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey         = sun4i_ss_aes_setkey,
-			.encrypt        = sun4i_ss_ecb_aes_encrypt,
-			.decrypt        = sun4i_ss_ecb_aes_decrypt,
+		.setkey         = sun4i_ss_aes_setkey,
+		.encrypt        = sun4i_ss_ecb_aes_encrypt,
+		.decrypt        = sun4i_ss_ecb_aes_decrypt,
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.base = {
+			.cra_name = "ecb(aes)",
+			.cra_driver_name = "ecb-aes-sun4i-ss",
+			.cra_priority = 300,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_ctxsize = sizeof(struct sun4i_tfm_ctx),
+			.cra_module = THIS_MODULE,
+			.cra_alignmask = 3,
+			.cra_init = sun4i_ss_cipher_init,
 		}
 	}
 },
-{       .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+{       .type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.alg.crypto = {
-		.cra_name = "cbc(des)",
-		.cra_driver_name = "cbc-des-sun4i-ss",
-		.cra_priority = 300,
-		.cra_blocksize = DES_BLOCK_SIZE,
-		.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
-		.cra_ctxsize = sizeof(struct sun4i_req_ctx),
-		.cra_module = THIS_MODULE,
-		.cra_alignmask = 3,
-		.cra_type = &crypto_ablkcipher_type,
-		.cra_init = sun4i_ss_cipher_init,
-		.cra_u.ablkcipher = {
-			.min_keysize    = DES_KEY_SIZE,
-			.max_keysize    = DES_KEY_SIZE,
-			.ivsize         = DES_BLOCK_SIZE,
-			.setkey         = sun4i_ss_des_setkey,
-			.encrypt        = sun4i_ss_cbc_des_encrypt,
-			.decrypt        = sun4i_ss_cbc_des_decrypt,
+		.setkey         = sun4i_ss_des_setkey,
+		.encrypt        = sun4i_ss_cbc_des_encrypt,
+		.decrypt        = sun4i_ss_cbc_des_decrypt,
+		.min_keysize    = DES_KEY_SIZE,
+		.max_keysize    = DES_KEY_SIZE,
+		.ivsize         = DES_BLOCK_SIZE,
+		.base = {
+			.cra_name = "cbc(des)",
+			.cra_driver_name = "cbc-des-sun4i-ss",
+			.cra_priority = 300,
+			.cra_blocksize = DES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_ctxsize = sizeof(struct sun4i_req_ctx),
+			.cra_module = THIS_MODULE,
+			.cra_alignmask = 3,
+			.cra_init = sun4i_ss_cipher_init,
 		}
 	}
 },
-{       .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+{       .type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.alg.crypto = {
-		.cra_name = "ecb(des)",
-		.cra_driver_name = "ecb-des-sun4i-ss",
-		.cra_priority = 300,
-		.cra_blocksize = DES_BLOCK_SIZE,
-		.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
-		.cra_ctxsize = sizeof(struct sun4i_req_ctx),
-		.cra_module = THIS_MODULE,
-		.cra_alignmask = 3,
-		.cra_type = &crypto_ablkcipher_type,
-		.cra_init = sun4i_ss_cipher_init,
-		.cra_u.ablkcipher = {
-			.min_keysize    = DES_KEY_SIZE,
-			.max_keysize    = DES_KEY_SIZE,
-			.setkey         = sun4i_ss_des_setkey,
-			.encrypt        = sun4i_ss_ecb_des_encrypt,
-			.decrypt        = sun4i_ss_ecb_des_decrypt,
+		.setkey         = sun4i_ss_des_setkey,
+		.encrypt        = sun4i_ss_ecb_des_encrypt,
+		.decrypt        = sun4i_ss_ecb_des_decrypt,
+		.min_keysize    = DES_KEY_SIZE,
+		.max_keysize    = DES_KEY_SIZE,
+		.base = {
+			.cra_name = "ecb(des)",
+			.cra_driver_name = "ecb-des-sun4i-ss",
+			.cra_priority = 300,
+			.cra_blocksize = DES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_ctxsize = sizeof(struct sun4i_req_ctx),
+			.cra_module = THIS_MODULE,
+			.cra_alignmask = 3,
+			.cra_init = sun4i_ss_cipher_init,
 		}
 	}
 },
-{       .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+{       .type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.alg.crypto = {
-		.cra_name = "cbc(des3_ede)",
-		.cra_driver_name = "cbc-des3-sun4i-ss",
-		.cra_priority = 300,
-		.cra_blocksize = DES3_EDE_BLOCK_SIZE,
-		.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
-		.cra_ctxsize = sizeof(struct sun4i_req_ctx),
-		.cra_module = THIS_MODULE,
-		.cra_alignmask = 3,
-		.cra_type = &crypto_ablkcipher_type,
-		.cra_init = sun4i_ss_cipher_init,
-		.cra_u.ablkcipher = {
-			.min_keysize    = DES3_EDE_KEY_SIZE,
-			.max_keysize    = DES3_EDE_KEY_SIZE,
-			.ivsize         = DES3_EDE_BLOCK_SIZE,
-			.setkey         = sun4i_ss_des3_setkey,
-			.encrypt        = sun4i_ss_cbc_des3_encrypt,
-			.decrypt        = sun4i_ss_cbc_des3_decrypt,
+		.setkey         = sun4i_ss_des3_setkey,
+		.encrypt        = sun4i_ss_cbc_des3_encrypt,
+		.decrypt        = sun4i_ss_cbc_des3_decrypt,
+		.min_keysize    = DES3_EDE_KEY_SIZE,
+		.max_keysize    = DES3_EDE_KEY_SIZE,
+		.ivsize         = DES3_EDE_BLOCK_SIZE,
+		.base = {
+			.cra_name = "cbc(des3_ede)",
+			.cra_driver_name = "cbc-des3-sun4i-ss",
+			.cra_priority = 300,
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_ctxsize = sizeof(struct sun4i_req_ctx),
+			.cra_module = THIS_MODULE,
+			.cra_alignmask = 3,
+			.cra_init = sun4i_ss_cipher_init,
 		}
 	}
 },
-{       .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+{       .type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.alg.crypto = {
-		.cra_name = "ecb(des3_ede)",
-		.cra_driver_name = "ecb-des3-sun4i-ss",
-		.cra_priority = 300,
-		.cra_blocksize = DES3_EDE_BLOCK_SIZE,
-		.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
-		.cra_ctxsize = sizeof(struct sun4i_req_ctx),
-		.cra_module = THIS_MODULE,
-		.cra_alignmask = 3,
-		.cra_type = &crypto_ablkcipher_type,
-		.cra_init = sun4i_ss_cipher_init,
-		.cra_u.ablkcipher = {
-			.min_keysize    = DES3_EDE_KEY_SIZE,
-			.max_keysize    = DES3_EDE_KEY_SIZE,
-			.ivsize         = DES3_EDE_BLOCK_SIZE,
-			.setkey         = sun4i_ss_des3_setkey,
-			.encrypt        = sun4i_ss_ecb_des3_encrypt,
-			.decrypt        = sun4i_ss_ecb_des3_decrypt,
+		.setkey         = sun4i_ss_des3_setkey,
+		.encrypt        = sun4i_ss_ecb_des3_encrypt,
+		.decrypt        = sun4i_ss_ecb_des3_decrypt,
+		.min_keysize    = DES3_EDE_KEY_SIZE,
+		.max_keysize    = DES3_EDE_KEY_SIZE,
+		.ivsize         = DES3_EDE_BLOCK_SIZE,
+		.base = {
+			.cra_name = "ecb(des3_ede)",
+			.cra_driver_name = "ecb-des3-sun4i-ss",
+			.cra_priority = 300,
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_ctxsize = sizeof(struct sun4i_req_ctx),
+			.cra_module = THIS_MODULE,
+			.cra_alignmask = 3,
+			.cra_init = sun4i_ss_cipher_init,
 		}
 	}
 },
@@ -266,12 +265,12 @@ static int sun4i_ss_probe(struct platform_device *pdev)
 
 	/* Enable both clocks */
 	err = clk_prepare_enable(ss->busclk);
-	if (err != 0) {
+	if (err) {
 		dev_err(&pdev->dev, "Cannot prepare_enable busclk\n");
 		return err;
 	}
 	err = clk_prepare_enable(ss->ssclk);
-	if (err != 0) {
+	if (err) {
 		dev_err(&pdev->dev, "Cannot prepare_enable ssclk\n");
 		goto error_ssclk;
 	}
@@ -281,7 +280,7 @@ static int sun4i_ss_probe(struct platform_device *pdev)
 	 * Try to set the clock to the maximum allowed
 	 */
 	err = clk_set_rate(ss->ssclk, cr_mod);
-	if (err != 0) {
+	if (err) {
 		dev_err(&pdev->dev, "Cannot set clock rate to ssclk\n");
 		goto error_clk;
 	}
@@ -340,17 +339,17 @@ static int sun4i_ss_probe(struct platform_device *pdev)
 	for (i = 0; i < ARRAY_SIZE(ss_algs); i++) {
 		ss_algs[i].ss = ss;
 		switch (ss_algs[i].type) {
-		case CRYPTO_ALG_TYPE_ABLKCIPHER:
-			err = crypto_register_alg(&ss_algs[i].alg.crypto);
-			if (err != 0) {
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			err = crypto_register_skcipher(&ss_algs[i].alg.crypto);
+			if (err) {
 				dev_err(ss->dev, "Fail to register %s\n",
-					ss_algs[i].alg.crypto.cra_name);
+					ss_algs[i].alg.crypto.base.cra_name);
 				goto error_alg;
 			}
 			break;
 		case CRYPTO_ALG_TYPE_AHASH:
 			err = crypto_register_ahash(&ss_algs[i].alg.hash);
-			if (err != 0) {
+			if (err) {
 				dev_err(ss->dev, "Fail to register %s\n",
 					ss_algs[i].alg.hash.halg.base.cra_name);
 				goto error_alg;
@@ -364,8 +363,8 @@ static int sun4i_ss_probe(struct platform_device *pdev)
 	i--;
 	for (; i >= 0; i--) {
 		switch (ss_algs[i].type) {
-		case CRYPTO_ALG_TYPE_ABLKCIPHER:
-			crypto_unregister_alg(&ss_algs[i].alg.crypto);
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			crypto_unregister_skcipher(&ss_algs[i].alg.crypto);
 			break;
 		case CRYPTO_ALG_TYPE_AHASH:
 			crypto_unregister_ahash(&ss_algs[i].alg.hash);
@@ -388,8 +387,8 @@ static int sun4i_ss_remove(struct platform_device *pdev)
 
 	for (i = 0; i < ARRAY_SIZE(ss_algs); i++) {
 		switch (ss_algs[i].type) {
-		case CRYPTO_ALG_TYPE_ABLKCIPHER:
-			crypto_unregister_alg(&ss_algs[i].alg.crypto);
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			crypto_unregister_skcipher(&ss_algs[i].alg.crypto);
 			break;
 		case CRYPTO_ALG_TYPE_AHASH:
 			crypto_unregister_ahash(&ss_algs[i].alg.hash);
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-hash.c b/drivers/crypto/sunxi-ss/sun4i-ss-hash.c
index 0de2f62..a4b5ff2 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-hash.c
+++ b/drivers/crypto/sunxi-ss/sun4i-ss-hash.c
@@ -60,7 +60,7 @@ int sun4i_hash_export_md5(struct ahash_request *areq, void *out)
 
 	memcpy(octx->block, op->buf, op->len);
 
-	if (op->byte_count > 0) {
+	if (op->byte_count) {
 		for (i = 0; i < 4; i++)
 			octx->hash[i] = op->hash[i];
 	} else {
@@ -102,7 +102,7 @@ int sun4i_hash_export_sha1(struct ahash_request *areq, void *out)
 
 	memcpy(octx->buffer, op->buf, op->len);
 
-	if (op->byte_count > 0) {
+	if (op->byte_count) {
 		for (i = 0; i < 5; i++)
 			octx->state[i] = op->hash[i];
 	} else {
@@ -167,44 +167,34 @@ int sun4i_hash_import_sha1(struct ahash_request *areq, const void *in)
  */
 static int sun4i_hash(struct ahash_request *areq)
 {
-	u32 v, ivmode = 0;
-	unsigned int i = 0;
 	/*
 	 * i is the total bytes read from SGs, to be compared to areq->nbytes
 	 * i is important because we cannot rely on SG length since the sum of
 	 * SG->length could be greater than areq->nbytes
+	 *
+	 * end is the position when we need to stop writing to the device,
+	 * to be compared to i
+	 *
+	 * in_i: advancement in the current SG
 	 */
-
+	unsigned int i = 0, end, fill, min_fill, nwait, nbw = 0, j = 0, todo;
+	unsigned int in_i = 0;
+	u32 spaces, rx_cnt = SS_RX_DEFAULT, bf[32] = {0}, wb = 0, v, ivmode = 0;
 	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct sun4i_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
 	struct sun4i_ss_ctx *ss = tfmctx->ss;
-	unsigned int in_i = 0; /* advancement in the current SG */
-	unsigned int end;
-	/*
-	 * end is the position when we need to stop writing to the device,
-	 * to be compared to i
-	 */
-	int in_r, err = 0;
-	unsigned int todo;
-	u32 spaces, rx_cnt = SS_RX_DEFAULT;
-	size_t copied = 0;
-	struct sg_mapping_iter mi;
-	unsigned int j = 0;
-	int zeros;
-	unsigned int index, padlen;
-	__be64 bits;
-	u32 bf[32];
-	u32 wb = 0;
-	unsigned int nwait, nbw = 0;
 	struct scatterlist *in_sg = areq->src;
+	struct sg_mapping_iter mi;
+	int in_r, err = 0;
+	size_t copied = 0;
 
 	dev_dbg(ss->dev, "%s %s bc=%llu len=%u mode=%x wl=%u h0=%0x",
 		__func__, crypto_tfm_alg_name(areq->base.tfm),
 		op->byte_count, areq->nbytes, op->mode,
 		op->len, op->hash[0]);
 
-	if (unlikely(areq->nbytes == 0) && (op->flags & SS_HASH_FINAL) == 0)
+	if (unlikely(!areq->nbytes) && !(op->flags & SS_HASH_FINAL))
 		return 0;
 
 	/* protect against overflow */
@@ -213,7 +203,7 @@ static int sun4i_hash(struct ahash_request *areq)
 		return -EINVAL;
 	}
 
-	if (op->len + areq->nbytes < 64 && (op->flags & SS_HASH_FINAL) == 0) {
+	if (op->len + areq->nbytes < 64 && !(op->flags & SS_HASH_FINAL)) {
 		/* linearize data to op->buf */
 		copied = sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
 					    op->buf + op->len, areq->nbytes, 0);
@@ -227,7 +217,7 @@ static int sun4i_hash(struct ahash_request *areq)
 	 * if some data have been processed before,
 	 * we need to restore the partial hash state
 	 */
-	if (op->byte_count > 0) {
+	if (op->byte_count) {
 		ivmode = SS_IV_ARBITRARY;
 		for (i = 0; i < 5; i++)
 			writel(op->hash[i], ss->base + SS_IV0 + i * 4);
@@ -235,11 +225,11 @@ static int sun4i_hash(struct ahash_request *areq)
 	/* Enable the device */
 	writel(op->mode | SS_ENABLED | ivmode, ss->base + SS_CTL);
 
-	if ((op->flags & SS_HASH_UPDATE) == 0)
+	if (!(op->flags & SS_HASH_UPDATE))
 		goto hash_final;
 
 	/* start of handling data */
-	if ((op->flags & SS_HASH_FINAL) == 0) {
+	if (!(op->flags & SS_HASH_FINAL)) {
 		end = ((areq->nbytes + op->len) / 64) * 64 - op->len;
 
 		if (end > areq->nbytes || areq->nbytes - end > 63) {
@@ -253,14 +243,14 @@ static int sun4i_hash(struct ahash_request *areq)
 		end = ((areq->nbytes + op->len) / 4) * 4 - op->len;
 	}
 
-	/* TODO if SGlen % 4 and op->len == 0 then DMA */
+	/* TODO if SGlen % 4 and !op->len then DMA */
 	i = 1;
 	while (in_sg && i == 1) {
-		if ((in_sg->length % 4) != 0)
+		if (in_sg->length % 4)
 			i = 0;
 		in_sg = sg_next(in_sg);
 	}
-	if (i == 1 && op->len == 0)
+	if (i == 1 && !op->len && areq->nbytes)
 		dev_dbg(ss->dev, "We can DMA\n");
 
 	i = 0;
@@ -275,7 +265,7 @@ static int sun4i_hash(struct ahash_request *areq)
 		 * - the buffer is already used
 		 * - the SG does not have enough byte remaining ( < 4)
 		 */
-		if (op->len > 0 || (mi.length - in_i) < 4) {
+		if (op->len || (mi.length - in_i) < 4) {
 			/*
 			 * if we have entered here we have two reason to stop
 			 * - the buffer is full
@@ -294,7 +284,7 @@ static int sun4i_hash(struct ahash_request *areq)
 					in_i = 0;
 				}
 			}
-			if (op->len > 3 && (op->len % 4) == 0) {
+			if (op->len > 3 && !(op->len % 4)) {
 				/* write buf to the device */
 				writesl(ss->base + SS_RXFIFO, op->buf,
 					op->len / 4);
@@ -313,7 +303,7 @@ static int sun4i_hash(struct ahash_request *areq)
 			i += todo * 4;
 			in_i += todo * 4;
 			rx_cnt -= todo;
-			if (rx_cnt == 0) {
+			if (!rx_cnt) {
 				spaces = readl(ss->base + SS_FCSR);
 				rx_cnt = SS_RXFIFO_SPACES(spaces);
 			}
@@ -351,7 +341,7 @@ static int sun4i_hash(struct ahash_request *areq)
 	 * Now if we have the flag final go to finalize part
 	 * If not, store the partial hash
 	 */
-	if ((op->flags & SS_HASH_FINAL) > 0)
+	if (op->flags & SS_HASH_FINAL)
 		goto hash_final;
 
 	writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL);
@@ -359,7 +349,7 @@ static int sun4i_hash(struct ahash_request *areq)
 	do {
 		v = readl(ss->base + SS_CTL);
 		i++;
-	} while (i < SS_TIMEOUT && (v & SS_DATA_END) > 0);
+	} while (i < SS_TIMEOUT && (v & SS_DATA_END));
 	if (unlikely(i >= SS_TIMEOUT)) {
 		dev_err_ratelimited(ss->dev,
 				    "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
@@ -368,6 +358,15 @@ static int sun4i_hash(struct ahash_request *areq)
 		goto release_ss;
 	}
 
+	/*
+	 * The datasheet isn't very clear about when to retrieve the digest. The
+	 * bit SS_DATA_END is cleared when the engine has processed the data and
+	 * when the digest is computed *but* it doesn't mean the digest is
+	 * available in the digest registers. Hence the delay to be sure we can
+	 * read it.
+	 */
+	ndelay(1);
+
 	for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++)
 		op->hash[i] = readl(ss->base + SS_MD0 + i * 4);
 
@@ -388,56 +387,50 @@ static int sun4i_hash(struct ahash_request *areq)
 hash_final:
 
 	/* write the remaining words of the wait buffer */
-	if (op->len > 0) {
+	if (op->len) {
 		nwait = op->len / 4;
-		if (nwait > 0) {
+		if (nwait) {
 			writesl(ss->base + SS_RXFIFO, op->buf, nwait);
 			op->byte_count += 4 * nwait;
 		}
+
 		nbw = op->len - 4 * nwait;
-		wb = *(u32 *)(op->buf + nwait * 4);
-		wb &= (0xFFFFFFFF >> (4 - nbw) * 8);
+		if (nbw) {
+			wb = *(u32 *)(op->buf + nwait * 4);
+			wb &= GENMASK((nbw * 8) - 1, 0);
+
+			op->byte_count += nbw;
+		}
 	}
 
 	/* write the remaining bytes of the nbw buffer */
-	if (nbw > 0) {
-		wb |= ((1 << 7) << (nbw * 8));
-		bf[j++] = wb;
-	} else {
-		bf[j++] = 1 << 7;
-	}
+	wb |= ((1 << 7) << (nbw * 8));
+	bf[j++] = wb;
 
 	/*
 	 * number of space to pad to obtain 64o minus 8(size) minus 4 (final 1)
 	 * I take the operations from other MD5/SHA1 implementations
 	 */
 
-	/* we have already send 4 more byte of which nbw data */
-	if (op->mode == SS_OP_MD5) {
-		index = (op->byte_count + 4) & 0x3f;
-		op->byte_count += nbw;
-		if (index > 56)
-			zeros = (120 - index) / 4;
-		else
-			zeros = (56 - index) / 4;
-	} else {
-		op->byte_count += nbw;
-		index = op->byte_count & 0x3f;
-		padlen = (index < 56) ? (56 - index) : ((64 + 56) - index);
-		zeros = (padlen - 1) / 4;
-	}
+	/* last block size */
+	fill = 64 - (op->byte_count % 64);
+	min_fill = 2 * sizeof(u32) + (nbw ? 0 : sizeof(u32));
 
-	memset(bf + j, 0, 4 * zeros);
-	j += zeros;
+	/* not enough room for padding + length: use one more 64-byte block */
+	if (fill < min_fill)
+		fill += 64;
+
+	j += (fill - min_fill) / sizeof(u32);
 
 	/* write the length of data */
 	if (op->mode == SS_OP_SHA1) {
-		bits = cpu_to_be64(op->byte_count << 3);
-		bf[j++] = bits & 0xffffffff;
-		bf[j++] = (bits >> 32) & 0xffffffff;
+		__be64 bits = cpu_to_be64(op->byte_count << 3);
+		bf[j++] = lower_32_bits(bits);
+		bf[j++] = upper_32_bits(bits);
 	} else {
-		bf[j++] = (op->byte_count << 3) & 0xffffffff;
-		bf[j++] = (op->byte_count >> 29) & 0xffffffff;
+		__le64 bits = op->byte_count << 3;
+		bf[j++] = lower_32_bits(bits);
+		bf[j++] = upper_32_bits(bits);
 	}
 	writesl(ss->base + SS_RXFIFO, bf, j);
 
@@ -453,7 +446,7 @@ static int sun4i_hash(struct ahash_request *areq)
 	do {
 		v = readl(ss->base + SS_CTL);
 		i++;
-	} while (i < SS_TIMEOUT && (v & SS_DATA_END) > 0);
+	} while (i < SS_TIMEOUT && (v & SS_DATA_END));
 	if (unlikely(i >= SS_TIMEOUT)) {
 		dev_err_ratelimited(ss->dev,
 				    "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
@@ -462,6 +455,15 @@ static int sun4i_hash(struct ahash_request *areq)
 		goto release_ss;
 	}
 
+	/*
+	 * The datasheet isn't very clear about when to retrieve the digest. The
+	 * bit SS_DATA_END is cleared when the engine has processed the data and
+	 * when the digest is computed *but* it doesn't mean the digest is
+	 * available in the digest registers. Hence the delay to be sure we can
+	 * read it.
+	 */
+	ndelay(1);
+
 	/* Get the hash from the device */
 	if (op->mode == SS_OP_SHA1) {
 		for (i = 0; i < 5; i++) {
@@ -513,7 +515,7 @@ int sun4i_hash_digest(struct ahash_request *areq)
 	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
 
 	err = sun4i_hash_init(areq);
-	if (err != 0)
+	if (err)
 		return err;
 
 	op->flags = SS_HASH_UPDATE | SS_HASH_FINAL;
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss.h b/drivers/crypto/sunxi-ss/sun4i-ss.h
index f04c0f8..a0e1efc 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss.h
+++ b/drivers/crypto/sunxi-ss/sun4i-ss.h
@@ -24,9 +24,11 @@
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <crypto/md5.h>
+#include <crypto/skcipher.h>
 #include <crypto/sha.h>
 #include <crypto/hash.h>
 #include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/aes.h>
 #include <crypto/des.h>
 #include <crypto/internal/rng.h>
@@ -140,7 +142,7 @@ struct sun4i_ss_alg_template {
 	u32 type;
 	u32 mode;
 	union {
-		struct crypto_alg crypto;
+		struct skcipher_alg crypto;
 		struct ahash_alg hash;
 	} alg;
 	struct sun4i_ss_ctx *ss;
@@ -177,25 +179,25 @@ int sun4i_hash_import_md5(struct ahash_request *areq, const void *in);
 int sun4i_hash_export_sha1(struct ahash_request *areq, void *out);
 int sun4i_hash_import_sha1(struct ahash_request *areq, const void *in);
 
-int sun4i_ss_cbc_aes_encrypt(struct ablkcipher_request *areq);
-int sun4i_ss_cbc_aes_decrypt(struct ablkcipher_request *areq);
-int sun4i_ss_ecb_aes_encrypt(struct ablkcipher_request *areq);
-int sun4i_ss_ecb_aes_decrypt(struct ablkcipher_request *areq);
+int sun4i_ss_cbc_aes_encrypt(struct skcipher_request *areq);
+int sun4i_ss_cbc_aes_decrypt(struct skcipher_request *areq);
+int sun4i_ss_ecb_aes_encrypt(struct skcipher_request *areq);
+int sun4i_ss_ecb_aes_decrypt(struct skcipher_request *areq);
 
-int sun4i_ss_cbc_des_encrypt(struct ablkcipher_request *areq);
-int sun4i_ss_cbc_des_decrypt(struct ablkcipher_request *areq);
-int sun4i_ss_ecb_des_encrypt(struct ablkcipher_request *areq);
-int sun4i_ss_ecb_des_decrypt(struct ablkcipher_request *areq);
+int sun4i_ss_cbc_des_encrypt(struct skcipher_request *areq);
+int sun4i_ss_cbc_des_decrypt(struct skcipher_request *areq);
+int sun4i_ss_ecb_des_encrypt(struct skcipher_request *areq);
+int sun4i_ss_ecb_des_decrypt(struct skcipher_request *areq);
 
-int sun4i_ss_cbc_des3_encrypt(struct ablkcipher_request *areq);
-int sun4i_ss_cbc_des3_decrypt(struct ablkcipher_request *areq);
-int sun4i_ss_ecb_des3_encrypt(struct ablkcipher_request *areq);
-int sun4i_ss_ecb_des3_decrypt(struct ablkcipher_request *areq);
+int sun4i_ss_cbc_des3_encrypt(struct skcipher_request *areq);
+int sun4i_ss_cbc_des3_decrypt(struct skcipher_request *areq);
+int sun4i_ss_ecb_des3_encrypt(struct skcipher_request *areq);
+int sun4i_ss_ecb_des3_decrypt(struct skcipher_request *areq);
 
 int sun4i_ss_cipher_init(struct crypto_tfm *tfm);
-int sun4i_ss_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+int sun4i_ss_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			unsigned int keylen);
-int sun4i_ss_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+int sun4i_ss_des_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			unsigned int keylen);
-int sun4i_ss_des3_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+int sun4i_ss_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			 unsigned int keylen);
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 0bba6a1..79791c6 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -816,7 +816,7 @@ static void talitos_unregister_rng(struct device *dev)
  * HMAC_SNOOP_NO_AFEA (HSNA) instead of type IPSEC_ESP
  */
 #define TALITOS_CRA_PRIORITY_AEAD_HSNA	(TALITOS_CRA_PRIORITY - 1)
-#define TALITOS_MAX_KEY_SIZE		96
+#define TALITOS_MAX_KEY_SIZE		(AES_MAX_KEY_SIZE + SHA512_BLOCK_SIZE)
 #define TALITOS_MAX_IV_LENGTH		16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */
 
 struct talitos_ctx {
@@ -1495,6 +1495,11 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher,
 {
 	struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
 
+	if (keylen > TALITOS_MAX_KEY_SIZE) {
+		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
 	memcpy(&ctx->key, key, keylen);
 	ctx->keylen = keylen;
 
diff --git a/drivers/crypto/vmx/aes.c b/drivers/crypto/vmx/aes.c
index 022c7ab..96072b9 100644
--- a/drivers/crypto/vmx/aes.c
+++ b/drivers/crypto/vmx/aes.c
@@ -37,15 +37,10 @@ struct p8_aes_ctx {
 
 static int p8_aes_init(struct crypto_tfm *tfm)
 {
-	const char *alg;
+	const char *alg = crypto_tfm_alg_name(tfm);
 	struct crypto_cipher *fallback;
 	struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	if (!(alg = crypto_tfm_alg_name(tfm))) {
-		printk(KERN_ERR "Failed to get algorithm name.\n");
-		return -ENOENT;
-	}
-
 	fallback = crypto_alloc_cipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(fallback)) {
 		printk(KERN_ERR
diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c
index 72a26eb..7394d35 100644
--- a/drivers/crypto/vmx/aes_cbc.c
+++ b/drivers/crypto/vmx/aes_cbc.c
@@ -39,15 +39,10 @@ struct p8_aes_cbc_ctx {
 
 static int p8_aes_cbc_init(struct crypto_tfm *tfm)
 {
-	const char *alg;
+	const char *alg = crypto_tfm_alg_name(tfm);
 	struct crypto_skcipher *fallback;
 	struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	if (!(alg = crypto_tfm_alg_name(tfm))) {
-		printk(KERN_ERR "Failed to get algorithm name.\n");
-		return -ENOENT;
-	}
-
 	fallback = crypto_alloc_skcipher(alg, 0,
 			CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
 
diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c
index 7cf6d31..9c26d9e 100644
--- a/drivers/crypto/vmx/aes_ctr.c
+++ b/drivers/crypto/vmx/aes_ctr.c
@@ -36,15 +36,10 @@ struct p8_aes_ctr_ctx {
 
 static int p8_aes_ctr_init(struct crypto_tfm *tfm)
 {
-	const char *alg;
+	const char *alg = crypto_tfm_alg_name(tfm);
 	struct crypto_blkcipher *fallback;
 	struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	if (!(alg = crypto_tfm_alg_name(tfm))) {
-		printk(KERN_ERR "Failed to get algorithm name.\n");
-		return -ENOENT;
-	}
-
 	fallback =
 	    crypto_alloc_blkcipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(fallback)) {
diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c
index 6adc929..8cd6e62 100644
--- a/drivers/crypto/vmx/aes_xts.c
+++ b/drivers/crypto/vmx/aes_xts.c
@@ -41,15 +41,10 @@ struct p8_aes_xts_ctx {
 
 static int p8_aes_xts_init(struct crypto_tfm *tfm)
 {
-	const char *alg;
+	const char *alg = crypto_tfm_alg_name(tfm);
 	struct crypto_skcipher *fallback;
 	struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	if (!(alg = crypto_tfm_alg_name(tfm))) {
-		printk(KERN_ERR "Failed to get algorithm name.\n");
-		return -ENOENT;
-	}
-
 	fallback = crypto_alloc_skcipher(alg, 0,
 			CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(fallback)) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index e88c180..b32eb8c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -868,6 +868,7 @@ struct adapter {
 
 	/* TC u32 offload */
 	struct cxgb4_tc_u32_table *tc_u32;
+	struct chcr_stats_debug chcr_stats;
 };
 
 /* Support for "sched-class" command to allow a TX Scheduling Class to be
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 1fa34b0..77a59d7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -3069,6 +3069,40 @@ static const struct file_operations meminfo_fops = {
 	.llseek  = seq_lseek,
 	.release = single_release,
 };
+/* seq_file show callback: dump the adapter's chcr_stats counters as text. */
+static int chcr_show(struct seq_file *seq, void *v)
+{
+	struct adapter *adap = seq->private;	/* single_open() data */
+
+	seq_puts(seq, "Chelsio Crypto Accelerator Stats \n");
+	seq_printf(seq, "Cipher Ops: %10u \n",
+		   atomic_read(&adap->chcr_stats.cipher_rqst));
+	seq_printf(seq, "Digest Ops: %10u \n",
+		   atomic_read(&adap->chcr_stats.digest_rqst));
+	seq_printf(seq, "Aead Ops: %10u \n",
+		   atomic_read(&adap->chcr_stats.aead_rqst));
+	seq_printf(seq, "Completion: %10u \n",
+		   atomic_read(&adap->chcr_stats.complete));
+	seq_printf(seq, "Error: %10u \n",
+		   atomic_read(&adap->chcr_stats.error));
+	seq_printf(seq, "Fallback: %10u \n",
+		   atomic_read(&adap->chcr_stats.fallback));
+	return 0;	/* always succeeds */
+}
+
+/* debugfs open: hand inode->i_private to chcr_show() via single_open(). */
+static int chcr_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, chcr_show, inode->i_private);
+}
+
+static const struct file_operations chcr_stats_debugfs_fops = {
+	.owner   = THIS_MODULE,
+	.open    = chcr_stats_open,	/* single_open() on chcr_show() */
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release,	/* counterpart of single_open() */
+};
 /* Add an array of Debug FS files.
  */
 void add_debugfs_files(struct adapter *adap,
@@ -3143,6 +3177,7 @@ int t4_setup_debugfs(struct adapter *adap)
 		{ "tids", &tid_info_debugfs_fops, S_IRUSR, 0},
 		{ "blocked_fl", &blocked_fl_fops, S_IRUSR | S_IWUSR, 0 },
 		{ "meminfo", &meminfo_fops, S_IRUSR, 0 },
+		{ "crypto", &chcr_stats_debugfs_fops, S_IRUSR, 0 },
 	};
 
 	/* Debug FS nodes common to all T5 and later adapters.
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index d0868c2..ec53fe9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -642,6 +642,7 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
 	lld->sge_ingpadboundary = adap->sge.fl_align;
 	lld->sge_egrstatuspagesize = adap->sge.stat_len;
 	lld->sge_pktshift = adap->sge.pktshift;
+	lld->ulp_crypto = adap->params.crypto;
 	lld->enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN;
 	lld->max_ordird_qp = adap->params.max_ordird_qp;
 	lld->max_ird_adapter = adap->params.max_ird_adapter;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 6e74040..8f1c874 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -275,6 +275,15 @@ struct cxgb4_virt_res {                      /* virtualized HW resources */
 	unsigned int ncrypto_fc;
 };
 
+struct chcr_stats_debug {	/* chcr crypto counters shown in debugfs */
+	atomic_t cipher_rqst;	/* printed as "Cipher Ops" */
+	atomic_t digest_rqst;	/* printed as "Digest Ops" */
+	atomic_t aead_rqst;	/* printed as "Aead Ops" */
+	atomic_t complete;	/* printed as "Completion" */
+	atomic_t error;		/* printed as "Error" */
+	atomic_t fallback;	/* printed as "Fallback" */
+};
+
 #define OCQ_WIN_OFFSET(pdev, vres) \
 	(pci_resource_len((pdev), 2) - roundup_pow_of_two((vres)->ocq.size))
 
@@ -322,6 +331,7 @@ struct cxgb4_lld_info {
 	unsigned int iscsi_tagmask;	     /* iscsi ddp tag mask */
 	unsigned int iscsi_pgsz_order;	     /* iscsi ddp page size orders */
 	unsigned int iscsi_llimit;	     /* chip's iscsi region llimit */
+	unsigned int ulp_crypto;             /* crypto lookaside support */
 	void **iscsi_ppm;		     /* iscsi page pod manager */
 	int nodeid;			     /* device numa node id */
 	bool fr_nsmr_tpte_wr_support;	     /* FW supports FR_NSMR_TPTE_WR */
diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h
index c37cc59..b5e11de 100644
--- a/include/crypto/akcipher.h
+++ b/include/crypto/akcipher.h
@@ -98,7 +98,7 @@ struct akcipher_alg {
 			   unsigned int keylen);
 	int (*set_priv_key)(struct crypto_akcipher *tfm, const void *key,
 			    unsigned int keylen);
-	int (*max_size)(struct crypto_akcipher *tfm);
+	unsigned int (*max_size)(struct crypto_akcipher *tfm);
 	int (*init)(struct crypto_akcipher *tfm);
 	void (*exit)(struct crypto_akcipher *tfm);
 
@@ -257,13 +257,14 @@ static inline void akcipher_request_set_crypt(struct akcipher_request *req,
 /**
  * crypto_akcipher_maxsize() - Get len for output buffer
  *
- * Function returns the dest buffer size required for a given key
+ * Function returns the dest buffer size required for a given key.
+ * Function assumes that the key is already set in the transformation. If this
+ * function is called without a setkey or with a failed setkey, you will end up
+ * in a NULL dereference.
  *
  * @tfm:	AKCIPHER tfm handle allocated with crypto_alloc_akcipher()
- *
- * Return: minimum len for output buffer or error code in key hasn't been set
  */
-static inline int crypto_akcipher_maxsize(struct crypto_akcipher *tfm)
+static inline unsigned int crypto_akcipher_maxsize(struct crypto_akcipher *tfm)
 {
 	struct akcipher_alg *alg = crypto_akcipher_alg(tfm);
 
diff --git a/include/crypto/dh.h b/include/crypto/dh.h
index 6b424ad..f638998 100644
--- a/include/crypto/dh.h
+++ b/include/crypto/dh.h
@@ -73,9 +73,9 @@ int crypto_dh_encode_key(char *buf, unsigned int len, const struct dh *params);
 /**
  * crypto_dh_decode_key() - decode a private key
  * @buf:	Buffer holding a packet key that should be decoded
- * @len:	Lenth of the packet private key buffer
+ * @len:	Length of the packet private key buffer
  * @params:	Buffer allocated by the caller that is filled with the
- *		unpacket DH private key.
+ *		unpacked DH private key.
  *
  * The unpacking obtains the private key by pointing @p to the correct location
  * in @buf. Thus, both pointers refer to the same memory.
diff --git a/include/crypto/ecdh.h b/include/crypto/ecdh.h
index 03a64f6..1aff2a8 100644
--- a/include/crypto/ecdh.h
+++ b/include/crypto/ecdh.h
@@ -74,9 +74,9 @@ int crypto_ecdh_encode_key(char *buf, unsigned int len, const struct ecdh *p);
 /**
  * crypto_ecdh_decode_key() - decode a private key
  * @buf:	Buffer holding a packet key that should be decoded
- * @len:	Lenth of the packet private key buffer
+ * @len:	Length of the packet private key buffer
  * @p:		Buffer allocated by the caller that is filled with the
- *		unpacket ECDH private key.
+ *		unpacked ECDH private key.
  *
  * The unpacking obtains the private key by pointing @p to the correct location
  * in @buf. Thus, both pointers refer to the same memory.
diff --git a/include/crypto/engine.h b/include/crypto/engine.h
index 1bf600f..dd04c16 100644
--- a/include/crypto/engine.h
+++ b/include/crypto/engine.h
@@ -58,6 +58,7 @@ struct crypto_engine {
 	struct list_head	list;
 	spinlock_t		queue_lock;
 	struct crypto_queue	queue;
+	struct device		*dev;
 
 	bool			rt;
 
diff --git a/include/crypto/hmac.h b/include/crypto/hmac.h
new file mode 100644
index 0000000..ef09f79
--- /dev/null
+++ b/include/crypto/hmac.h
@@ -0,0 +1,7 @@
+#ifndef _CRYPTO_HMAC_H
+#define _CRYPTO_HMAC_H
+
+#define HMAC_IPAD_VALUE 0x36	/* HMAC inner pad byte (ipad, RFC 2104) */
+#define HMAC_OPAD_VALUE 0x5c	/* HMAC outer pad byte (opad, RFC 2104) */
+
+#endif /* _CRYPTO_HMAC_H */
diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h
index ce8e1f7..2133d17 100644
--- a/include/crypto/kpp.h
+++ b/include/crypto/kpp.h
@@ -53,7 +53,7 @@ struct crypto_kpp {
  *
  * @set_secret:		Function invokes the protocol specific function to
  *			store the secret private key along with parameters.
- *			The implementation knows how to decode thie buffer
+ *			The implementation knows how to decode the buffer
  * @generate_public_key: Function generate the public key to be sent to the
  *			counterpart. In case of error, where output is not big
  *			enough req->dst_len will be updated to the size
@@ -79,7 +79,7 @@ struct kpp_alg {
 	int (*generate_public_key)(struct kpp_request *req);
 	int (*compute_shared_secret)(struct kpp_request *req);
 
-	int (*max_size)(struct crypto_kpp *tfm);
+	unsigned int (*max_size)(struct crypto_kpp *tfm);
 
 	int (*init)(struct crypto_kpp *tfm);
 	void (*exit)(struct crypto_kpp *tfm);
@@ -102,7 +102,7 @@ struct kpp_alg {
  * @mask: specifies the mask for the algorithm
  *
  * Allocate a handle for kpp algorithm. The returned struct crypto_kpp
- * is requeried for any following API invocation
+ * is required for any following API invocation
  *
  * Return: allocated handle in case of success; IS_ERR() is true in case of
  *	   an error, PTR_ERR() returns the error code.
@@ -323,13 +323,14 @@ static inline int crypto_kpp_compute_shared_secret(struct kpp_request *req)
 /**
  * crypto_kpp_maxsize() - Get len for output buffer
  *
- * Function returns the output buffer size required
+ * Function returns the output buffer size required for a given key.
+ * Function assumes that the key is already set in the transformation. If this
+ * function is called without a setkey or with a failed setkey, you will end up
+ * in a NULL dereference.
  *
  * @tfm:	KPP tfm handle allocated with crypto_alloc_kpp()
- *
- * Return: minimum len for output buffer or error code if key hasn't been set
  */
-static inline int crypto_kpp_maxsize(struct crypto_kpp *tfm)
+static inline unsigned int crypto_kpp_maxsize(struct crypto_kpp *tfm)
 {
 	struct kpp_alg *alg = crypto_kpp_alg(tfm);
 
diff --git a/include/linux/timeriomem-rng.h b/include/linux/timeriomem-rng.h
index 46eb27d..3e00122 100644
--- a/include/linux/timeriomem-rng.h
+++ b/include/linux/timeriomem-rng.h
@@ -13,4 +13,7 @@ struct timeriomem_rng_data {
 
 	/* measures in usecs */
 	unsigned int		period;
+
+	/* bits of entropy per 1024 bits read */
+	unsigned int		quality;
 };