From: Eneas U de Queiroz Date: Fri, 20 Dec 2019 13:52:17 +0000 (-0300) Subject: ipq40xx: qce - add fixes for AES ciphers X-Git-Url: http://git.cdn.openwrt.org/?a=commitdiff_plain;h=13b8404b1eaaa51027cae42c8152b9123d92a425;p=openwrt%2Fstaging%2Fzorun.git ipq40xx: qce - add fixes for AES ciphers This backports commits from master that fix AES ciphers when using the qce driver: - A couple of simple fixes for CTR and XTS modes used with AES: * 041-crypto-qce-fix-ctr-aes-qce-block-chunk-sizes.patch * 042-crypto-qce-fix-xts-aes-qce-key-sizes.patch - A fix for a bug that affected cases when there were more entries in the input sg list than necessary to actually encrypt, resulting in failure in gcm, where the authentication tag is present after the encryption data: * 043-crypto-qce-save-a-sg-table-slot-for-result-buf.patch - A fix to update the IV buffer passed to the driver from the kernel: * 044-crypto-qce-update-the-skcipher-IV.patch - A patch that reduces memory footprint and driver initialization by only initializing the fallback mechanism where it is actually used: * 046-crypto-qce-initialize-fallback-only-for-AES.patch - Three patches that make gcm and xts modes work with the qce driver, and improve performance with small blocks: * 047-crypto-qce-use-cryptlen-when-adding-extra-sgl.patch * 048-crypto-qce-use-AES-fallback-for-small-requests.patch * 049-crypto-qce-handle-AES-XTS-cases-that-qce-fails.patch - A patch that allows the hashes/ciphers to be built individually. * 051-crypto-qce-allow-building-only-hashes-ciphers.patch Signed-off-by: Eneas U de Queiroz [renumbered patches, added patches from dropped commit, refreshed, 5.4] Signed-off-by: Christian Lamparter --- diff --git a/target/linux/ipq40xx/patches-4.19/041-crypto-qce-fix-ctr-aes-qce-block-chunk-sizes.patch b/target/linux/ipq40xx/patches-4.19/041-crypto-qce-fix-ctr-aes-qce-block-chunk-sizes.patch new file mode 100644 index 0000000000..59f7445a15 --- /dev/null +++ b/target/linux/ipq40xx/patches-4.19/041-crypto-qce-fix-ctr-aes-qce-block-chunk-sizes.patch @@ -0,0 +1,39 @@ +From 3f5598286445f695bb63a22239dd3603c69a6eaf Mon Sep 17 00:00:00 2001 +From: Eneas U de Queiroz +Date: Mon, 28 Oct 2019 09:03:07 -0300 +Subject: [PATCH] crypto: qce - fix ctr-aes-qce block, chunk sizes + +Set blocksize of ctr-aes-qce to 1, so it can operate as a stream cipher, +adding the definition for chucksize instead, where the underlying block +size belongs. + +Signed-off-by: Eneas U de Queiroz + +--- a/drivers/crypto/qce/skcipher.c ++++ b/drivers/crypto/qce/skcipher.c +@@ -261,6 +261,7 @@ struct qce_skcipher_def { + const char *name; + const char *drv_name; + unsigned int blocksize; ++ unsigned int chunksize; + unsigned int ivsize; + unsigned int min_keysize; + unsigned int max_keysize; +@@ -289,7 +290,8 @@ static const struct qce_skcipher_def skc + .flags = QCE_ALG_AES | QCE_MODE_CTR, + .name = "ctr(aes)", + .drv_name = "ctr-aes-qce", +- .blocksize = AES_BLOCK_SIZE, ++ .blocksize = 1, ++ .chunksize = AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, +@@ -359,6 +361,7 @@ static int qce_skcipher_register_one(con + def->drv_name); + + alg->base.cra_blocksize = def->blocksize; ++ alg->chunksize = def->chunksize; + alg->ivsize = def->ivsize; + alg->min_keysize = def->min_keysize; + alg->max_keysize = def->max_keysize; diff --git a/target/linux/ipq40xx/patches-4.19/042-crypto-qce-fix-xts-aes-qce-key-sizes.patch b/target/linux/ipq40xx/patches-4.19/042-crypto-qce-fix-xts-aes-qce-key-sizes.patch new file mode 100644 index 0000000000..50b888351c --- /dev/null +++ b/target/linux/ipq40xx/patches-4.19/042-crypto-qce-fix-xts-aes-qce-key-sizes.patch @@ -0,0 +1,50 @@ +From 0138c3c13809250338d7cfba6f4ca3b2da02b2c8 Mon Sep 17 00:00:00 2001 +From: Eneas U de Queiroz +Date: Thu, 21 Nov 2019 14:28:23 -0300 +Subject: [PATCH] crypto: qce - fix xts-aes-qce key sizes + +XTS-mode uses two keys, so the keysizes should be doubled in +skcipher_def, and halved when checking if it is AES-128/192/256. + +Signed-off-by: Eneas U de Queiroz + +--- a/drivers/crypto/qce/skcipher.c ++++ b/drivers/crypto/qce/skcipher.c +@@ -168,7 +168,7 @@ static int qce_skcipher_setkey(struct cr + return -EINVAL; + + if (IS_AES(flags)) { +- switch (keylen) { ++ switch (IS_XTS(flags) ? keylen >> 1 : keylen) { + case AES_KEYSIZE_128: + case AES_KEYSIZE_256: + break; +@@ -203,13 +203,15 @@ static int qce_skcipher_crypt(struct skc + struct qce_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct qce_cipher_reqctx *rctx = skcipher_request_ctx(req); + struct qce_alg_template *tmpl = to_cipher_tmpl(tfm); ++ int keylen; + int ret; + + rctx->flags = tmpl->alg_flags; + rctx->flags |= encrypt ? QCE_ENCRYPT : QCE_DECRYPT; ++ keylen = IS_XTS(rctx->flags) ? ctx->enc_keylen >> 1 : ctx->enc_keylen; + +- if (IS_AES(rctx->flags) && ctx->enc_keylen != AES_KEYSIZE_128 && +- ctx->enc_keylen != AES_KEYSIZE_256) { ++ if (IS_AES(rctx->flags) && keylen != AES_KEYSIZE_128 && ++ keylen != AES_KEYSIZE_256) { + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); +@@ -302,8 +304,8 @@ static const struct qce_skcipher_def skc + .drv_name = "xts-aes-qce", + .blocksize = AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, +- .min_keysize = AES_MIN_KEY_SIZE, +- .max_keysize = AES_MAX_KEY_SIZE, ++ .min_keysize = AES_MIN_KEY_SIZE * 2, ++ .max_keysize = AES_MAX_KEY_SIZE * 2, + }, + { + .flags = QCE_ALG_DES | QCE_MODE_ECB, diff --git a/target/linux/ipq40xx/patches-4.19/043-crypto-qce-save-a-sg-table-slot-for-result-buf.patch b/target/linux/ipq40xx/patches-4.19/043-crypto-qce-save-a-sg-table-slot-for-result-buf.patch new file mode 100644 index 0000000000..95650690fb --- /dev/null +++ b/target/linux/ipq40xx/patches-4.19/043-crypto-qce-save-a-sg-table-slot-for-result-buf.patch @@ -0,0 +1,79 @@ +From 31f796293b6c38126a466414c565827b9cfdbe39 Mon Sep 17 00:00:00 2001 +From: Eneas U de Queiroz +Date: Wed, 20 Nov 2019 21:39:11 -0300 +Subject: [PATCH] crypto: qce - save a sg table slot for result buf + +When ctr-aes-qce is used for gcm-mode, an extra sg entry for the +authentication tag is present, causing trouble when the qce driver +prepares the dst-results eg table for dma. + +It computes the number of entries needed with sg_nents_for_len, leaving +out the tag entry. Then it creates a sg table with that number plus +one, used to store a "result" sg. + +When copying the sg table, it does not limit the number of entries +copied, so tha extra slot is filled with the authentication tag sg. +When the driver tries to add the result sg, the list is full, and it +returns EINVAL. + +By limiting the number of sg entries copied to the dest table, the slot +for the result buffer is guaranteed to be unused. + +Signed-off-by: Eneas U de Queiroz + +--- a/drivers/crypto/qce/dma.c ++++ b/drivers/crypto/qce/dma.c +@@ -55,7 +55,8 @@ void qce_dma_release(struct qce_dma_data + } + + struct scatterlist * +-qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl) ++qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl, ++ int max_ents) + { + struct scatterlist *sg = sgt->sgl, *sg_last = NULL; + +@@ -68,12 +69,13 @@ qce_sgtable_add(struct sg_table *sgt, st + if (!sg) + return ERR_PTR(-EINVAL); + +- while (new_sgl && sg) { ++ while (new_sgl && sg && max_ents) { + sg_set_page(sg, sg_page(new_sgl), new_sgl->length, + new_sgl->offset); + sg_last = sg; + sg = sg_next(sg); + new_sgl = sg_next(new_sgl); ++ max_ents--; + } + + return sg_last; +--- a/drivers/crypto/qce/dma.h ++++ b/drivers/crypto/qce/dma.h +@@ -50,6 +50,7 @@ int qce_dma_prep_sgs(struct qce_dma_data + void qce_dma_issue_pending(struct qce_dma_data *dma); + int qce_dma_terminate_all(struct qce_dma_data *dma); + struct scatterlist * +-qce_sgtable_add(struct sg_table *sgt, struct scatterlist *sg_add); ++qce_sgtable_add(struct sg_table *sgt, struct scatterlist *sg_add, ++ int max_ents); + + #endif /* _DMA_H_ */ +--- a/drivers/crypto/qce/skcipher.c ++++ b/drivers/crypto/qce/skcipher.c +@@ -103,13 +103,13 @@ qce_skcipher_async_req_handle(struct cry + + sg_init_one(&rctx->result_sg, qce->dma.result_buf, QCE_RESULT_BUF_SZ); + +- sg = qce_sgtable_add(&rctx->dst_tbl, req->dst); ++ sg = qce_sgtable_add(&rctx->dst_tbl, req->dst, rctx->dst_nents - 1); + if (IS_ERR(sg)) { + ret = PTR_ERR(sg); + goto error_free; + } + +- sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg); ++ sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg, 1); + if (IS_ERR(sg)) { + ret = PTR_ERR(sg); + goto error_free; diff --git a/target/linux/ipq40xx/patches-4.19/044-crypto-qce-update-the-skcipher-IV.patch b/target/linux/ipq40xx/patches-4.19/044-crypto-qce-update-the-skcipher-IV.patch new file mode 100644 index 0000000000..33be510a82 --- /dev/null +++ b/target/linux/ipq40xx/patches-4.19/044-crypto-qce-update-the-skcipher-IV.patch @@ -0,0 +1,27 @@ +From 502ca0b7c1d856a46dbd78e67690c12c47775b97 Mon Sep 17 00:00:00 2001 +From: Eneas U de Queiroz +Date: Fri, 22 Nov 2019 09:00:02 -0300 +Subject: [PATCH] crypto: qce - update the skcipher IV + +Update the IV after the completion of each cipher operation. + +Signed-off-by: Eneas U de Queiroz + +--- a/drivers/crypto/qce/skcipher.c ++++ b/drivers/crypto/qce/skcipher.c +@@ -29,6 +29,7 @@ static void qce_skcipher_done(void *data + struct qce_cipher_reqctx *rctx = skcipher_request_ctx(req); + struct qce_alg_template *tmpl = to_cipher_tmpl(crypto_skcipher_reqtfm(req)); + struct qce_device *qce = tmpl->qce; ++ struct qce_result_dump *result_buf = qce->dma.result_buf; + enum dma_data_direction dir_src, dir_dst; + u32 status; + int error; +@@ -53,6 +54,7 @@ static void qce_skcipher_done(void *data + if (error < 0) + dev_dbg(qce->dev, "skcipher operation error (%x)\n", status); + ++ memcpy(rctx->iv, result_buf->encr_cntr_iv, rctx->ivsize); + qce->async_req_done(tmpl->qce, error); + } + diff --git a/target/linux/ipq40xx/patches-4.19/046-crypto-qce-initialize-fallback-only-for-AES.patch b/target/linux/ipq40xx/patches-4.19/046-crypto-qce-initialize-fallback-only-for-AES.patch new file mode 100644 index 0000000000..e3adaf864e --- /dev/null +++ b/target/linux/ipq40xx/patches-4.19/046-crypto-qce-initialize-fallback-only-for-AES.patch @@ -0,0 +1,54 @@ +From f2a33ce18232919d3831d1c61a06b6067209282d Mon Sep 17 00:00:00 2001 +From: Eneas U de Queiroz +Date: Fri, 22 Nov 2019 09:34:29 -0300 +Subject: [PATCH] crypto: qce - initialize fallback only for AES + +Adjust cra_flags to add CRYPTO_NEED_FALLBACK only for AES ciphers, where +AES-192 is not handled by the qce hardware, and don't allocate & free +the fallback skcipher for anything other than AES. + +The rest of the code is unchanged, as the use of the fallback is already +restricted to AES. + +Signed-off-by: Eneas U de Queiroz + +--- a/drivers/crypto/qce/skcipher.c ++++ b/drivers/crypto/qce/skcipher.c +@@ -246,7 +246,15 @@ static int qce_skcipher_init(struct cryp + + memset(ctx, 0, sizeof(*ctx)); + crypto_skcipher_set_reqsize(tfm, sizeof(struct qce_cipher_reqctx)); ++ return 0; ++} ++ ++static int qce_skcipher_init_fallback(struct crypto_skcipher *tfm) ++{ ++ struct qce_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ int ret; + ++ qce_skcipher_init(tfm); + ctx->fallback = crypto_alloc_skcipher(crypto_tfm_alg_name(&tfm->base), + 0, CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); +@@ -375,14 +383,18 @@ static int qce_skcipher_register_one(con + + alg->base.cra_priority = 300; + alg->base.cra_flags = CRYPTO_ALG_ASYNC | +- CRYPTO_ALG_NEED_FALLBACK | + CRYPTO_ALG_KERN_DRIVER_ONLY; + alg->base.cra_ctxsize = sizeof(struct qce_cipher_ctx); + alg->base.cra_alignmask = 0; + alg->base.cra_module = THIS_MODULE; + +- alg->init = qce_skcipher_init; +- alg->exit = qce_skcipher_exit; ++ if (IS_AES(def->flags)) { ++ alg->base.cra_flags |= CRYPTO_ALG_NEED_FALLBACK; ++ alg->init = qce_skcipher_init_fallback; ++ alg->exit = qce_skcipher_exit; ++ } else { ++ alg->init = qce_skcipher_init; ++ } + + INIT_LIST_HEAD(&tmpl->entry); + tmpl->crypto_alg_type = CRYPTO_ALG_TYPE_SKCIPHER; diff --git a/target/linux/ipq40xx/patches-4.19/047-crypto-qce-use-cryptlen-when-adding-extra-sgl.patch b/target/linux/ipq40xx/patches-4.19/047-crypto-qce-use-cryptlen-when-adding-extra-sgl.patch new file mode 100644 index 0000000000..aabc8fd622 --- /dev/null +++ b/target/linux/ipq40xx/patches-4.19/047-crypto-qce-use-cryptlen-when-adding-extra-sgl.patch @@ -0,0 +1,83 @@ +From 686aa4db696270dadc5e8b2971769e1676251ff1 Mon Sep 17 00:00:00 2001 +From: Eneas U de Queiroz +Date: Fri, 31 Jan 2020 17:43:16 -0300 +Subject: [PATCH] crypto: qce - use cryptlen when adding extra sgl + +The qce crypto driver appends an extra entry to the dst sgl, to maintain +private state information. + +When the gcm driver sends requests to the ctr skcipher, it passes the +authentication tag after the actual crypto payload, but it must not be +touched. + +Commit 1336c2221bee ("crypto: qce - save a sg table slot for result +buf") limited the destination sgl to avoid overwriting the +authentication tag but it assumed the tag would be in a separate sgl +entry. + +This is not always the case, so it is better to limit the length of the +destination buffer to req->cryptlen before appending the result buf. + +Signed-off-by: Eneas U de Queiroz + +--- a/drivers/crypto/qce/dma.c ++++ b/drivers/crypto/qce/dma.c +@@ -56,9 +56,10 @@ void qce_dma_release(struct qce_dma_data + + struct scatterlist * + qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl, +- int max_ents) ++ unsigned int max_len) + { + struct scatterlist *sg = sgt->sgl, *sg_last = NULL; ++ unsigned int new_len; + + while (sg) { + if (!sg_page(sg)) +@@ -69,13 +70,13 @@ qce_sgtable_add(struct sg_table *sgt, st + if (!sg) + return ERR_PTR(-EINVAL); + +- while (new_sgl && sg && max_ents) { +- sg_set_page(sg, sg_page(new_sgl), new_sgl->length, +- new_sgl->offset); ++ while (new_sgl && sg && max_len) { ++ new_len = new_sgl->length > max_len ? max_len : new_sgl->length; ++ sg_set_page(sg, sg_page(new_sgl), new_len, new_sgl->offset); + sg_last = sg; + sg = sg_next(sg); + new_sgl = sg_next(new_sgl); +- max_ents--; ++ max_len -= new_len; + } + + return sg_last; +--- a/drivers/crypto/qce/dma.h ++++ b/drivers/crypto/qce/dma.h +@@ -51,6 +51,6 @@ void qce_dma_issue_pending(struct qce_dm + int qce_dma_terminate_all(struct qce_dma_data *dma); + struct scatterlist * + qce_sgtable_add(struct sg_table *sgt, struct scatterlist *sg_add, +- int max_ents); ++ unsigned int max_len); + + #endif /* _DMA_H_ */ +--- a/drivers/crypto/qce/skcipher.c ++++ b/drivers/crypto/qce/skcipher.c +@@ -105,13 +105,14 @@ qce_skcipher_async_req_handle(struct cry + + sg_init_one(&rctx->result_sg, qce->dma.result_buf, QCE_RESULT_BUF_SZ); + +- sg = qce_sgtable_add(&rctx->dst_tbl, req->dst, rctx->dst_nents - 1); ++ sg = qce_sgtable_add(&rctx->dst_tbl, req->dst, req->cryptlen); + if (IS_ERR(sg)) { + ret = PTR_ERR(sg); + goto error_free; + } + +- sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg, 1); ++ sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg, ++ QCE_RESULT_BUF_SZ); + if (IS_ERR(sg)) { + ret = PTR_ERR(sg); + goto error_free; diff --git a/target/linux/ipq40xx/patches-4.19/048-crypto-qce-use-AES-fallback-for-small-requests.patch b/target/linux/ipq40xx/patches-4.19/048-crypto-qce-use-AES-fallback-for-small-requests.patch new file mode 100644 index 0000000000..514fde7b63 --- /dev/null +++ b/target/linux/ipq40xx/patches-4.19/048-crypto-qce-use-AES-fallback-for-small-requests.patch @@ -0,0 +1,122 @@ +From 2d3b6fae7d1a2ad821769440daa91d7eec5c8250 Mon Sep 17 00:00:00 2001 +From: Eneas U de Queiroz +Date: Fri, 20 Dec 2019 09:41:44 -0300 +Subject: [PATCH] crypto: qce - use AES fallback for small requests + +Process small blocks using the fallback cipher, as a workaround for an +observed failure (DMA-related, apparently) when computing the GCM ghash +key. This brings a speed gain as well, since it avoids the latency of +using the hardware engine to process small blocks. + +Using software for all 16-byte requests would be enough to make GCM +work, but to increase performance, a larger threshold would be better. +Measuring the performance of supported ciphers with openssl speed, +software matches hardware at around 768-1024 bytes. + +Considering the 256-bit ciphers, software is 2-3 times faster than qce +at 256-bytes, 30% faster at 512, and about even at 768-bytes. With +128-bit keys, the break-even point would be around 1024-bytes. + +This adds the 'aes_sw_max_len' parameter, to set the largest request +length processed by the software fallback. Its default is being set to +512 bytes, a little lower than the break-even point, to balance the cost +in CPU usage. + +Signed-off-by: Eneas U de Queiroz + +--- a/drivers/crypto/Kconfig ++++ b/drivers/crypto/Kconfig +@@ -585,6 +585,29 @@ config CRYPTO_DEV_QCE + hardware. To compile this driver as a module, choose M here. The + module will be called qcrypto. + ++config CRYPTO_DEV_QCE_SW_MAX_LEN ++ int "Default maximum request size to use software for AES" ++ depends on CRYPTO_DEV_QCE && CRYPTO_DEV_QCE_SKCIPHER ++ default 512 ++ help ++ This sets the default maximum request size to perform AES requests ++ using software instead of the crypto engine. It can be changed by ++ setting the aes_sw_max_len parameter. ++ ++ Small blocks are processed faster in software than hardware. ++ Considering the 256-bit ciphers, software is 2-3 times faster than ++ qce at 256-bytes, 30% faster at 512, and about even at 768-bytes. ++ With 128-bit keys, the break-even point would be around 1024-bytes. ++ ++ The default is set a little lower, to 512 bytes, to balance the ++ cost in CPU usage. The minimum recommended setting is 16-bytes ++ (1 AES block), since AES-GCM will fail if you set it lower. ++ Setting this to zero will send all requests to the hardware. ++ ++ Note that 192-bit keys are not supported by the hardware and are ++ always processed by the software fallback, and all DES requests ++ are done by the hardware. ++ + config CRYPTO_DEV_QCOM_RNG + tristate "Qualcomm Random Number Generator Driver" + depends on ARCH_QCOM || COMPILE_TEST +--- a/drivers/crypto/qce/skcipher.c ++++ b/drivers/crypto/qce/skcipher.c +@@ -13,6 +13,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -20,6 +21,13 @@ + + #include "cipher.h" + ++static unsigned int aes_sw_max_len = CONFIG_CRYPTO_DEV_QCE_SW_MAX_LEN; ++module_param(aes_sw_max_len, uint, 0644); ++MODULE_PARM_DESC(aes_sw_max_len, ++ "Only use hardware for AES requests larger than this " ++ "[0=always use hardware; anything <16 breaks AES-GCM; default=" ++ __stringify(CONFIG_CRYPTO_DEV_QCE_SOFT_THRESHOLD)"]"); ++ + static LIST_HEAD(skcipher_algs); + + static void qce_skcipher_done(void *data) +@@ -170,15 +178,7 @@ static int qce_skcipher_setkey(struct cr + if (!key || !keylen) + return -EINVAL; + +- if (IS_AES(flags)) { +- switch (IS_XTS(flags) ? keylen >> 1 : keylen) { +- case AES_KEYSIZE_128: +- case AES_KEYSIZE_256: +- break; +- default: +- goto fallback; +- } +- } else if (IS_DES(flags)) { ++ if (IS_DES(flags)) { + u32 tmp[DES_EXPKEY_WORDS]; + + ret = des_ekey(tmp, key); +@@ -189,8 +189,8 @@ static int qce_skcipher_setkey(struct cr + + ctx->enc_keylen = keylen; + memcpy(ctx->enc_key, key, keylen); +- return 0; +-fallback: ++ if (!IS_AES(flags)) ++ return 0; + ret = crypto_skcipher_setkey(ctx->fallback, key, keylen); + if (!ret) + ctx->enc_keylen = keylen; +@@ -213,8 +213,9 @@ static int qce_skcipher_crypt(struct skc + rctx->flags |= encrypt ? QCE_ENCRYPT : QCE_DECRYPT; + keylen = IS_XTS(rctx->flags) ? ctx->enc_keylen >> 1 : ctx->enc_keylen; + +- if (IS_AES(rctx->flags) && keylen != AES_KEYSIZE_128 && +- keylen != AES_KEYSIZE_256) { ++ if (IS_AES(rctx->flags) && ++ ((keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256) || ++ req->cryptlen <= aes_sw_max_len)) { + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); diff --git a/target/linux/ipq40xx/patches-4.19/049-crypto-qce-handle-AES-XTS-cases-that-qce-fails.patch b/target/linux/ipq40xx/patches-4.19/049-crypto-qce-handle-AES-XTS-cases-that-qce-fails.patch new file mode 100644 index 0000000000..00dc3791ab --- /dev/null +++ b/target/linux/ipq40xx/patches-4.19/049-crypto-qce-handle-AES-XTS-cases-that-qce-fails.patch @@ -0,0 +1,53 @@ +From bbf2b1cf22dc98f3df33b6666df046dfb9564d91 Mon Sep 17 00:00:00 2001 +From: Eneas U de Queiroz +Date: Wed, 5 Feb 2020 13:42:25 -0300 +Subject: [PATCH] crypto: qce - handle AES-XTS cases that qce fails + +QCE hangs when presented with an AES-XTS request whose length is larger +than QCE_SECTOR_SIZE (512-bytes), and is not a multiple of it. Let the +fallback cipher handle them. + +Signed-off-by: Eneas U de Queiroz + +--- a/drivers/crypto/qce/common.c ++++ b/drivers/crypto/qce/common.c +@@ -23,8 +23,6 @@ + #include "regs-v5.h" + #include "sha.h" + +-#define QCE_SECTOR_SIZE 512 +- + static inline u32 qce_read(struct qce_device *qce, u32 offset) + { + return readl(qce->base + offset); +--- a/drivers/crypto/qce/common.h ++++ b/drivers/crypto/qce/common.h +@@ -20,6 +20,9 @@ + #include + #include + ++/* xts du size */ ++#define QCE_SECTOR_SIZE 512 ++ + /* key size in bytes */ + #define QCE_SHA_HMAC_KEY_SIZE 64 + #define QCE_MAX_CIPHER_KEY_SIZE AES_KEYSIZE_256 +--- a/drivers/crypto/qce/skcipher.c ++++ b/drivers/crypto/qce/skcipher.c +@@ -213,9 +213,14 @@ static int qce_skcipher_crypt(struct skc + rctx->flags |= encrypt ? QCE_ENCRYPT : QCE_DECRYPT; + keylen = IS_XTS(rctx->flags) ? ctx->enc_keylen >> 1 : ctx->enc_keylen; + ++ /* qce is hanging when AES-XTS request len > QCE_SECTOR_SIZE and ++ * is not a multiple of it; pass such requests to the fallback ++ */ + if (IS_AES(rctx->flags) && +- ((keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256) || +- req->cryptlen <= aes_sw_max_len)) { ++ (((keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256) || ++ req->cryptlen <= aes_sw_max_len) || ++ (IS_XTS(rctx->flags) && req->cryptlen > QCE_SECTOR_SIZE && ++ req->cryptlen % QCE_SECTOR_SIZE))) { + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); diff --git a/target/linux/ipq40xx/patches-4.19/051-crypto-qce-allow-building-only-hashes-ciphers.patch b/target/linux/ipq40xx/patches-4.19/051-crypto-qce-allow-building-only-hashes-ciphers.patch new file mode 100644 index 0000000000..11f711940d --- /dev/null +++ b/target/linux/ipq40xx/patches-4.19/051-crypto-qce-allow-building-only-hashes-ciphers.patch @@ -0,0 +1,406 @@ +From 62134842498927a0fcc19798a615340a7a6a9e62 Mon Sep 17 00:00:00 2001 +From: Eneas U de Queiroz +Date: Mon, 28 Oct 2019 15:17:19 -0300 +Subject: [PATCH] crypto: qce - allow building only hashes/ciphers + +Signed-off-by: Eneas U de Queiroz + +--- a/drivers/crypto/Kconfig ++++ b/drivers/crypto/Kconfig +@@ -573,6 +573,14 @@ config CRYPTO_DEV_QCE + tristate "Qualcomm crypto engine accelerator" + depends on ARCH_QCOM || COMPILE_TEST + depends on HAS_IOMEM ++ help ++ This driver supports Qualcomm crypto engine accelerator ++ hardware. To compile this driver as a module, choose M here. The ++ module will be called qcrypto. ++ ++config CRYPTO_DEV_QCE_SKCIPHER ++ bool ++ depends on CRYPTO_DEV_QCE + select CRYPTO_AES + select CRYPTO_DES + select CRYPTO_ECB +@@ -580,10 +588,57 @@ config CRYPTO_DEV_QCE + select CRYPTO_XTS + select CRYPTO_CTR + select CRYPTO_BLKCIPHER ++ ++config CRYPTO_DEV_QCE_SHA ++ bool ++ depends on CRYPTO_DEV_QCE ++ ++choice ++ prompt "Algorithms enabled for QCE acceleration" ++ default CRYPTO_DEV_QCE_ENABLE_ALL ++ depends on CRYPTO_DEV_QCE + help +- This driver supports Qualcomm crypto engine accelerator +- hardware. To compile this driver as a module, choose M here. The +- module will be called qcrypto. ++ This option allows to choose whether to build support for all algorihtms ++ (default), hashes-only, or skciphers-only. ++ ++ The QCE engine does not appear to scale as well as the CPU to handle ++ multiple crypto requests. While the ipq40xx chips have 4-core CPUs, the ++ QCE handles only 2 requests in parallel. ++ ++ Ipsec throughput seems to improve when disabling either family of ++ algorithms, sharing the load with the CPU. Enabling skciphers-only ++ appears to work best. ++ ++ config CRYPTO_DEV_QCE_ENABLE_ALL ++ bool "All supported algorithms" ++ select CRYPTO_DEV_QCE_SKCIPHER ++ select CRYPTO_DEV_QCE_SHA ++ help ++ Enable all supported algorithms: ++ - AES (CBC, CTR, ECB, XTS) ++ - 3DES (CBC, ECB) ++ - DES (CBC, ECB) ++ - SHA1, HMAC-SHA1 ++ - SHA256, HMAC-SHA256 ++ ++ config CRYPTO_DEV_QCE_ENABLE_SKCIPHER ++ bool "Symmetric-key ciphers only" ++ select CRYPTO_DEV_QCE_SKCIPHER ++ help ++ Enable symmetric-key ciphers only: ++ - AES (CBC, CTR, ECB, XTS) ++ - 3DES (ECB, CBC) ++ - DES (ECB, CBC) ++ ++ config CRYPTO_DEV_QCE_ENABLE_SHA ++ bool "Hash/HMAC only" ++ select CRYPTO_DEV_QCE_SHA ++ help ++ Enable hashes/HMAC algorithms only: ++ - SHA1, HMAC-SHA1 ++ - SHA256, HMAC-SHA256 ++ ++endchoice + + config CRYPTO_DEV_QCE_SW_MAX_LEN + int "Default maximum request size to use software for AES" +--- a/drivers/crypto/qce/Makefile ++++ b/drivers/crypto/qce/Makefile +@@ -2,6 +2,7 @@ + obj-$(CONFIG_CRYPTO_DEV_QCE) += qcrypto.o + qcrypto-objs := core.o \ + common.o \ +- dma.o \ +- sha.o \ +- skcipher.o ++ dma.o ++ ++qcrypto-$(CONFIG_CRYPTO_DEV_QCE_SHA) += sha.o ++qcrypto-$(CONFIG_CRYPTO_DEV_QCE_SKCIPHER) += skcipher.o +--- a/drivers/crypto/qce/common.c ++++ b/drivers/crypto/qce/common.c +@@ -51,52 +51,56 @@ qce_clear_array(struct qce_device *qce, + qce_write(qce, offset + i * sizeof(u32), 0); + } + +-static u32 qce_encr_cfg(unsigned long flags, u32 aes_key_size) ++static u32 qce_config_reg(struct qce_device *qce, int little) + { +- u32 cfg = 0; ++ u32 beats = (qce->burst_size >> 3) - 1; ++ u32 pipe_pair = qce->pipe_pair_id; ++ u32 config; + +- if (IS_AES(flags)) { +- if (aes_key_size == AES_KEYSIZE_128) +- cfg |= ENCR_KEY_SZ_AES128 << ENCR_KEY_SZ_SHIFT; +- else if (aes_key_size == AES_KEYSIZE_256) +- cfg |= ENCR_KEY_SZ_AES256 << ENCR_KEY_SZ_SHIFT; +- } ++ config = (beats << REQ_SIZE_SHIFT) & REQ_SIZE_MASK; ++ config |= BIT(MASK_DOUT_INTR_SHIFT) | BIT(MASK_DIN_INTR_SHIFT) | ++ BIT(MASK_OP_DONE_INTR_SHIFT) | BIT(MASK_ERR_INTR_SHIFT); ++ config |= (pipe_pair << PIPE_SET_SELECT_SHIFT) & PIPE_SET_SELECT_MASK; ++ config &= ~HIGH_SPD_EN_N_SHIFT; + +- if (IS_AES(flags)) +- cfg |= ENCR_ALG_AES << ENCR_ALG_SHIFT; +- else if (IS_DES(flags) || IS_3DES(flags)) +- cfg |= ENCR_ALG_DES << ENCR_ALG_SHIFT; ++ if (little) ++ config |= BIT(LITTLE_ENDIAN_MODE_SHIFT); + +- if (IS_DES(flags)) +- cfg |= ENCR_KEY_SZ_DES << ENCR_KEY_SZ_SHIFT; ++ return config; ++} + +- if (IS_3DES(flags)) +- cfg |= ENCR_KEY_SZ_3DES << ENCR_KEY_SZ_SHIFT; ++void qce_cpu_to_be32p_array(__be32 *dst, const u8 *src, unsigned int len) ++{ ++ __be32 *d = dst; ++ const u8 *s = src; ++ unsigned int n; + +- switch (flags & QCE_MODE_MASK) { +- case QCE_MODE_ECB: +- cfg |= ENCR_MODE_ECB << ENCR_MODE_SHIFT; +- break; +- case QCE_MODE_CBC: +- cfg |= ENCR_MODE_CBC << ENCR_MODE_SHIFT; +- break; +- case QCE_MODE_CTR: +- cfg |= ENCR_MODE_CTR << ENCR_MODE_SHIFT; +- break; +- case QCE_MODE_XTS: +- cfg |= ENCR_MODE_XTS << ENCR_MODE_SHIFT; +- break; +- case QCE_MODE_CCM: +- cfg |= ENCR_MODE_CCM << ENCR_MODE_SHIFT; +- cfg |= LAST_CCM_XFR << LAST_CCM_SHIFT; +- break; +- default: +- return ~0; ++ n = len / sizeof(u32); ++ for (; n > 0; n--) { ++ *d = cpu_to_be32p((const __u32 *) s); ++ s += sizeof(__u32); ++ d++; + } ++} + +- return cfg; ++static void qce_setup_config(struct qce_device *qce) ++{ ++ u32 config; ++ ++ /* get big endianness */ ++ config = qce_config_reg(qce, 0); ++ ++ /* clear status */ ++ qce_write(qce, REG_STATUS, 0); ++ qce_write(qce, REG_CONFIG, config); ++} ++ ++static inline void qce_crypto_go(struct qce_device *qce) ++{ ++ qce_write(qce, REG_GOPROC, BIT(GO_SHIFT) | BIT(RESULTS_DUMP_SHIFT)); + } + ++#ifdef CONFIG_CRYPTO_DEV_QCE_SHA + static u32 qce_auth_cfg(unsigned long flags, u32 key_size) + { + u32 cfg = 0; +@@ -143,88 +147,6 @@ static u32 qce_auth_cfg(unsigned long fl + return cfg; + } + +-static u32 qce_config_reg(struct qce_device *qce, int little) +-{ +- u32 beats = (qce->burst_size >> 3) - 1; +- u32 pipe_pair = qce->pipe_pair_id; +- u32 config; +- +- config = (beats << REQ_SIZE_SHIFT) & REQ_SIZE_MASK; +- config |= BIT(MASK_DOUT_INTR_SHIFT) | BIT(MASK_DIN_INTR_SHIFT) | +- BIT(MASK_OP_DONE_INTR_SHIFT) | BIT(MASK_ERR_INTR_SHIFT); +- config |= (pipe_pair << PIPE_SET_SELECT_SHIFT) & PIPE_SET_SELECT_MASK; +- config &= ~HIGH_SPD_EN_N_SHIFT; +- +- if (little) +- config |= BIT(LITTLE_ENDIAN_MODE_SHIFT); +- +- return config; +-} +- +-void qce_cpu_to_be32p_array(__be32 *dst, const u8 *src, unsigned int len) +-{ +- __be32 *d = dst; +- const u8 *s = src; +- unsigned int n; +- +- n = len / sizeof(u32); +- for (; n > 0; n--) { +- *d = cpu_to_be32p((const __u32 *) s); +- s += sizeof(__u32); +- d++; +- } +-} +- +-static void qce_xts_swapiv(__be32 *dst, const u8 *src, unsigned int ivsize) +-{ +- u8 swap[QCE_AES_IV_LENGTH]; +- u32 i, j; +- +- if (ivsize > QCE_AES_IV_LENGTH) +- return; +- +- memset(swap, 0, QCE_AES_IV_LENGTH); +- +- for (i = (QCE_AES_IV_LENGTH - ivsize), j = ivsize - 1; +- i < QCE_AES_IV_LENGTH; i++, j--) +- swap[i] = src[j]; +- +- qce_cpu_to_be32p_array(dst, swap, QCE_AES_IV_LENGTH); +-} +- +-static void qce_xtskey(struct qce_device *qce, const u8 *enckey, +- unsigned int enckeylen, unsigned int cryptlen) +-{ +- u32 xtskey[QCE_MAX_CIPHER_KEY_SIZE / sizeof(u32)] = {0}; +- unsigned int xtsklen = enckeylen / (2 * sizeof(u32)); +- unsigned int xtsdusize; +- +- qce_cpu_to_be32p_array((__be32 *)xtskey, enckey + enckeylen / 2, +- enckeylen / 2); +- qce_write_array(qce, REG_ENCR_XTS_KEY0, xtskey, xtsklen); +- +- /* xts du size 512B */ +- xtsdusize = min_t(u32, QCE_SECTOR_SIZE, cryptlen); +- qce_write(qce, REG_ENCR_XTS_DU_SIZE, xtsdusize); +-} +- +-static void qce_setup_config(struct qce_device *qce) +-{ +- u32 config; +- +- /* get big endianness */ +- config = qce_config_reg(qce, 0); +- +- /* clear status */ +- qce_write(qce, REG_STATUS, 0); +- qce_write(qce, REG_CONFIG, config); +-} +- +-static inline void qce_crypto_go(struct qce_device *qce) +-{ +- qce_write(qce, REG_GOPROC, BIT(GO_SHIFT) | BIT(RESULTS_DUMP_SHIFT)); +-} +- + static int qce_setup_regs_ahash(struct crypto_async_request *async_req, + u32 totallen, u32 offset) + { +@@ -309,6 +231,87 @@ go_proc: + + return 0; + } ++#endif ++ ++#ifdef CONFIG_CRYPTO_DEV_QCE_SKCIPHER ++static u32 qce_encr_cfg(unsigned long flags, u32 aes_key_size) ++{ ++ u32 cfg = 0; ++ ++ if (IS_AES(flags)) { ++ if (aes_key_size == AES_KEYSIZE_128) ++ cfg |= ENCR_KEY_SZ_AES128 << ENCR_KEY_SZ_SHIFT; ++ else if (aes_key_size == AES_KEYSIZE_256) ++ cfg |= ENCR_KEY_SZ_AES256 << ENCR_KEY_SZ_SHIFT; ++ } ++ ++ if (IS_AES(flags)) ++ cfg |= ENCR_ALG_AES << ENCR_ALG_SHIFT; ++ else if (IS_DES(flags) || IS_3DES(flags)) ++ cfg |= ENCR_ALG_DES << ENCR_ALG_SHIFT; ++ ++ if (IS_DES(flags)) ++ cfg |= ENCR_KEY_SZ_DES << ENCR_KEY_SZ_SHIFT; ++ ++ if (IS_3DES(flags)) ++ cfg |= ENCR_KEY_SZ_3DES << ENCR_KEY_SZ_SHIFT; ++ ++ switch (flags & QCE_MODE_MASK) { ++ case QCE_MODE_ECB: ++ cfg |= ENCR_MODE_ECB << ENCR_MODE_SHIFT; ++ break; ++ case QCE_MODE_CBC: ++ cfg |= ENCR_MODE_CBC << ENCR_MODE_SHIFT; ++ break; ++ case QCE_MODE_CTR: ++ cfg |= ENCR_MODE_CTR << ENCR_MODE_SHIFT; ++ break; ++ case QCE_MODE_XTS: ++ cfg |= ENCR_MODE_XTS << ENCR_MODE_SHIFT; ++ break; ++ case QCE_MODE_CCM: ++ cfg |= ENCR_MODE_CCM << ENCR_MODE_SHIFT; ++ cfg |= LAST_CCM_XFR << LAST_CCM_SHIFT; ++ break; ++ default: ++ return ~0; ++ } ++ ++ return cfg; ++} ++ ++static void qce_xts_swapiv(__be32 *dst, const u8 *src, unsigned int ivsize) ++{ ++ u8 swap[QCE_AES_IV_LENGTH]; ++ u32 i, j; ++ ++ if (ivsize > QCE_AES_IV_LENGTH) ++ return; ++ ++ memset(swap, 0, QCE_AES_IV_LENGTH); ++ ++ for (i = (QCE_AES_IV_LENGTH - ivsize), j = ivsize - 1; ++ i < QCE_AES_IV_LENGTH; i++, j--) ++ swap[i] = src[j]; ++ ++ qce_cpu_to_be32p_array(dst, swap, QCE_AES_IV_LENGTH); ++} ++ ++static void qce_xtskey(struct qce_device *qce, const u8 *enckey, ++ unsigned int enckeylen, unsigned int cryptlen) ++{ ++ u32 xtskey[QCE_MAX_CIPHER_KEY_SIZE / sizeof(u32)] = {0}; ++ unsigned int xtsklen = enckeylen / (2 * sizeof(u32)); ++ unsigned int xtsdusize; ++ ++ qce_cpu_to_be32p_array((__be32 *)xtskey, enckey + enckeylen / 2, ++ enckeylen / 2); ++ qce_write_array(qce, REG_ENCR_XTS_KEY0, xtskey, xtsklen); ++ ++ /* xts du size 512B */ ++ xtsdusize = min_t(u32, QCE_SECTOR_SIZE, cryptlen); ++ qce_write(qce, REG_ENCR_XTS_DU_SIZE, xtsdusize); ++} + + static int qce_setup_regs_skcipher(struct crypto_async_request *async_req, + u32 totallen, u32 offset) +@@ -390,15 +393,20 @@ static int qce_setup_regs_skcipher(struc + + return 0; + } ++#endif + + int qce_start(struct crypto_async_request *async_req, u32 type, u32 totallen, + u32 offset) + { + switch (type) { ++#ifdef CONFIG_CRYPTO_DEV_QCE_SKCIPHER + case CRYPTO_ALG_TYPE_SKCIPHER: + return qce_setup_regs_skcipher(async_req, totallen, offset); ++#endif ++#ifdef CONFIG_CRYPTO_DEV_QCE_SHA + case CRYPTO_ALG_TYPE_AHASH: + return qce_setup_regs_ahash(async_req, totallen, offset); ++#endif + default: + return -EINVAL; + } +--- a/drivers/crypto/qce/core.c ++++ b/drivers/crypto/qce/core.c +@@ -30,8 +30,12 @@ + #define QCE_QUEUE_LENGTH 1 + + static const struct qce_algo_ops *qce_ops[] = { ++#ifdef CONFIG_CRYPTO_DEV_QCE_SKCIPHER + &skcipher_ops, ++#endif ++#ifdef CONFIG_CRYPTO_DEV_QCE_SHA + &ahash_ops, ++#endif + }; + + static void qce_unregister_algs(struct qce_device *qce) diff --git a/target/linux/ipq40xx/patches-5.4/041-crypto-qce-fix-ctr-aes-qce-block-chunk-sizes.patch b/target/linux/ipq40xx/patches-5.4/041-crypto-qce-fix-ctr-aes-qce-block-chunk-sizes.patch new file mode 100644 index 0000000000..ac4f163f4a --- /dev/null +++ b/target/linux/ipq40xx/patches-5.4/041-crypto-qce-fix-ctr-aes-qce-block-chunk-sizes.patch @@ -0,0 +1,43 @@ +From bb5c863b3d3cbd10e80b2ebf409934a091058f54 Mon Sep 17 00:00:00 2001 +From: Eneas U de Queiroz +Date: Fri, 20 Dec 2019 16:02:13 -0300 +Subject: [PATCH 02/11] crypto: qce - fix ctr-aes-qce block, chunk sizes + +Set blocksize of ctr-aes-qce to 1, so it can operate as a stream cipher, +adding the definition for chucksize instead, where the underlying block +size belongs. + +Signed-off-by: Eneas U de Queiroz +Signed-off-by: Herbert Xu +--- + drivers/crypto/qce/skcipher.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/crypto/qce/skcipher.c ++++ b/drivers/crypto/qce/skcipher.c +@@ -270,6 +270,7 @@ struct qce_skcipher_def { + const char *name; + const char *drv_name; + unsigned int blocksize; ++ unsigned int chunksize; + unsigned int ivsize; + unsigned int min_keysize; + unsigned int max_keysize; +@@ -298,7 +299,8 @@ static const struct qce_skcipher_def skc + .flags = QCE_ALG_AES | QCE_MODE_CTR, + .name = "ctr(aes)", + .drv_name = "ctr-aes-qce", +- .blocksize = AES_BLOCK_SIZE, ++ .blocksize = 1, ++ .chunksize = AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, +@@ -368,6 +370,7 @@ static int qce_skcipher_register_one(con + def->drv_name); + + alg->base.cra_blocksize = def->blocksize; ++ alg->chunksize = def->chunksize; + alg->ivsize = def->ivsize; + alg->min_keysize = def->min_keysize; + alg->max_keysize = def->max_keysize; diff --git a/target/linux/ipq40xx/patches-5.4/042-crypto-qce-fix-xts-aes-qce-key-sizes.patch b/target/linux/ipq40xx/patches-5.4/042-crypto-qce-fix-xts-aes-qce-key-sizes.patch new file mode 100644 index 0000000000..4dcf1ac726 --- /dev/null +++ b/target/linux/ipq40xx/patches-5.4/042-crypto-qce-fix-xts-aes-qce-key-sizes.patch @@ -0,0 +1,60 @@ +From 7de4c2bd196f111e39cc60f6197654aff23ba2b4 Mon Sep 17 00:00:00 2001 +From: Eneas U de Queiroz +Date: Fri, 20 Dec 2019 16:02:14 -0300 +Subject: [PATCH 03/11] crypto: qce - fix xts-aes-qce key sizes + +XTS-mode uses two keys, so the keysizes should be doubled in +skcipher_def, and halved when checking if it is AES-128/192/256. + +Signed-off-by: Eneas U de Queiroz +Signed-off-by: Herbert Xu +--- + drivers/crypto/qce/skcipher.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +--- a/drivers/crypto/qce/skcipher.c ++++ b/drivers/crypto/qce/skcipher.c +@@ -154,12 +154,13 @@ static int qce_skcipher_setkey(struct cr + { + struct crypto_tfm *tfm = crypto_skcipher_tfm(ablk); + struct qce_cipher_ctx *ctx = crypto_tfm_ctx(tfm); ++ unsigned long flags = to_cipher_tmpl(ablk)->alg_flags; + int ret; + + if (!key || !keylen) + return -EINVAL; + +- switch (keylen) { ++ switch (IS_XTS(flags) ? keylen >> 1 : keylen) { + case AES_KEYSIZE_128: + case AES_KEYSIZE_256: + break; +@@ -213,13 +214,15 @@ static int qce_skcipher_crypt(struct skc + struct qce_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct qce_cipher_reqctx *rctx = skcipher_request_ctx(req); + struct qce_alg_template *tmpl = to_cipher_tmpl(tfm); ++ int keylen; + int ret; + + rctx->flags = tmpl->alg_flags; + rctx->flags |= encrypt ? QCE_ENCRYPT : QCE_DECRYPT; ++ keylen = IS_XTS(rctx->flags) ? ctx->enc_keylen >> 1 : ctx->enc_keylen; + +- if (IS_AES(rctx->flags) && ctx->enc_keylen != AES_KEYSIZE_128 && +- ctx->enc_keylen != AES_KEYSIZE_256) { ++ if (IS_AES(rctx->flags) && keylen != AES_KEYSIZE_128 && ++ keylen != AES_KEYSIZE_256) { + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_sync_tfm(subreq, ctx->fallback); +@@ -311,8 +314,8 @@ static const struct qce_skcipher_def skc + .drv_name = "xts-aes-qce", + .blocksize = AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, +- .min_keysize = AES_MIN_KEY_SIZE, +- .max_keysize = AES_MAX_KEY_SIZE, ++ .min_keysize = AES_MIN_KEY_SIZE * 2, ++ .max_keysize = AES_MAX_KEY_SIZE * 2, + }, + { + .flags = QCE_ALG_DES | QCE_MODE_ECB, diff --git a/target/linux/ipq40xx/patches-5.4/043-crypto-qce-save-a-sg-table-slot-for-result-buf.patch b/target/linux/ipq40xx/patches-5.4/043-crypto-qce-save-a-sg-table-slot-for-result-buf.patch new file mode 100644 index 0000000000..2385d483f2 --- /dev/null +++ b/target/linux/ipq40xx/patches-5.4/043-crypto-qce-save-a-sg-table-slot-for-result-buf.patch @@ -0,0 +1,85 @@ +From 3ee50c896d712dc2fc8f34c2cd1918d035e74045 Mon Sep 17 00:00:00 2001 +From: Eneas U de Queiroz +Date: Fri, 20 Dec 2019 16:02:15 -0300 +Subject: [PATCH 04/11] crypto: qce - save a sg table slot for result buf + +When ctr-aes-qce is used for gcm-mode, an extra sg entry for the +authentication tag is present, causing trouble when the qce driver +prepares the dst-results sg table for dma. + +It computes the number of entries needed with sg_nents_for_len, leaving +out the tag entry. Then it creates a sg table with that number plus +one, used to store a result buffer. + +When copying the sg table, there's no limit to the number of entries +copied, so the extra slot is filled with the authentication tag sg. +When the driver tries to add the result sg, the list is full, and it +returns EINVAL. + +By limiting the number of sg entries copied to the dest table, the slot +for the result buffer is guaranteed to be unused. + +Signed-off-by: Eneas U de Queiroz +Signed-off-by: Herbert Xu +--- + drivers/crypto/qce/dma.c | 6 ++++-- + drivers/crypto/qce/dma.h | 3 ++- + drivers/crypto/qce/skcipher.c | 4 ++-- + 3 files changed, 8 insertions(+), 5 deletions(-) + +--- a/drivers/crypto/qce/dma.c ++++ b/drivers/crypto/qce/dma.c +@@ -47,7 +47,8 @@ void qce_dma_release(struct qce_dma_data + } + + struct scatterlist * +-qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl) ++qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl, ++ int max_ents) + { + struct scatterlist *sg = sgt->sgl, *sg_last = NULL; + +@@ -60,12 +61,13 @@ qce_sgtable_add(struct sg_table *sgt, st + if (!sg) + return ERR_PTR(-EINVAL); + +- while (new_sgl && sg) { ++ while (new_sgl && sg && max_ents) { + sg_set_page(sg, sg_page(new_sgl), new_sgl->length, + new_sgl->offset); + sg_last = sg; + sg = sg_next(sg); + new_sgl = sg_next(new_sgl); ++ max_ents--; + } + + return sg_last; +--- a/drivers/crypto/qce/dma.h ++++ b/drivers/crypto/qce/dma.h +@@ -42,6 +42,7 @@ int qce_dma_prep_sgs(struct qce_dma_data + void qce_dma_issue_pending(struct qce_dma_data *dma); + int qce_dma_terminate_all(struct qce_dma_data *dma); + struct scatterlist * +-qce_sgtable_add(struct sg_table *sgt, struct scatterlist *sg_add); ++qce_sgtable_add(struct sg_table *sgt, struct scatterlist *sg_add, ++ int max_ents); + + #endif /* _DMA_H_ */ +--- a/drivers/crypto/qce/skcipher.c ++++ b/drivers/crypto/qce/skcipher.c +@@ -95,13 +95,13 @@ qce_skcipher_async_req_handle(struct cry + + sg_init_one(&rctx->result_sg, qce->dma.result_buf, QCE_RESULT_BUF_SZ); + +- sg = qce_sgtable_add(&rctx->dst_tbl, req->dst); ++ sg = qce_sgtable_add(&rctx->dst_tbl, req->dst, rctx->dst_nents - 1); + if (IS_ERR(sg)) { + ret = PTR_ERR(sg); + goto error_free; + } + +- sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg); ++ sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg, 1); + if (IS_ERR(sg)) { + ret = PTR_ERR(sg); + goto error_free; diff --git a/target/linux/ipq40xx/patches-5.4/044-crypto-qce-update-the-skcipher-IV.patch b/target/linux/ipq40xx/patches-5.4/044-crypto-qce-update-the-skcipher-IV.patch new file mode 100644 index 0000000000..5efdb72c44 --- /dev/null +++ b/target/linux/ipq40xx/patches-5.4/044-crypto-qce-update-the-skcipher-IV.patch @@ -0,0 +1,31 @@ +From 3e806a12d10af2581aa26c37b58439286eab9782 Mon Sep 17 00:00:00 2001 +From: Eneas U de Queiroz +Date: Fri, 20 Dec 2019 16:02:16 -0300 +Subject: [PATCH 05/11] crypto: qce - update the skcipher IV + +Update the IV after the completion of each cipher operation. + +Signed-off-by: Eneas U de Queiroz +Signed-off-by: Herbert Xu +--- + drivers/crypto/qce/skcipher.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/crypto/qce/skcipher.c ++++ b/drivers/crypto/qce/skcipher.c +@@ -21,6 +21,7 @@ static void qce_skcipher_done(void *data + struct qce_cipher_reqctx *rctx = skcipher_request_ctx(req); + struct qce_alg_template *tmpl = to_cipher_tmpl(crypto_skcipher_reqtfm(req)); + struct qce_device *qce = tmpl->qce; ++ struct qce_result_dump *result_buf = qce->dma.result_buf; + enum dma_data_direction dir_src, dir_dst; + u32 status; + int error; +@@ -45,6 +46,7 @@ static void qce_skcipher_done(void *data + if (error < 0) + dev_dbg(qce->dev, "skcipher operation error (%x)\n", status); + ++ memcpy(rctx->iv, result_buf->encr_cntr_iv, rctx->ivsize); + qce->async_req_done(tmpl->qce, error); + } + diff --git a/target/linux/ipq40xx/patches-5.4/046-crypto-qce-initialize-fallback-only-for-AES.patch b/target/linux/ipq40xx/patches-5.4/046-crypto-qce-initialize-fallback-only-for-AES.patch new file mode 100644 index 0000000000..84aef04ef4 --- /dev/null +++ b/target/linux/ipq40xx/patches-5.4/046-crypto-qce-initialize-fallback-only-for-AES.patch @@ -0,0 +1,54 @@ +From 8ceda883205db6dfedb82e39f67feae3b50c95a1 Mon Sep 17 00:00:00 2001 +From: Eneas U de Queiroz +Date: Fri, 20 Dec 2019 16:02:17 -0300 +Subject: [PATCH 06/11] crypto: qce - initialize fallback only for AES + +Adjust cra_flags to add CRYPTO_NEED_FALLBACK only for AES ciphers, where +AES-192 is not handled by the qce hardware, and don't allocate & free +the fallback skcipher for other algorithms. + +Signed-off-by: Eneas U de Queiroz +Signed-off-by: Herbert Xu +--- + drivers/crypto/qce/skcipher.c | 17 ++++++++++++++--- + 1 file changed, 14 insertions(+), 3 deletions(-) + +--- a/drivers/crypto/qce/skcipher.c ++++ b/drivers/crypto/qce/skcipher.c +@@ -257,7 +257,14 @@ static int qce_skcipher_init(struct cryp + + memset(ctx, 0, sizeof(*ctx)); + crypto_skcipher_set_reqsize(tfm, sizeof(struct qce_cipher_reqctx)); ++ return 0; ++} ++ ++static int qce_skcipher_init_fallback(struct crypto_skcipher *tfm) ++{ ++ struct qce_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + ++ qce_skcipher_init(tfm); + ctx->fallback = crypto_alloc_sync_skcipher(crypto_tfm_alg_name(&tfm->base), + 0, CRYPTO_ALG_NEED_FALLBACK); + return PTR_ERR_OR_ZERO(ctx->fallback); +@@ -387,14 +394,18 @@ static int qce_skcipher_register_one(con + + alg->base.cra_priority = 300; + alg->base.cra_flags = CRYPTO_ALG_ASYNC | +- CRYPTO_ALG_NEED_FALLBACK | + CRYPTO_ALG_KERN_DRIVER_ONLY; + alg->base.cra_ctxsize = sizeof(struct qce_cipher_ctx); + alg->base.cra_alignmask = 0; + alg->base.cra_module = THIS_MODULE; + +- alg->init = qce_skcipher_init; +- alg->exit = qce_skcipher_exit; ++ if (IS_AES(def->flags)) { ++ alg->base.cra_flags |= CRYPTO_ALG_NEED_FALLBACK; ++ alg->init = qce_skcipher_init_fallback; ++ alg->exit = qce_skcipher_exit; ++ } else { ++ alg->init = qce_skcipher_init; ++ } + + INIT_LIST_HEAD(&tmpl->entry); + tmpl->crypto_alg_type = CRYPTO_ALG_TYPE_SKCIPHER; diff --git a/target/linux/ipq40xx/patches-5.4/047-crypto-qce-use-cryptlen-when-adding-extra-sgl.patch b/target/linux/ipq40xx/patches-5.4/047-crypto-qce-use-cryptlen-when-adding-extra-sgl.patch new file mode 100644 index 0000000000..160420b485 --- /dev/null +++ b/target/linux/ipq40xx/patches-5.4/047-crypto-qce-use-cryptlen-when-adding-extra-sgl.patch @@ -0,0 +1,89 @@ +From d6364b8128439a8c0e381f80c38667de9f15eef8 Mon Sep 17 00:00:00 2001 +From: Eneas U de Queiroz +Date: Fri, 7 Feb 2020 12:02:25 -0300 +Subject: [PATCH 09/11] crypto: qce - use cryptlen when adding extra sgl + +The qce crypto driver appends an extra entry to the dst sgl, to maintain +private state information. + +When the gcm driver sends requests to the ctr skcipher, it passes the +authentication tag after the actual crypto payload, but it must not be +touched. + +Commit 1336c2221bee ("crypto: qce - save a sg table slot for result +buf") limited the destination sgl to avoid overwriting the +authentication tag but it assumed the tag would be in a separate sgl +entry. + +This is not always the case, so it is better to limit the length of the +destination buffer to req->cryptlen before appending the result buf. + +Signed-off-by: Eneas U de Queiroz +Signed-off-by: Herbert Xu +--- + drivers/crypto/qce/dma.c | 11 ++++++----- + drivers/crypto/qce/dma.h | 2 +- + drivers/crypto/qce/skcipher.c | 5 +++-- + 3 files changed, 10 insertions(+), 8 deletions(-) + +--- a/drivers/crypto/qce/dma.c ++++ b/drivers/crypto/qce/dma.c +@@ -48,9 +48,10 @@ void qce_dma_release(struct qce_dma_data + + struct scatterlist * + qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl, +- int max_ents) ++ unsigned int max_len) + { + struct scatterlist *sg = sgt->sgl, *sg_last = NULL; ++ unsigned int new_len; + + while (sg) { + if (!sg_page(sg)) +@@ -61,13 +62,13 @@ qce_sgtable_add(struct sg_table *sgt, st + if (!sg) + return ERR_PTR(-EINVAL); + +- while (new_sgl && sg && max_ents) { +- sg_set_page(sg, sg_page(new_sgl), new_sgl->length, +- new_sgl->offset); ++ while (new_sgl && sg && max_len) { ++ new_len = new_sgl->length > max_len ? max_len : new_sgl->length; ++ sg_set_page(sg, sg_page(new_sgl), new_len, new_sgl->offset); + sg_last = sg; + sg = sg_next(sg); + new_sgl = sg_next(new_sgl); +- max_ents--; ++ max_len -= new_len; + } + + return sg_last; +--- a/drivers/crypto/qce/dma.h ++++ b/drivers/crypto/qce/dma.h +@@ -43,6 +43,6 @@ void qce_dma_issue_pending(struct qce_dm + int qce_dma_terminate_all(struct qce_dma_data *dma); + struct scatterlist * + qce_sgtable_add(struct sg_table *sgt, struct scatterlist *sg_add, +- int max_ents); ++ unsigned int max_len); + + #endif /* _DMA_H_ */ +--- a/drivers/crypto/qce/skcipher.c ++++ b/drivers/crypto/qce/skcipher.c +@@ -97,13 +97,14 @@ qce_skcipher_async_req_handle(struct cry + + sg_init_one(&rctx->result_sg, qce->dma.result_buf, QCE_RESULT_BUF_SZ); + +- sg = qce_sgtable_add(&rctx->dst_tbl, req->dst, rctx->dst_nents - 1); ++ sg = qce_sgtable_add(&rctx->dst_tbl, req->dst, req->cryptlen); + if (IS_ERR(sg)) { + ret = PTR_ERR(sg); + goto error_free; + } + +- sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg, 1); ++ sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg, ++ QCE_RESULT_BUF_SZ); + if (IS_ERR(sg)) { + ret = PTR_ERR(sg); + goto error_free; diff --git a/target/linux/ipq40xx/patches-5.4/048-crypto-qce-use-AES-fallback-for-small-requests.patch b/target/linux/ipq40xx/patches-5.4/048-crypto-qce-use-AES-fallback-for-small-requests.patch new file mode 100644 index 0000000000..d9a6c4dff9 --- /dev/null +++ b/target/linux/ipq40xx/patches-5.4/048-crypto-qce-use-AES-fallback-for-small-requests.patch @@ -0,0 +1,113 @@ +From ce163ba0bf298f1707321ac025ef639f88e62801 Mon Sep 17 00:00:00 2001 +From: Eneas U de Queiroz +Date: Fri, 7 Feb 2020 12:02:26 -0300 +Subject: [PATCH 10/11] crypto: qce - use AES fallback for small requests + +Process small blocks using the fallback cipher, as a workaround for an +observed failure (DMA-related, apparently) when computing the GCM ghash +key. This brings a speed gain as well, since it avoids the latency of +using the hardware engine to process small blocks. + +Using software for all 16-byte requests would be enough to make GCM +work, but to increase performance, a larger threshold would be better. +Measuring the performance of supported ciphers with openssl speed, +software matches hardware at around 768-1024 bytes. + +Considering the 256-bit ciphers, software is 2-3 times faster than qce +at 256-bytes, 30% faster at 512, and about even at 768-bytes. With +128-bit keys, the break-even point would be around 1024-bytes. + +This adds the 'aes_sw_max_len' parameter, to set the largest request +length processed by the software fallback. Its default is being set to +512 bytes, a little lower than the break-even point, to balance the cost +in CPU usage. + +Signed-off-by: Eneas U de Queiroz +Signed-off-by: Herbert Xu +--- + +--- a/drivers/crypto/Kconfig ++++ b/drivers/crypto/Kconfig +@@ -628,6 +628,29 @@ config CRYPTO_DEV_QCE + hardware. To compile this driver as a module, choose M here. The + module will be called qcrypto. + ++config CRYPTO_DEV_QCE_SW_MAX_LEN ++ int "Default maximum request size to use software for AES" ++ depends on CRYPTO_DEV_QCE && CRYPTO_DEV_QCE_SKCIPHER ++ default 512 ++ help ++ This sets the default maximum request size to perform AES requests ++ using software instead of the crypto engine. It can be changed by ++ setting the aes_sw_max_len parameter. ++ ++ Small blocks are processed faster in software than hardware. ++ Considering the 256-bit ciphers, software is 2-3 times faster than ++ qce at 256-bytes, 30% faster at 512, and about even at 768-bytes. ++ With 128-bit keys, the break-even point would be around 1024-bytes. ++ ++ The default is set a little lower, to 512 bytes, to balance the ++ cost in CPU usage. The minimum recommended setting is 16-bytes ++ (1 AES block), since AES-GCM will fail if you set it lower. ++ Setting this to zero will send all requests to the hardware. ++ ++ Note that 192-bit keys are not supported by the hardware and are ++ always processed by the software fallback, and all DES requests ++ are done by the hardware. ++ + config CRYPTO_DEV_QCOM_RNG + tristate "Qualcomm Random Number Generator Driver" + depends on ARCH_QCOM || COMPILE_TEST +--- a/drivers/crypto/qce/skcipher.c ++++ b/drivers/crypto/qce/skcipher.c +@@ -5,6 +5,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -12,6 +13,13 @@ + + #include "cipher.h" + ++static unsigned int aes_sw_max_len = CONFIG_CRYPTO_DEV_QCE_SW_MAX_LEN; ++module_param(aes_sw_max_len, uint, 0644); ++MODULE_PARM_DESC(aes_sw_max_len, ++ "Only use hardware for AES requests larger than this " ++ "[0=always use hardware; anything <16 breaks AES-GCM; default=" ++ __stringify(CONFIG_CRYPTO_DEV_QCE_SOFT_THRESHOLD)"]"); ++ + static LIST_HEAD(skcipher_algs); + + static void qce_skcipher_done(void *data) +@@ -166,15 +174,10 @@ static int qce_skcipher_setkey(struct cr + switch (IS_XTS(flags) ? keylen >> 1 : keylen) { + case AES_KEYSIZE_128: + case AES_KEYSIZE_256: ++ memcpy(ctx->enc_key, key, keylen); + break; +- default: +- goto fallback; + } + +- ctx->enc_keylen = keylen; +- memcpy(ctx->enc_key, key, keylen); +- return 0; +-fallback: + ret = crypto_sync_skcipher_setkey(ctx->fallback, key, keylen); + if (!ret) + ctx->enc_keylen = keylen; +@@ -224,8 +227,9 @@ static int qce_skcipher_crypt(struct skc + rctx->flags |= encrypt ? QCE_ENCRYPT : QCE_DECRYPT; + keylen = IS_XTS(rctx->flags) ? ctx->enc_keylen >> 1 : ctx->enc_keylen; + +- if (IS_AES(rctx->flags) && keylen != AES_KEYSIZE_128 && +- keylen != AES_KEYSIZE_256) { ++ if (IS_AES(rctx->flags) && ++ ((keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256) || ++ req->cryptlen <= aes_sw_max_len)) { + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_sync_tfm(subreq, ctx->fallback); diff --git a/target/linux/ipq40xx/patches-5.4/049-crypto-qce-handle-AES-XTS-cases-that-qce-fails.patch b/target/linux/ipq40xx/patches-5.4/049-crypto-qce-handle-AES-XTS-cases-that-qce-fails.patch new file mode 100644 index 0000000000..18beda6296 --- /dev/null +++ b/target/linux/ipq40xx/patches-5.4/049-crypto-qce-handle-AES-XTS-cases-that-qce-fails.patch @@ -0,0 +1,59 @@ +From 7f19380b2cfd412dcef2facefb3f6c62788864d7 Mon Sep 17 00:00:00 2001 +From: Eneas U de Queiroz +Date: Fri, 7 Feb 2020 12:02:27 -0300 +Subject: [PATCH 11/11] crypto: qce - handle AES-XTS cases that qce fails + +QCE hangs when presented with an AES-XTS request whose length is larger +than QCE_SECTOR_SIZE (512-bytes), and is not a multiple of it. Let the +fallback cipher handle them. + +Signed-off-by: Eneas U de Queiroz +Signed-off-by: Herbert Xu +--- + drivers/crypto/qce/common.c | 2 -- + drivers/crypto/qce/common.h | 3 +++ + drivers/crypto/qce/skcipher.c | 9 +++++++-- + 3 files changed, 10 insertions(+), 4 deletions(-) + +--- a/drivers/crypto/qce/common.c ++++ b/drivers/crypto/qce/common.c +@@ -15,8 +15,6 @@ + #include "regs-v5.h" + #include "sha.h" + +-#define QCE_SECTOR_SIZE 512 +- + static inline u32 qce_read(struct qce_device *qce, u32 offset) + { + return readl(qce->base + offset); +--- a/drivers/crypto/qce/common.h ++++ b/drivers/crypto/qce/common.h +@@ -12,6 +12,9 @@ + #include + #include + ++/* xts du size */ ++#define QCE_SECTOR_SIZE 512 ++ + /* key size in bytes */ + #define QCE_SHA_HMAC_KEY_SIZE 64 + #define QCE_MAX_CIPHER_KEY_SIZE AES_KEYSIZE_256 +--- a/drivers/crypto/qce/skcipher.c ++++ b/drivers/crypto/qce/skcipher.c +@@ -227,9 +227,14 @@ static int qce_skcipher_crypt(struct skc + rctx->flags |= encrypt ? QCE_ENCRYPT : QCE_DECRYPT; + keylen = IS_XTS(rctx->flags) ? ctx->enc_keylen >> 1 : ctx->enc_keylen; + ++ /* qce is hanging when AES-XTS request len > QCE_SECTOR_SIZE and ++ * is not a multiple of it; pass such requests to the fallback ++ */ + if (IS_AES(rctx->flags) && +- ((keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256) || +- req->cryptlen <= aes_sw_max_len)) { ++ (((keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256) || ++ req->cryptlen <= aes_sw_max_len) || ++ (IS_XTS(rctx->flags) && req->cryptlen > QCE_SECTOR_SIZE && ++ req->cryptlen % QCE_SECTOR_SIZE))) { + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_sync_tfm(subreq, ctx->fallback); diff --git a/target/linux/ipq40xx/patches-5.4/051-crypto-qce-allow-building-only-hashes-ciphers.patch b/target/linux/ipq40xx/patches-5.4/051-crypto-qce-allow-building-only-hashes-ciphers.patch new file mode 100644 index 0000000000..5f350183d8 --- /dev/null +++ b/target/linux/ipq40xx/patches-5.4/051-crypto-qce-allow-building-only-hashes-ciphers.patch @@ -0,0 +1,419 @@ +From 59e056cda4beb5412e3653e6360c2eb0fa770baa Mon Sep 17 00:00:00 2001 +From: Eneas U de Queiroz +Date: Fri, 20 Dec 2019 16:02:18 -0300 +Subject: [PATCH 07/11] crypto: qce - allow building only hashes/ciphers + +Allow the user to choose whether to build support for all algorithms +(default), hashes-only, or skciphers-only. + +The QCE engine does not appear to scale as well as the CPU to handle +multiple crypto requests. While the ipq40xx chips have 4-core CPUs, the +QCE handles only 2 requests in parallel. + +Ipsec throughput seems to improve when disabling either family of +algorithms, sharing the load with the CPU. Enabling skciphers-only +appears to work best. + +Signed-off-by: Eneas U de Queiroz +Signed-off-by: Herbert Xu +--- + +--- a/drivers/crypto/Kconfig ++++ b/drivers/crypto/Kconfig +@@ -616,6 +616,14 @@ config CRYPTO_DEV_QCE + tristate "Qualcomm crypto engine accelerator" + depends on ARCH_QCOM || COMPILE_TEST + depends on HAS_IOMEM ++ help ++ This driver supports Qualcomm crypto engine accelerator ++ hardware. To compile this driver as a module, choose M here. The ++ module will be called qcrypto. ++ ++config CRYPTO_DEV_QCE_SKCIPHER ++ bool ++ depends on CRYPTO_DEV_QCE + select CRYPTO_AES + select CRYPTO_LIB_DES + select CRYPTO_ECB +@@ -623,10 +631,57 @@ config CRYPTO_DEV_QCE + select CRYPTO_XTS + select CRYPTO_CTR + select CRYPTO_BLKCIPHER ++ ++config CRYPTO_DEV_QCE_SHA ++ bool ++ depends on CRYPTO_DEV_QCE ++ ++choice ++ prompt "Algorithms enabled for QCE acceleration" ++ default CRYPTO_DEV_QCE_ENABLE_ALL ++ depends on CRYPTO_DEV_QCE + help +- This driver supports Qualcomm crypto engine accelerator +- hardware. To compile this driver as a module, choose M here. The +- module will be called qcrypto. ++ This option allows to choose whether to build support for all algorihtms ++ (default), hashes-only, or skciphers-only. ++ ++ The QCE engine does not appear to scale as well as the CPU to handle ++ multiple crypto requests. While the ipq40xx chips have 4-core CPUs, the ++ QCE handles only 2 requests in parallel. ++ ++ Ipsec throughput seems to improve when disabling either family of ++ algorithms, sharing the load with the CPU. Enabling skciphers-only ++ appears to work best. ++ ++ config CRYPTO_DEV_QCE_ENABLE_ALL ++ bool "All supported algorithms" ++ select CRYPTO_DEV_QCE_SKCIPHER ++ select CRYPTO_DEV_QCE_SHA ++ help ++ Enable all supported algorithms: ++ - AES (CBC, CTR, ECB, XTS) ++ - 3DES (CBC, ECB) ++ - DES (CBC, ECB) ++ - SHA1, HMAC-SHA1 ++ - SHA256, HMAC-SHA256 ++ ++ config CRYPTO_DEV_QCE_ENABLE_SKCIPHER ++ bool "Symmetric-key ciphers only" ++ select CRYPTO_DEV_QCE_SKCIPHER ++ help ++ Enable symmetric-key ciphers only: ++ - AES (CBC, CTR, ECB, XTS) ++ - 3DES (ECB, CBC) ++ - DES (ECB, CBC) ++ ++ config CRYPTO_DEV_QCE_ENABLE_SHA ++ bool "Hash/HMAC only" ++ select CRYPTO_DEV_QCE_SHA ++ help ++ Enable hashes/HMAC algorithms only: ++ - SHA1, HMAC-SHA1 ++ - SHA256, HMAC-SHA256 ++ ++endchoice + + config CRYPTO_DEV_QCE_SW_MAX_LEN + int "Default maximum request size to use software for AES" +--- a/drivers/crypto/qce/Makefile ++++ b/drivers/crypto/qce/Makefile +@@ -2,6 +2,7 @@ + obj-$(CONFIG_CRYPTO_DEV_QCE) += qcrypto.o + qcrypto-objs := core.o \ + common.o \ +- dma.o \ +- sha.o \ +- skcipher.o ++ dma.o ++ ++qcrypto-$(CONFIG_CRYPTO_DEV_QCE_SHA) += sha.o ++qcrypto-$(CONFIG_CRYPTO_DEV_QCE_SKCIPHER) += skcipher.o +--- a/drivers/crypto/qce/common.c ++++ b/drivers/crypto/qce/common.c +@@ -43,52 +43,56 @@ qce_clear_array(struct qce_device *qce, + qce_write(qce, offset + i * sizeof(u32), 0); + } + +-static u32 qce_encr_cfg(unsigned long flags, u32 aes_key_size) ++static u32 qce_config_reg(struct qce_device *qce, int little) + { +- u32 cfg = 0; ++ u32 beats = (qce->burst_size >> 3) - 1; ++ u32 pipe_pair = qce->pipe_pair_id; ++ u32 config; + +- if (IS_AES(flags)) { +- if (aes_key_size == AES_KEYSIZE_128) +- cfg |= ENCR_KEY_SZ_AES128 << ENCR_KEY_SZ_SHIFT; +- else if (aes_key_size == AES_KEYSIZE_256) +- cfg |= ENCR_KEY_SZ_AES256 << ENCR_KEY_SZ_SHIFT; +- } ++ config = (beats << REQ_SIZE_SHIFT) & REQ_SIZE_MASK; ++ config |= BIT(MASK_DOUT_INTR_SHIFT) | BIT(MASK_DIN_INTR_SHIFT) | ++ BIT(MASK_OP_DONE_INTR_SHIFT) | BIT(MASK_ERR_INTR_SHIFT); ++ config |= (pipe_pair << PIPE_SET_SELECT_SHIFT) & PIPE_SET_SELECT_MASK; ++ config &= ~HIGH_SPD_EN_N_SHIFT; + +- if (IS_AES(flags)) +- cfg |= ENCR_ALG_AES << ENCR_ALG_SHIFT; +- else if (IS_DES(flags) || IS_3DES(flags)) +- cfg |= ENCR_ALG_DES << ENCR_ALG_SHIFT; ++ if (little) ++ config |= BIT(LITTLE_ENDIAN_MODE_SHIFT); + +- if (IS_DES(flags)) +- cfg |= ENCR_KEY_SZ_DES << ENCR_KEY_SZ_SHIFT; ++ return config; ++} + +- if (IS_3DES(flags)) +- cfg |= ENCR_KEY_SZ_3DES << ENCR_KEY_SZ_SHIFT; ++void qce_cpu_to_be32p_array(__be32 *dst, const u8 *src, unsigned int len) ++{ ++ __be32 *d = dst; ++ const u8 *s = src; ++ unsigned int n; + +- switch (flags & QCE_MODE_MASK) { +- case QCE_MODE_ECB: +- cfg |= ENCR_MODE_ECB << ENCR_MODE_SHIFT; +- break; +- case QCE_MODE_CBC: +- cfg |= ENCR_MODE_CBC << ENCR_MODE_SHIFT; +- break; +- case QCE_MODE_CTR: +- cfg |= ENCR_MODE_CTR << ENCR_MODE_SHIFT; +- break; +- case QCE_MODE_XTS: +- cfg |= ENCR_MODE_XTS << ENCR_MODE_SHIFT; +- break; +- case QCE_MODE_CCM: +- cfg |= ENCR_MODE_CCM << ENCR_MODE_SHIFT; +- cfg |= LAST_CCM_XFR << LAST_CCM_SHIFT; +- break; +- default: +- return ~0; ++ n = len / sizeof(u32); ++ for (; n > 0; n--) { ++ *d = cpu_to_be32p((const __u32 *) s); ++ s += sizeof(__u32); ++ d++; + } ++} + +- return cfg; ++static void qce_setup_config(struct qce_device *qce) ++{ ++ u32 config; ++ ++ /* get big endianness */ ++ config = qce_config_reg(qce, 0); ++ ++ /* clear status */ ++ qce_write(qce, REG_STATUS, 0); ++ qce_write(qce, REG_CONFIG, config); ++} ++ ++static inline void qce_crypto_go(struct qce_device *qce) ++{ ++ qce_write(qce, REG_GOPROC, BIT(GO_SHIFT) | BIT(RESULTS_DUMP_SHIFT)); + } + ++#ifdef CONFIG_CRYPTO_DEV_QCE_SHA + static u32 qce_auth_cfg(unsigned long flags, u32 key_size) + { + u32 cfg = 0; +@@ -135,88 +139,6 @@ static u32 qce_auth_cfg(unsigned long fl + return cfg; + } + +-static u32 qce_config_reg(struct qce_device *qce, int little) +-{ +- u32 beats = (qce->burst_size >> 3) - 1; +- u32 pipe_pair = qce->pipe_pair_id; +- u32 config; +- +- config = (beats << REQ_SIZE_SHIFT) & REQ_SIZE_MASK; +- config |= BIT(MASK_DOUT_INTR_SHIFT) | BIT(MASK_DIN_INTR_SHIFT) | +- BIT(MASK_OP_DONE_INTR_SHIFT) | BIT(MASK_ERR_INTR_SHIFT); +- config |= (pipe_pair << PIPE_SET_SELECT_SHIFT) & PIPE_SET_SELECT_MASK; +- config &= ~HIGH_SPD_EN_N_SHIFT; +- +- if (little) +- config |= BIT(LITTLE_ENDIAN_MODE_SHIFT); +- +- return config; +-} +- +-void qce_cpu_to_be32p_array(__be32 *dst, const u8 *src, unsigned int len) +-{ +- __be32 *d = dst; +- const u8 *s = src; +- unsigned int n; +- +- n = len / sizeof(u32); +- for (; n > 0; n--) { +- *d = cpu_to_be32p((const __u32 *) s); +- s += sizeof(__u32); +- d++; +- } +-} +- +-static void qce_xts_swapiv(__be32 *dst, const u8 *src, unsigned int ivsize) +-{ +- u8 swap[QCE_AES_IV_LENGTH]; +- u32 i, j; +- +- if (ivsize > QCE_AES_IV_LENGTH) +- return; +- +- memset(swap, 0, QCE_AES_IV_LENGTH); +- +- for (i = (QCE_AES_IV_LENGTH - ivsize), j = ivsize - 1; +- i < QCE_AES_IV_LENGTH; i++, j--) +- swap[i] = src[j]; +- +- qce_cpu_to_be32p_array(dst, swap, QCE_AES_IV_LENGTH); +-} +- +-static void qce_xtskey(struct qce_device *qce, const u8 *enckey, +- unsigned int enckeylen, unsigned int cryptlen) +-{ +- u32 xtskey[QCE_MAX_CIPHER_KEY_SIZE / sizeof(u32)] = {0}; +- unsigned int xtsklen = enckeylen / (2 * sizeof(u32)); +- unsigned int xtsdusize; +- +- qce_cpu_to_be32p_array((__be32 *)xtskey, enckey + enckeylen / 2, +- enckeylen / 2); +- qce_write_array(qce, REG_ENCR_XTS_KEY0, xtskey, xtsklen); +- +- /* xts du size 512B */ +- xtsdusize = min_t(u32, QCE_SECTOR_SIZE, cryptlen); +- qce_write(qce, REG_ENCR_XTS_DU_SIZE, xtsdusize); +-} +- +-static void qce_setup_config(struct qce_device *qce) +-{ +- u32 config; +- +- /* get big endianness */ +- config = qce_config_reg(qce, 0); +- +- /* clear status */ +- qce_write(qce, REG_STATUS, 0); +- qce_write(qce, REG_CONFIG, config); +-} +- +-static inline void qce_crypto_go(struct qce_device *qce) +-{ +- qce_write(qce, REG_GOPROC, BIT(GO_SHIFT) | BIT(RESULTS_DUMP_SHIFT)); +-} +- + static int qce_setup_regs_ahash(struct crypto_async_request *async_req, + u32 totallen, u32 offset) + { +@@ -301,6 +223,87 @@ go_proc: + + return 0; + } ++#endif ++ ++#ifdef CONFIG_CRYPTO_DEV_QCE_SKCIPHER ++static u32 qce_encr_cfg(unsigned long flags, u32 aes_key_size) ++{ ++ u32 cfg = 0; ++ ++ if (IS_AES(flags)) { ++ if (aes_key_size == AES_KEYSIZE_128) ++ cfg |= ENCR_KEY_SZ_AES128 << ENCR_KEY_SZ_SHIFT; ++ else if (aes_key_size == AES_KEYSIZE_256) ++ cfg |= ENCR_KEY_SZ_AES256 << ENCR_KEY_SZ_SHIFT; ++ } ++ ++ if (IS_AES(flags)) ++ cfg |= ENCR_ALG_AES << ENCR_ALG_SHIFT; ++ else if (IS_DES(flags) || IS_3DES(flags)) ++ cfg |= ENCR_ALG_DES << ENCR_ALG_SHIFT; ++ ++ if (IS_DES(flags)) ++ cfg |= ENCR_KEY_SZ_DES << ENCR_KEY_SZ_SHIFT; ++ ++ if (IS_3DES(flags)) ++ cfg |= ENCR_KEY_SZ_3DES << ENCR_KEY_SZ_SHIFT; ++ ++ switch (flags & QCE_MODE_MASK) { ++ case QCE_MODE_ECB: ++ cfg |= ENCR_MODE_ECB << ENCR_MODE_SHIFT; ++ break; ++ case QCE_MODE_CBC: ++ cfg |= ENCR_MODE_CBC << ENCR_MODE_SHIFT; ++ break; ++ case QCE_MODE_CTR: ++ cfg |= ENCR_MODE_CTR << ENCR_MODE_SHIFT; ++ break; ++ case QCE_MODE_XTS: ++ cfg |= ENCR_MODE_XTS << ENCR_MODE_SHIFT; ++ break; ++ case QCE_MODE_CCM: ++ cfg |= ENCR_MODE_CCM << ENCR_MODE_SHIFT; ++ cfg |= LAST_CCM_XFR << LAST_CCM_SHIFT; ++ break; ++ default: ++ return ~0; ++ } ++ ++ return cfg; ++} ++ ++static void qce_xts_swapiv(__be32 *dst, const u8 *src, unsigned int ivsize) ++{ ++ u8 swap[QCE_AES_IV_LENGTH]; ++ u32 i, j; ++ ++ if (ivsize > QCE_AES_IV_LENGTH) ++ return; ++ ++ memset(swap, 0, QCE_AES_IV_LENGTH); ++ ++ for (i = (QCE_AES_IV_LENGTH - ivsize), j = ivsize - 1; ++ i < QCE_AES_IV_LENGTH; i++, j--) ++ swap[i] = src[j]; ++ ++ qce_cpu_to_be32p_array(dst, swap, QCE_AES_IV_LENGTH); ++} ++ ++static void qce_xtskey(struct qce_device *qce, const u8 *enckey, ++ unsigned int enckeylen, unsigned int cryptlen) ++{ ++ u32 xtskey[QCE_MAX_CIPHER_KEY_SIZE / sizeof(u32)] = {0}; ++ unsigned int xtsklen = enckeylen / (2 * sizeof(u32)); ++ unsigned int xtsdusize; ++ ++ qce_cpu_to_be32p_array((__be32 *)xtskey, enckey + enckeylen / 2, ++ enckeylen / 2); ++ qce_write_array(qce, REG_ENCR_XTS_KEY0, xtskey, xtsklen); ++ ++ /* xts du size 512B */ ++ xtsdusize = min_t(u32, QCE_SECTOR_SIZE, cryptlen); ++ qce_write(qce, REG_ENCR_XTS_DU_SIZE, xtsdusize); ++} + + static int qce_setup_regs_skcipher(struct crypto_async_request *async_req, + u32 totallen, u32 offset) +@@ -382,15 +385,20 @@ static int qce_setup_regs_skcipher(struc + + return 0; + } ++#endif + + int qce_start(struct crypto_async_request *async_req, u32 type, u32 totallen, + u32 offset) + { + switch (type) { ++#ifdef CONFIG_CRYPTO_DEV_QCE_SKCIPHER + case CRYPTO_ALG_TYPE_SKCIPHER: + return qce_setup_regs_skcipher(async_req, totallen, offset); ++#endif ++#ifdef CONFIG_CRYPTO_DEV_QCE_SHA + case CRYPTO_ALG_TYPE_AHASH: + return qce_setup_regs_ahash(async_req, totallen, offset); ++#endif + default: + return -EINVAL; + } +--- a/drivers/crypto/qce/core.c ++++ b/drivers/crypto/qce/core.c +@@ -22,8 +22,12 @@ + #define QCE_QUEUE_LENGTH 1 + + static const struct qce_algo_ops *qce_ops[] = { ++#ifdef CONFIG_CRYPTO_DEV_QCE_SKCIPHER + &skcipher_ops, ++#endif ++#ifdef CONFIG_CRYPTO_DEV_QCE_SHA + &ahash_ops, ++#endif + }; + + static void qce_unregister_algs(struct qce_device *qce)