crypto: arm64/sha256-neon - play nice with CONFIG_PREEMPT kernels
author Ard Biesheuvel <ard.biesheuvel@linaro.org>
Sat, 10 Mar 2018 15:21:54 +0000 (15:21 +0000)
committer Herbert Xu <herbert@gondor.apana.org.au>
Fri, 16 Mar 2018 15:35:58 +0000 (23:35 +0800)
Tweak the SHA256 update routines to invoke the SHA256 block transform
block by block, to avoid excessive scheduling delays caused by the
NEON algorithm running with preemption disabled.

Also, remove a stale comment which no longer applies now that kernel
mode NEON is actually disallowed in some contexts.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/arm64/crypto/sha256-glue.c

index b064d925fe2a7552222c568754e03d474c11a5aa..e8880ccdc71f6a28f7aca4e500f6a70e6779edf0 100644 (file)
@@ -89,21 +89,32 @@ static struct shash_alg algs[] = { {
 static int sha256_update_neon(struct shash_desc *desc, const u8 *data,
                              unsigned int len)
 {
-       /*
-        * Stacking and unstacking a substantial slice of the NEON register
-        * file may significantly affect performance for small updates when
-        * executing in interrupt context, so fall back to the scalar code
-        * in that case.
-        */
+       struct sha256_state *sctx = shash_desc_ctx(desc);
+
        if (!may_use_simd())
                return sha256_base_do_update(desc, data, len,
                                (sha256_block_fn *)sha256_block_data_order);
 
-       kernel_neon_begin();
-       sha256_base_do_update(desc, data, len,
-                               (sha256_block_fn *)sha256_block_neon);
-       kernel_neon_end();
+       while (len > 0) {
+               unsigned int chunk = len;
+
+               /*
+                * Don't hog the CPU for the entire time it takes to process all
+                * input when running on a preemptible kernel, but process the
+                * data block by block instead.
+                */
+               if (IS_ENABLED(CONFIG_PREEMPT) &&
+                   chunk + sctx->count % SHA256_BLOCK_SIZE > SHA256_BLOCK_SIZE)
+                       chunk = SHA256_BLOCK_SIZE -
+                               sctx->count % SHA256_BLOCK_SIZE;
 
+               kernel_neon_begin();
+               sha256_base_do_update(desc, data, chunk,
+                                     (sha256_block_fn *)sha256_block_neon);
+               kernel_neon_end();
+               data += chunk;
+               len -= chunk;
+       }
        return 0;
 }
 
@@ -117,10 +128,9 @@ static int sha256_finup_neon(struct shash_desc *desc, const u8 *data,
                sha256_base_do_finalize(desc,
                                (sha256_block_fn *)sha256_block_data_order);
        } else {
-               kernel_neon_begin();
                if (len)
-                       sha256_base_do_update(desc, data, len,
-                               (sha256_block_fn *)sha256_block_neon);
+                       sha256_update_neon(desc, data, len);
+               kernel_neon_begin();
                sha256_base_do_finalize(desc,
                                (sha256_block_fn *)sha256_block_neon);
                kernel_neon_end();