author		Eric Biggers <ebiggers@kernel.org>	2025-07-12 16:22:59 -0700
committer	Eric Biggers <ebiggers@kernel.org>	2025-07-14 11:11:48 -0700
commit		00d549bb89e471b7df550459fcb51ffbded39cbf (patch)
tree		eb4f699859d57af7537194687503bf92841d1e09 /lib
parent		70cb6ca58fddb02e269fe743ba75d53d577b5b1c (diff)
lib/crypto: arm64/sha1: Migrate optimized code into library
Instead of exposing the arm64-optimized SHA-1 code via arm64-specific
crypto_shash algorithms, just implement the sha1_blocks() library
function. This is much simpler, it makes the SHA-1 library functions
arm64-optimized, and it fixes the longstanding issue where the
arm64-optimized SHA-1 code was disabled by default. SHA-1 remains
available through crypto_shash, but individual architectures no longer
need to handle it.
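
For context, here is a minimal sketch of the dispatch this relies on. It is
illustrative only, not the verbatim lib/crypto/sha1.c; the exact include
mechanics are an assumption, while sha1_blocks(), sha1_blocks_generic(), and
struct sha1_block_state are taken from the code added by this patch:

/*
 * Illustrative sketch (not the actual lib/crypto/sha1.c): the generic C block
 * function is always built, and when CONFIG_CRYPTO_LIB_SHA1_ARCH is enabled
 * the per-arch header supplies sha1_blocks(), which every SHA-1 library entry
 * point then uses.
 */
void sha1_blocks_generic(struct sha1_block_state *state,
			 const u8 *data, size_t nblocks);	/* generic C implementation */

#ifdef CONFIG_CRYPTO_LIB_SHA1_ARCH
#include "sha1.h"	/* e.g. lib/crypto/arm64/sha1.h added by this patch */
#else
static inline void sha1_blocks(struct sha1_block_state *state,
			       const u8 *data, size_t nblocks)
{
	sha1_blocks_generic(state, data, nblocks);
}
#endif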
Remove support for SHA-1 finalization from assembly code, since the
library does not yet support architecture-specific overrides of the
finalization. (Support for that has been omitted for now, for
simplicity and because it usually isn't performance-critical.)
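
As a reminder of why the block function alone is enough, here is a hedged
sketch of generic finalization expressed purely in terms of sha1_blocks().
The helper name, its parameters, and the state->h field are hypothetical
placeholders, not the exact lib/crypto code:

/*
 * Illustrative only: the padding block and the final length block are just
 * more input for sha1_blocks(), so an arch override of the block function
 * already accelerates finalization.
 */
static void sha1_finup_sketch(struct sha1_block_state *state,
			      u8 block[SHA1_BLOCK_SIZE], size_t partial,
			      u64 total_bytes, u8 out[SHA1_DIGEST_SIZE])
{
	int i;

	block[partial++] = 0x80;		/* append the padding '1' bit */
	if (partial > SHA1_BLOCK_SIZE - 8) {	/* no room left for the length */
		memset(&block[partial], 0, SHA1_BLOCK_SIZE - partial);
		sha1_blocks(state, block, 1);
		partial = 0;
	}
	memset(&block[partial], 0, SHA1_BLOCK_SIZE - 8 - partial);
	put_unaligned_be64(total_bytes * 8, &block[SHA1_BLOCK_SIZE - 8]);
	sha1_blocks(state, block, 1);

	for (i = 0; i < SHA1_DIGEST_SIZE / 4; i++)	/* digest words are big-endian */
		put_unaligned_be32(state->h[i], &out[i * 4]);
}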
To match sha1_blocks(), change the type of the nblocks parameter and the
return value of __sha1_ce_transform() from int to size_t. Update the
assembly code accordingly.
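
For reference, the visible prototype change is roughly the following; the old
prototype is recalled from the previous arch/arm64 glue code and should be
treated as approximate, while the new one is taken from lib/crypto/arm64/sha1.h
below:

/* Before (approximate): int block count in, int count of remaining blocks out */
asmlinkage int __sha1_ce_transform(struct sha1_ce_state *sst,
				   const u8 *src, int blocks);

/* After: matches sha1_blocks(), size_t in both positions */
asmlinkage size_t __sha1_ce_transform(struct sha1_block_state *state,
				      const u8 *data, size_t nblocks);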
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20250712232329.818226-9-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Diffstat (limited to 'lib')
-rw-r--r--	lib/crypto/Kconfig		  1
-rw-r--r--	lib/crypto/Makefile		  1
-rw-r--r--	lib/crypto/arm64/sha1-ce-core.S	130
-rw-r--r--	lib/crypto/arm64/sha1.h		 39
4 files changed, 171 insertions, 0 deletions
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 519c5d6a050fd..05cce143af314 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -147,6 +147,7 @@ config CRYPTO_LIB_SHA1_ARCH
 	bool
 	depends on CRYPTO_LIB_SHA1 && !UML
 	default y if ARM
+	default y if ARM64 && KERNEL_MODE_NEON
 
 config CRYPTO_LIB_SHA256
 	tristate
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 699a421339271..1da13c9e2f711 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -76,6 +76,7 @@ libsha1-y += arm/sha1-armv4-large.o
 libsha1-$(CONFIG_KERNEL_MODE_NEON) += arm/sha1-armv7-neon.o \
 				      arm/sha1-ce-core.o
 endif
+libsha1-$(CONFIG_ARM64) += arm64/sha1-ce-core.o
 endif # CONFIG_CRYPTO_LIB_SHA1_ARCH
 
 ################################################################################
diff --git a/lib/crypto/arm64/sha1-ce-core.S b/lib/crypto/arm64/sha1-ce-core.S
new file mode 100644
index 0000000000000..21efbbafd7d62
--- /dev/null
+++ b/lib/crypto/arm64/sha1-ce-core.S
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.text
+	.arch		armv8-a+crypto
+
+	k0		.req	v0
+	k1		.req	v1
+	k2		.req	v2
+	k3		.req	v3
+
+	t0		.req	v4
+	t1		.req	v5
+
+	dga		.req	q6
+	dgav		.req	v6
+	dgb		.req	s7
+	dgbv		.req	v7
+
+	dg0q		.req	q12
+	dg0s		.req	s12
+	dg0v		.req	v12
+	dg1s		.req	s13
+	dg1v		.req	v13
+	dg2s		.req	s14
+
+	.macro		add_only, op, ev, rc, s0, dg1
+	.ifc		\ev, ev
+	add		t1.4s, v\s0\().4s, \rc\().4s
+	sha1h		dg2s, dg0s
+	.ifnb		\dg1
+	sha1\op		dg0q, \dg1, t0.4s
+	.else
+	sha1\op		dg0q, dg1s, t0.4s
+	.endif
+	.else
+	.ifnb		\s0
+	add		t0.4s, v\s0\().4s, \rc\().4s
+	.endif
+	sha1h		dg1s, dg0s
+	sha1\op		dg0q, dg2s, t1.4s
+	.endif
+	.endm
+
+	.macro		add_update, op, ev, rc, s0, s1, s2, s3, dg1
+	sha1su0		v\s0\().4s, v\s1\().4s, v\s2\().4s
+	add_only	\op, \ev, \rc, \s1, \dg1
+	sha1su1		v\s0\().4s, v\s3\().4s
+	.endm
+
+	.macro		loadrc, k, val, tmp
+	movz		\tmp, :abs_g0_nc:\val
+	movk		\tmp, :abs_g1:\val
+	dup		\k, \tmp
+	.endm
+
+	/*
+	 * size_t __sha1_ce_transform(struct sha1_block_state *state,
+	 *			      const u8 *data, size_t nblocks);
+	 */
+SYM_FUNC_START(__sha1_ce_transform)
+	/* load round constants */
+	loadrc		k0.4s, 0x5a827999, w6
+	loadrc		k1.4s, 0x6ed9eba1, w6
+	loadrc		k2.4s, 0x8f1bbcdc, w6
+	loadrc		k3.4s, 0xca62c1d6, w6
+
+	/* load state */
+	ld1		{dgav.4s}, [x0]
+	ldr		dgb, [x0, #16]
+
+	/* load input */
+0:	ld1		{v8.4s-v11.4s}, [x1], #64
+	sub		x2, x2, #1
+
+CPU_LE(	rev32		v8.16b, v8.16b		)
+CPU_LE(	rev32		v9.16b, v9.16b		)
+CPU_LE(	rev32		v10.16b, v10.16b	)
+CPU_LE(	rev32		v11.16b, v11.16b	)
+
+	add		t0.4s, v8.4s, k0.4s
+	mov		dg0v.16b, dgav.16b
+
+	add_update	c, ev, k0,  8,  9, 10, 11, dgb
+	add_update	c, od, k0,  9, 10, 11,  8
+	add_update	c, ev, k0, 10, 11,  8,  9
+	add_update	c, od, k0, 11,  8,  9, 10
+	add_update	c, ev, k1,  8,  9, 10, 11
+
+	add_update	p, od, k1,  9, 10, 11,  8
+	add_update	p, ev, k1, 10, 11,  8,  9
+	add_update	p, od, k1, 11,  8,  9, 10
+	add_update	p, ev, k1,  8,  9, 10, 11
+	add_update	p, od, k2,  9, 10, 11,  8
+
+	add_update	m, ev, k2, 10, 11,  8,  9
+	add_update	m, od, k2, 11,  8,  9, 10
+	add_update	m, ev, k2,  8,  9, 10, 11
+	add_update	m, od, k2,  9, 10, 11,  8
+	add_update	m, ev, k3, 10, 11,  8,  9
+
+	add_update	p, od, k3, 11,  8,  9, 10
+	add_only	p, ev, k3,  9
+	add_only	p, od, k3, 10
+	add_only	p, ev, k3, 11
+	add_only	p, od
+
+	/* update state */
+	add		dgbv.2s, dgbv.2s, dg1v.2s
+	add		dgav.4s, dgav.4s, dg0v.4s
+
+	/* return early if voluntary preemption is needed */
+	cond_yield	1f, x5, x6
+
+	/* handled all input blocks? */
+	cbnz		x2, 0b
+
+	/* store new state */
+1:	st1		{dgav.4s}, [x0]
+	str		dgb, [x0, #16]
+	mov		x0, x2
+	ret
+SYM_FUNC_END(__sha1_ce_transform)
diff --git a/lib/crypto/arm64/sha1.h b/lib/crypto/arm64/sha1.h
new file mode 100644
index 0000000000000..f822563538cc8
--- /dev/null
+++ b/lib/crypto/arm64/sha1.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SHA-1 optimized for ARM64
+ *
+ * Copyright 2025 Google LLC
+ */
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <linux/cpufeature.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce);
+
+asmlinkage size_t __sha1_ce_transform(struct sha1_block_state *state,
+				      const u8 *data, size_t nblocks);
+
+static void sha1_blocks(struct sha1_block_state *state,
+			const u8 *data, size_t nblocks)
+{
+	if (static_branch_likely(&have_ce) && likely(may_use_simd())) {
+		do {
+			size_t rem;
+
+			kernel_neon_begin();
+			rem = __sha1_ce_transform(state, data, nblocks);
+			kernel_neon_end();
+			data += (nblocks - rem) * SHA1_BLOCK_SIZE;
+			nblocks = rem;
+		} while (nblocks);
+	} else {
+		sha1_blocks_generic(state, data, nblocks);
+	}
+}
+
+#define sha1_mod_init_arch sha1_mod_init_arch
+static inline void sha1_mod_init_arch(void)
+{
+	if (cpu_have_named_feature(SHA1))
+		static_branch_enable(&have_ce);
+}