From 66be895158886a6cd816aa1eaa18965a5c522d8f Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 4 Aug 2011 20:19:25 +0200 Subject: crypto: sha1 - SSSE3 based SHA1 implementation for x86-64 This is an assembler implementation of the SHA1 algorithm using the Supplemental SSE3 (SSSE3) instructions or, when available, the Advanced Vector Extensions (AVX). Testing with the tcrypt module shows the raw hash performance is up to 2.3 times faster than the C implementation, using 8k data blocks on a Core 2 Duo T5500. For the smalest data set (16 byte) it is still 25% faster. Since this implementation uses SSE/YMM registers it cannot safely be used in every situation, e.g. while an IRQ interrupts a kernel thread. The implementation falls back to the generic SHA1 variant, if using the SSE/YMM registers is not possible. With this algorithm I was able to increase the throughput of a single IPsec link from 344 Mbit/s to 464 Mbit/s on a Core 2 Quad CPU using the SSSE3 variant -- a speedup of +34.8%. Saving and restoring SSE/YMM state might make the actual throughput fluctuate when there are FPU intensive userland applications running. For example, meassuring the performance using iperf2 directly on the machine under test gives wobbling numbers because iperf2 uses the FPU for each packet to check if the reporting interval has expired (in the above test I got min/max/avg: 402/484/464 MBit/s). Using this algorithm on a IPsec gateway gives much more reasonable and stable numbers, albeit not as high as in the directly connected case. Here is the result from an RFC 2544 test run with a EXFO Packet Blazer FTB-8510: frame size sha1-generic sha1-ssse3 delta 64 byte 37.5 MBit/s 37.5 MBit/s 0.0% 128 byte 56.3 MBit/s 62.5 MBit/s +11.0% 256 byte 87.5 MBit/s 100.0 MBit/s +14.3% 512 byte 131.3 MBit/s 150.0 MBit/s +14.2% 1024 byte 162.5 MBit/s 193.8 MBit/s +19.3% 1280 byte 175.0 MBit/s 212.5 MBit/s +21.4% 1420 byte 175.0 MBit/s 218.7 MBit/s +25.0% 1518 byte 150.0 MBit/s 181.2 MBit/s +20.8% The throughput for the largest frame size is lower than for the previous size because the IP packets need to be fragmented in this case to make there way through the IPsec tunnel. Signed-off-by: Mathias Krause Cc: Maxim Locktyukhin Signed-off-by: Herbert Xu --- crypto/Kconfig | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'crypto/Kconfig') diff --git a/crypto/Kconfig b/crypto/Kconfig index ae27b7534ea..55c50cd3469 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -407,6 +407,16 @@ config CRYPTO_SHA1 help SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2). +config CRYPTO_SHA1_SSSE3 + tristate "SHA1 digest algorithm (SSSE3/AVX)" + depends on X86 && 64BIT + select CRYPTO_SHA1 + select CRYPTO_HASH + help + SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using Supplemental SSE3 (SSSE3) instructions or Advanced Vector + Extensions (AVX), when available. + config CRYPTO_SHA256 tristate "SHA224 and SHA256 digest algorithm" select CRYPTO_HASH -- cgit v1.2.3 From 52ba867c8c23dcb24865f80a95c191501e101b9f Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Fri, 2 Sep 2011 01:45:07 +0300 Subject: crypto: blowfish - split generic and common c code Patch splits up the blowfish crypto routine into a common part (key setup) which will be used by blowfish crypto modules (x86_64 assembly and generic-c). Also fixes errors/warnings reported by checkpatch. Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu --- crypto/Kconfig | 10 ++ crypto/Makefile | 1 + crypto/blowfish.c | 367 ++---------------------------------------- crypto/blowfish_common.c | 402 ++++++++++++++++++++++++++++++++++++++++++++++ include/crypto/blowfish.h | 23 +++ 5 files changed, 448 insertions(+), 355 deletions(-) create mode 100644 crypto/blowfish_common.c create mode 100644 include/crypto/blowfish.h (limited to 'crypto/Kconfig') diff --git a/crypto/Kconfig b/crypto/Kconfig index 55c50cd3469..108cb98e217 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -600,6 +600,7 @@ config CRYPTO_ARC4 config CRYPTO_BLOWFISH tristate "Blowfish cipher algorithm" select CRYPTO_ALGAPI + select CRYPTO_BLOWFISH_COMMON help Blowfish cipher algorithm, by Bruce Schneier. @@ -610,6 +611,15 @@ config CRYPTO_BLOWFISH See also: +config CRYPTO_BLOWFISH_COMMON + tristate + help + Common parts of the Blowfish cipher algorithm shared by the + generic c and the assembler implementations. + + See also: + + config CRYPTO_CAMELLIA tristate "Camellia cipher algorithms" depends on CRYPTO diff --git a/crypto/Makefile b/crypto/Makefile index ce5a813d363..495b79172ee 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o obj-$(CONFIG_CRYPTO_DES) += des_generic.o obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish.o +obj-$(CONFIG_CRYPTO_BLOWFISH_COMMON) += blowfish_common.o obj-$(CONFIG_CRYPTO_TWOFISH) += twofish_generic.o obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o obj-$(CONFIG_CRYPTO_SERPENT) += serpent.o diff --git a/crypto/blowfish.c b/crypto/blowfish.c index a67d52ee058..0f86d31fbbd 100644 --- a/crypto/blowfish.c +++ b/crypto/blowfish.c @@ -22,282 +22,7 @@ #include #include #include - -#define BF_BLOCK_SIZE 8 -#define BF_MIN_KEY_SIZE 4 -#define BF_MAX_KEY_SIZE 56 - -struct bf_ctx { - u32 p[18]; - u32 s[1024]; -}; - -static const u32 bf_pbox[16 + 2] = { - 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344, - 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89, - 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c, - 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917, - 0x9216d5d9, 0x8979fb1b, -}; - -static const u32 bf_sbox[256 * 4] = { - 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7, - 0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99, - 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16, - 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e, - 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee, - 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013, - 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef, - 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e, - 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60, - 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440, - 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce, - 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a, - 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e, - 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677, - 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193, - 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032, - 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88, - 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239, - 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e, - 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0, - 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3, - 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98, - 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88, - 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe, - 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6, - 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d, - 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b, - 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7, - 0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba, - 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463, - 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f, - 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09, - 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3, - 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb, - 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279, - 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8, - 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab, - 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82, - 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db, - 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573, - 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0, - 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b, - 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790, - 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8, - 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4, - 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0, - 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7, - 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c, - 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad, - 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1, - 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299, - 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9, - 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477, - 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf, - 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49, - 0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af, - 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa, - 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5, - 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41, - 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915, - 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400, - 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915, - 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664, - 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a, - 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623, - 0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266, - 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1, - 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e, - 0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6, - 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1, - 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e, - 0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1, - 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737, - 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8, - 0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff, - 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd, - 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701, - 0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7, - 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41, - 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331, - 0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf, - 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af, - 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e, - 0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87, - 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c, - 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2, - 0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16, - 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd, - 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b, - 0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509, - 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e, - 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3, - 0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f, - 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a, - 0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4, - 0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960, - 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66, - 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28, - 0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802, - 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84, - 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510, - 0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf, - 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14, - 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e, - 0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50, - 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7, - 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8, - 0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281, - 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99, - 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696, - 0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128, - 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73, - 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0, - 0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0, - 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105, - 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250, - 0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3, - 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285, - 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00, - 0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061, - 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb, - 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e, - 0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735, - 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc, - 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9, - 0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340, - 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20, - 0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7, - 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934, - 0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068, - 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af, - 0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840, - 0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45, - 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504, - 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a, - 0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb, - 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee, - 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6, - 0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42, - 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b, - 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2, - 0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb, - 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527, - 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b, - 0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33, - 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c, - 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3, - 0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc, - 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17, - 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564, - 0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b, - 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115, - 0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922, - 0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728, - 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0, - 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e, - 0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37, - 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d, - 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804, - 0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b, - 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3, - 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb, - 0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d, - 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c, - 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350, - 0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9, - 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a, - 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe, - 0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d, - 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc, - 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f, - 0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61, - 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2, - 0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9, - 0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2, - 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c, - 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e, - 0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633, - 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10, - 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169, - 0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52, - 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027, - 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5, - 0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62, - 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634, - 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76, - 0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24, - 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc, - 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4, - 0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c, - 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837, - 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0, - 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b, - 0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe, - 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b, - 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4, - 0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8, - 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6, - 0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304, - 0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22, - 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4, - 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6, - 0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9, - 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59, - 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593, - 0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51, - 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28, - 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c, - 0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b, - 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28, - 0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c, - 0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd, - 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a, - 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319, - 0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb, - 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f, - 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991, - 0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32, - 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680, - 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166, - 0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae, - 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb, - 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5, - 0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47, - 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370, - 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d, - 0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84, - 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048, - 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8, - 0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd, - 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9, - 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7, - 0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38, - 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f, - 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c, - 0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525, - 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1, - 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442, - 0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964, - 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e, - 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8, - 0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d, - 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f, - 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299, - 0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02, - 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc, - 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614, - 0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a, - 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6, - 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b, - 0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0, - 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060, - 0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e, - 0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9, - 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f, - 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6, -}; +#include /* * Round loop unrolling macros, S is a pointer to a S-Box array @@ -313,16 +38,15 @@ static const u32 bf_sbox[256 * 4] = { #define ROUND(a, b, n) b ^= P[n]; a ^= bf_F (b) -/* - * The blowfish encipher, processes 64-bit blocks. - * NOTE: This function MUSTN'T respect endianess - */ -static void encrypt_block(struct bf_ctx *bctx, u32 *dst, u32 *src) +static void bf_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { - const u32 *P = bctx->p; - const u32 *S = bctx->s; - u32 yl = src[0]; - u32 yr = src[1]; + struct bf_ctx *ctx = crypto_tfm_ctx(tfm); + const __be32 *in_blk = (const __be32 *)src; + __be32 *const out_blk = (__be32 *)dst; + const u32 *P = ctx->p; + const u32 *S = ctx->s; + u32 yl = be32_to_cpu(in_blk[0]); + u32 yr = be32_to_cpu(in_blk[1]); ROUND(yr, yl, 0); ROUND(yl, yr, 1); @@ -344,21 +68,8 @@ static void encrypt_block(struct bf_ctx *bctx, u32 *dst, u32 *src) yl ^= P[16]; yr ^= P[17]; - dst[0] = yr; - dst[1] = yl; -} - -static void bf_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - const __be32 *in_blk = (const __be32 *)src; - __be32 *const out_blk = (__be32 *)dst; - u32 in32[2], out32[2]; - - in32[0] = be32_to_cpu(in_blk[0]); - in32[1] = be32_to_cpu(in_blk[1]); - encrypt_block(crypto_tfm_ctx(tfm), out32, in32); - out_blk[0] = cpu_to_be32(out32[0]); - out_blk[1] = cpu_to_be32(out32[1]); + out_blk[0] = cpu_to_be32(yr); + out_blk[1] = cpu_to_be32(yl); } static void bf_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) @@ -395,60 +106,6 @@ static void bf_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) out_blk[1] = cpu_to_be32(yl); } -/* - * Calculates the blowfish S and P boxes for encryption and decryption. - */ -static int bf_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) -{ - struct bf_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *P = ctx->p; - u32 *S = ctx->s; - short i, j, count; - u32 data[2], temp; - - /* Copy the initialization s-boxes */ - for (i = 0, count = 0; i < 256; i++) - for (j = 0; j < 4; j++, count++) - S[count] = bf_sbox[count]; - - /* Set the p-boxes */ - for (i = 0; i < 16 + 2; i++) - P[i] = bf_pbox[i]; - - /* Actual subkey generation */ - for (j = 0, i = 0; i < 16 + 2; i++) { - temp = (((u32)key[j] << 24) | - ((u32)key[(j + 1) % keylen] << 16) | - ((u32)key[(j + 2) % keylen] << 8) | - ((u32)key[(j + 3) % keylen])); - - P[i] = P[i] ^ temp; - j = (j + 4) % keylen; - } - - data[0] = 0x00000000; - data[1] = 0x00000000; - - for (i = 0; i < 16 + 2; i += 2) { - encrypt_block((struct bf_ctx *)ctx, data, data); - - P[i] = data[0]; - P[i + 1] = data[1]; - } - - for (i = 0; i < 4; i++) { - for (j = 0, count = i * 256; j < 256; j += 2, count += 2) { - encrypt_block((struct bf_ctx *)ctx, data, data); - - S[count] = data[0]; - S[count + 1] = data[1]; - } - } - - /* Bruce says not to bother with the weak key check. */ - return 0; -} - static struct crypto_alg alg = { .cra_name = "blowfish", .cra_flags = CRYPTO_ALG_TYPE_CIPHER, @@ -460,7 +117,7 @@ static struct crypto_alg alg = { .cra_u = { .cipher = { .cia_min_keysize = BF_MIN_KEY_SIZE, .cia_max_keysize = BF_MAX_KEY_SIZE, - .cia_setkey = bf_setkey, + .cia_setkey = blowfish_setkey, .cia_encrypt = bf_encrypt, .cia_decrypt = bf_decrypt } } }; diff --git a/crypto/blowfish_common.c b/crypto/blowfish_common.c new file mode 100644 index 00000000000..f636aab0209 --- /dev/null +++ b/crypto/blowfish_common.c @@ -0,0 +1,402 @@ +/* + * Cryptographic API. + * + * Common Blowfish algorithm parts shared between the c and assembler + * implementations. + * + * Blowfish Cipher Algorithm, by Bruce Schneier. + * http://www.counterpane.com/blowfish.html + * + * Adapted from Kerneli implementation. + * + * Copyright (c) Herbert Valerio Riedel + * Copyright (c) Kyle McMartin + * Copyright (c) 2002 James Morris + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include + +static const u32 bf_pbox[16 + 2] = { + 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344, + 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89, + 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c, + 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917, + 0x9216d5d9, 0x8979fb1b, +}; + +static const u32 bf_sbox[256 * 4] = { + 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7, + 0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99, + 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16, + 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e, + 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee, + 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013, + 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef, + 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e, + 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60, + 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440, + 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce, + 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a, + 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e, + 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677, + 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193, + 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032, + 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88, + 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239, + 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e, + 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0, + 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3, + 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98, + 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88, + 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe, + 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6, + 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d, + 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b, + 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7, + 0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba, + 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463, + 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f, + 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09, + 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3, + 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb, + 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279, + 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8, + 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab, + 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82, + 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db, + 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573, + 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0, + 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b, + 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790, + 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8, + 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4, + 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0, + 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7, + 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c, + 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad, + 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1, + 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299, + 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9, + 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477, + 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf, + 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49, + 0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af, + 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa, + 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5, + 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41, + 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915, + 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400, + 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915, + 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664, + 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a, + 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623, + 0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266, + 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1, + 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e, + 0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6, + 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1, + 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e, + 0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1, + 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737, + 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8, + 0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff, + 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd, + 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701, + 0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7, + 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41, + 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331, + 0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf, + 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af, + 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e, + 0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87, + 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c, + 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2, + 0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16, + 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd, + 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b, + 0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509, + 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e, + 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3, + 0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f, + 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a, + 0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4, + 0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960, + 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66, + 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28, + 0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802, + 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84, + 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510, + 0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf, + 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14, + 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e, + 0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50, + 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7, + 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8, + 0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281, + 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99, + 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696, + 0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128, + 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73, + 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0, + 0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0, + 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105, + 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250, + 0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3, + 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285, + 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00, + 0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061, + 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb, + 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e, + 0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735, + 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc, + 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9, + 0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340, + 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20, + 0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7, + 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934, + 0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068, + 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af, + 0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840, + 0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45, + 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504, + 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a, + 0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb, + 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee, + 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6, + 0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42, + 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b, + 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2, + 0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb, + 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527, + 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b, + 0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33, + 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c, + 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3, + 0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc, + 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17, + 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564, + 0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b, + 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115, + 0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922, + 0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728, + 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0, + 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e, + 0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37, + 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d, + 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804, + 0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b, + 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3, + 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb, + 0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d, + 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c, + 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350, + 0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9, + 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a, + 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe, + 0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d, + 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc, + 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f, + 0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61, + 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2, + 0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9, + 0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2, + 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c, + 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e, + 0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633, + 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10, + 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169, + 0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52, + 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027, + 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5, + 0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62, + 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634, + 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76, + 0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24, + 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc, + 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4, + 0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c, + 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837, + 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0, + 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b, + 0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe, + 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b, + 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4, + 0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8, + 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6, + 0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304, + 0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22, + 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4, + 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6, + 0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9, + 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59, + 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593, + 0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51, + 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28, + 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c, + 0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b, + 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28, + 0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c, + 0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd, + 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a, + 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319, + 0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb, + 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f, + 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991, + 0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32, + 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680, + 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166, + 0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae, + 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb, + 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5, + 0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47, + 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370, + 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d, + 0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84, + 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048, + 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8, + 0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd, + 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9, + 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7, + 0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38, + 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f, + 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c, + 0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525, + 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1, + 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442, + 0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964, + 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e, + 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8, + 0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d, + 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f, + 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299, + 0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02, + 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc, + 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614, + 0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a, + 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6, + 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b, + 0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0, + 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060, + 0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e, + 0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9, + 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f, + 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6, +}; + +/* + * Round loop unrolling macros, S is a pointer to a S-Box array + * organized in 4 unsigned longs at a row. + */ +#define GET32_3(x) (((x) & 0xff)) +#define GET32_2(x) (((x) >> (8)) & (0xff)) +#define GET32_1(x) (((x) >> (16)) & (0xff)) +#define GET32_0(x) (((x) >> (24)) & (0xff)) + +#define bf_F(x) (((S[GET32_0(x)] + S[256 + GET32_1(x)]) ^ \ + S[512 + GET32_2(x)]) + S[768 + GET32_3(x)]) + +#define ROUND(a, b, n) ({ b ^= P[n]; a ^= bf_F(b); }) + +/* + * The blowfish encipher, processes 64-bit blocks. + * NOTE: This function MUSTN'T respect endianess + */ +static void encrypt_block(struct bf_ctx *bctx, u32 *dst, u32 *src) +{ + const u32 *P = bctx->p; + const u32 *S = bctx->s; + u32 yl = src[0]; + u32 yr = src[1]; + + ROUND(yr, yl, 0); + ROUND(yl, yr, 1); + ROUND(yr, yl, 2); + ROUND(yl, yr, 3); + ROUND(yr, yl, 4); + ROUND(yl, yr, 5); + ROUND(yr, yl, 6); + ROUND(yl, yr, 7); + ROUND(yr, yl, 8); + ROUND(yl, yr, 9); + ROUND(yr, yl, 10); + ROUND(yl, yr, 11); + ROUND(yr, yl, 12); + ROUND(yl, yr, 13); + ROUND(yr, yl, 14); + ROUND(yl, yr, 15); + + yl ^= P[16]; + yr ^= P[17]; + + dst[0] = yr; + dst[1] = yl; +} + +/* + * Calculates the blowfish S and P boxes for encryption and decryption. + */ +int blowfish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) +{ + struct bf_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *P = ctx->p; + u32 *S = ctx->s; + short i, j, count; + u32 data[2], temp; + + /* Copy the initialization s-boxes */ + for (i = 0, count = 0; i < 256; i++) + for (j = 0; j < 4; j++, count++) + S[count] = bf_sbox[count]; + + /* Set the p-boxes */ + for (i = 0; i < 16 + 2; i++) + P[i] = bf_pbox[i]; + + /* Actual subkey generation */ + for (j = 0, i = 0; i < 16 + 2; i++) { + temp = (((u32)key[j] << 24) | + ((u32)key[(j + 1) % keylen] << 16) | + ((u32)key[(j + 2) % keylen] << 8) | + ((u32)key[(j + 3) % keylen])); + + P[i] = P[i] ^ temp; + j = (j + 4) % keylen; + } + + data[0] = 0x00000000; + data[1] = 0x00000000; + + for (i = 0; i < 16 + 2; i += 2) { + encrypt_block((struct bf_ctx *)ctx, data, data); + + P[i] = data[0]; + P[i + 1] = data[1]; + } + + for (i = 0; i < 4; i++) { + for (j = 0, count = i * 256; j < 256; j += 2, count += 2) { + encrypt_block((struct bf_ctx *)ctx, data, data); + + S[count] = data[0]; + S[count + 1] = data[1]; + } + } + + /* Bruce says not to bother with the weak key check. */ + return 0; +} +EXPORT_SYMBOL_GPL(blowfish_setkey); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Blowfish Cipher common functions"); diff --git a/include/crypto/blowfish.h b/include/crypto/blowfish.h new file mode 100644 index 00000000000..1450d4a2798 --- /dev/null +++ b/include/crypto/blowfish.h @@ -0,0 +1,23 @@ +/* + * Common values for blowfish algorithms + */ + +#ifndef _CRYPTO_BLOWFISH_H +#define _CRYPTO_BLOWFISH_H + +#include +#include + +#define BF_BLOCK_SIZE 8 +#define BF_MIN_KEY_SIZE 4 +#define BF_MAX_KEY_SIZE 56 + +struct bf_ctx { + u32 p[18]; + u32 s[1024]; +}; + +int blowfish_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int key_len); + +#endif -- cgit v1.2.3 From 64b94ceae8c16cd1b2800cac83112d3815be5250 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Fri, 2 Sep 2011 01:45:22 +0300 Subject: crypto: blowfish - add x86_64 assembly implementation Patch adds x86_64 assembly implementation of blowfish. Two set of assembler functions are provided. First set is regular 'one-block at time' encrypt/decrypt functions. Second is 'four-block at time' functions that gain performance increase on out-of-order CPUs. Performance of 4-way functions should be equal to 1-way functions with in-order CPUs. Summary of the tcrypt benchmarks: Blowfish assembler vs blowfish C (256bit 8kb block ECB) encrypt: 2.2x speed decrypt: 2.3x speed Blowfish assembler vs blowfish C (256bit 8kb block CBC) encrypt: 1.12x speed decrypt: 2.5x speed Blowfish assembler vs blowfish C (256bit 8kb block CTR) encrypt: 2.5x speed Full output: http://koti.mbnet.fi/axh/kernel/crypto/tcrypt-speed-blowfish-asm-x86_64.txt http://koti.mbnet.fi/axh/kernel/crypto/tcrypt-speed-blowfish-c-x86_64.txt Tests were run on: vendor_id : AuthenticAMD cpu family : 16 model : 10 model name : AMD Phenom(tm) II X6 1055T Processor stepping : 0 Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 2 + arch/x86/crypto/blowfish-x86_64-asm_64.S | 392 +++++++++++++++++++++++++ arch/x86/crypto/blowfish_glue.c | 487 +++++++++++++++++++++++++++++++ crypto/Kconfig | 15 + 4 files changed, 896 insertions(+) create mode 100644 arch/x86/crypto/blowfish-x86_64-asm_64.S create mode 100644 arch/x86/crypto/blowfish_glue.c (limited to 'crypto/Kconfig') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 57c7f7b4436..725addfacf0 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o +obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o @@ -20,6 +21,7 @@ twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o +blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S new file mode 100644 index 00000000000..44eb23ab967 --- /dev/null +++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S @@ -0,0 +1,392 @@ +/* + * Blowfish Cipher Algorithm (x86_64) + * + * Copyright (C) 2011 Jussi Kivilinna + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "blowfish-x86_64-asm.S" +.text + +/* structure of crypto context */ +#define p 0 +#define s0 ((16 + 2) * 4) +#define s1 ((16 + 2 + (1 * 256)) * 4) +#define s2 ((16 + 2 + (2 * 256)) * 4) +#define s3 ((16 + 2 + (3 * 256)) * 4) + +/* register macros */ +#define CTX %rdi +#define RIO %rsi + +#define RX0 %rax +#define RX1 %rbx +#define RX2 %rcx +#define RX3 %rdx + +#define RX0d %eax +#define RX1d %ebx +#define RX2d %ecx +#define RX3d %edx + +#define RX0bl %al +#define RX1bl %bl +#define RX2bl %cl +#define RX3bl %dl + +#define RX0bh %ah +#define RX1bh %bh +#define RX2bh %ch +#define RX3bh %dh + +#define RT0 %rbp +#define RT1 %rsi + +#define RT0d %ebp +#define RT1d %esi + +#define RK0 %r8 +#define RK1 %r9 +#define RK2 %r10 +#define RK3 %r11 + +#define RK0d %r8d +#define RK1d %r9d +#define RK2d %r10d +#define RK3d %r11d + +#define RKEY %r12 + +/*********************************************************************** + * 1-way blowfish + ***********************************************************************/ +#define F(x, k) \ + rorq $16, x; \ + movzbl x ## bh, RT0d; \ + movzbl x ## bl, RT1d; \ + rolq $16, x; \ + movl s0(CTX,RT0,4), k ## d; \ + addl s1(CTX,RT1,4), k ## d; \ + movzbl x ## bh, RT0d; \ + movzbl x ## bl, RT1d; \ + rolq $32, x; \ + xorl s2(CTX,RT0,4), k ## d; \ + addl s3(CTX,RT1,4), k ## d; \ + xorq k, x; + +#define add_roundkey_enc(n) \ + xorq p+4*(n)(CTX), RX0; + +#define round_enc(n) \ + add_roundkey_enc(n); \ + \ + F(RX0, RK0); \ + F(RX0, RK0); + +#define round_final_enc(n) \ + xorq p+4*(n)(CTX), RX0; + +#define add_roundkey_dec(n) \ + movq p+4*(n-1)(CTX), RT0; \ + rorq $32, RT0; \ + xorq RT0, RX0; + +#define round_dec(n) \ + add_roundkey_dec(n); \ + \ + F(RX0, RK0); \ + F(RX0, RK0); \ + +#define read_block() \ + movq (RIO), RX0; \ + rorq $32, RX0; \ + bswapq RX0; + +#define write_block() \ + bswapq RX0; \ + movq RX0, (RIO); + +#define xor_block() \ + bswapq RX0; \ + xorq RX0, (RIO); + +.align 8 +.global __blowfish_enc_blk +.type __blowfish_enc_blk,@function; + +__blowfish_enc_blk: + // input: + // %rdi: ctx, CTX + // %rsi: dst + // %rdx: src + // %rcx: bool xor + pushq %rbp; + pushq %rbx; + + pushq %rsi; + pushq %rcx; + movq %rdx, RIO; + + read_block(); + + round_enc(0); + round_enc(2); + round_enc(4); + round_enc(6); + round_enc(8); + round_enc(10); + round_enc(12); + round_enc(14); + add_roundkey_enc(16); + + popq %rbp; + popq RIO; + + test %bpl, %bpl; + jnz __enc_xor; + + write_block(); + +__enc_ret: + popq %rbx; + popq %rbp; + + ret; + +__enc_xor: + xor_block(); + + jmp __enc_ret; + +.align 8 +.global blowfish_dec_blk +.type blowfish_dec_blk,@function; + +blowfish_dec_blk: + // input: + // %rdi: ctx, CTX + // %rsi: dst + // %rdx: src + pushq %rbp; + pushq %rbx; + + pushq %rsi; + movq %rdx, RIO; + + read_block(); + + round_dec(17); + round_dec(15); + round_dec(13); + round_dec(11); + round_dec(9); + round_dec(7); + round_dec(5); + round_dec(3); + add_roundkey_dec(1); + + popq RIO; + write_block(); + + popq %rbx; + popq %rbp; + + ret; + +/********************************************************************** + 4-way blowfish, four blocks parallel + **********************************************************************/ +#define add_preloaded_roundkey4() \ + xorq RKEY, RX0; \ + xorq RKEY, RX1; \ + xorq RKEY, RX2; \ + xorq RKEY, RX3; + +#define preload_roundkey_enc(n) \ + movq p+4*(n)(CTX), RKEY; + +#define add_roundkey_enc4(n) \ + add_preloaded_roundkey4(); \ + preload_roundkey_enc(n + 2); + +#define round_enc4(n) \ + add_roundkey_enc4(n); \ + \ + F(RX0, RK0); \ + F(RX1, RK1); \ + F(RX2, RK2); \ + F(RX3, RK3); \ + \ + F(RX0, RK0); \ + F(RX1, RK1); \ + F(RX2, RK2); \ + F(RX3, RK3); + +#define preload_roundkey_dec(n) \ + movq p+4*((n)-1)(CTX), RKEY; \ + rorq $32, RKEY; + +#define add_roundkey_dec4(n) \ + add_preloaded_roundkey4(); \ + preload_roundkey_dec(n - 2); + +#define round_dec4(n) \ + add_roundkey_dec4(n); \ + \ + F(RX0, RK0); \ + F(RX1, RK1); \ + F(RX2, RK2); \ + F(RX3, RK3); \ + \ + F(RX0, RK0); \ + F(RX1, RK1); \ + F(RX2, RK2); \ + F(RX3, RK3); + +#define read_block4() \ + movq (RIO), RX0; \ + rorq $32, RX0; \ + bswapq RX0; \ + \ + movq 8(RIO), RX1; \ + rorq $32, RX1; \ + bswapq RX1; \ + \ + movq 16(RIO), RX2; \ + rorq $32, RX2; \ + bswapq RX2; \ + \ + movq 24(RIO), RX3; \ + rorq $32, RX3; \ + bswapq RX3; + +#define write_block4() \ + bswapq RX0; \ + movq RX0, (RIO); \ + \ + bswapq RX1; \ + movq RX1, 8(RIO); \ + \ + bswapq RX2; \ + movq RX2, 16(RIO); \ + \ + bswapq RX3; \ + movq RX3, 24(RIO); + +#define xor_block4() \ + bswapq RX0; \ + xorq RX0, (RIO); \ + \ + bswapq RX1; \ + xorq RX1, 8(RIO); \ + \ + bswapq RX2; \ + xorq RX2, 16(RIO); \ + \ + bswapq RX3; \ + xorq RX3, 24(RIO); + +.align 8 +.global __blowfish_enc_blk_4way +.type __blowfish_enc_blk_4way,@function; + +__blowfish_enc_blk_4way: + // input: + // %rdi: ctx, CTX + // %rsi: dst + // %rdx: src + // %rcx: bool xor + pushq %rbp; + pushq %rbx; + pushq RKEY; + preload_roundkey_enc(0); + + pushq %rsi; + pushq %rcx; + movq %rdx, RIO; + + read_block4(); + + round_enc4(0); + round_enc4(2); + round_enc4(4); + round_enc4(6); + round_enc4(8); + round_enc4(10); + round_enc4(12); + round_enc4(14); + add_preloaded_roundkey4(); + + popq %rbp; + popq RIO; + + test %bpl, %bpl; + jnz __enc_xor4; + + write_block4(); + +__enc_ret4: + popq RKEY; + popq %rbx; + popq %rbp; + + ret; + +__enc_xor4: + xor_block4(); + + jmp __enc_ret4; + +.align 8 +.global blowfish_dec_blk_4way +.type blowfish_dec_blk_4way,@function; + +blowfish_dec_blk_4way: + // input: + // %rdi: ctx, CTX + // %rsi: dst + // %rdx: src + pushq %rbp; + pushq %rbx; + pushq RKEY; + preload_roundkey_dec(17); + + pushq %rsi; + movq %rdx, RIO; + + read_block4(); + + round_dec4(17); + round_dec4(15); + round_dec4(13); + round_dec4(11); + round_dec4(9); + round_dec4(7); + round_dec4(5); + round_dec4(3); + add_preloaded_roundkey4(); + + popq RIO; + write_block4(); + + popq RKEY; + popq %rbx; + popq %rbp; + + ret; + diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c new file mode 100644 index 00000000000..40911abb7b1 --- /dev/null +++ b/arch/x86/crypto/blowfish_glue.c @@ -0,0 +1,487 @@ +/* + * Glue Code for assembler optimized version of Blowfish + * + * Copyright (c) 2011 Jussi Kivilinna + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include +#include +#include +#include +#include +#include + +/* regular block cipher functions */ +asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, + bool xor); +asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); + +/* 4-way parallel cipher functions */ +asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src) +{ + __blowfish_enc_blk(ctx, dst, src, false); +} + +static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst, + const u8 *src) +{ + __blowfish_enc_blk(ctx, dst, src, true); +} + +static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, + const u8 *src) +{ + __blowfish_enc_blk_4way(ctx, dst, src, false); +} + +static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst, + const u8 *src) +{ + __blowfish_enc_blk_4way(ctx, dst, src, true); +} + +static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src); +} + +static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src); +} + +static struct crypto_alg bf_alg = { + .cra_name = "blowfish", + .cra_driver_name = "blowfish-asm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = BF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct bf_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(bf_alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = BF_MIN_KEY_SIZE, + .cia_max_keysize = BF_MAX_KEY_SIZE, + .cia_setkey = blowfish_setkey, + .cia_encrypt = blowfish_encrypt, + .cia_decrypt = blowfish_decrypt, + } + } +}; + +static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, + void (*fn)(struct bf_ctx *, u8 *, const u8 *), + void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *)) +{ + struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = BF_BLOCK_SIZE; + unsigned int nbytes; + int err; + + err = blkcipher_walk_virt(desc, walk); + + while ((nbytes = walk->nbytes)) { + u8 *wsrc = walk->src.virt.addr; + u8 *wdst = walk->dst.virt.addr; + + /* Process four block batch */ + if (nbytes >= bsize * 4) { + do { + fn_4way(ctx, wdst, wsrc); + + wsrc += bsize * 4; + wdst += bsize * 4; + nbytes -= bsize * 4; + } while (nbytes >= bsize * 4); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + fn(ctx, wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + err = blkcipher_walk_done(desc, walk, nbytes); + } + + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way); +} + +static struct crypto_alg blk_ecb_alg = { + .cra_name = "ecb(blowfish)", + .cra_driver_name = "ecb-blowfish-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = BF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct bf_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .setkey = blowfish_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}; + +static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = BF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 *iv = (u64 *)walk->iv; + + do { + *dst = *src ^ *iv; + blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + *(u64 *)walk->iv = *iv; + return nbytes; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_encrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = BF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 ivs[4 - 1]; + u64 last_iv; + + /* Start of the last block. */ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process four block batch */ + if (nbytes >= bsize * 4) { + do { + nbytes -= bsize * 4 - bsize; + src -= 4 - 1; + dst -= 4 - 1; + + ivs[0] = src[0]; + ivs[1] = src[1]; + ivs[2] = src[2]; + + blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src); + + dst[1] ^= ivs[0]; + dst[2] ^= ivs[1]; + dst[3] ^= ivs[2]; + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * 4); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + for (;;) { + blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } + +done: + *dst ^= *(u64 *)walk->iv; + *(u64 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_decrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static struct crypto_alg blk_cbc_alg = { + .cra_name = "cbc(blowfish)", + .cra_driver_name = "cbc-blowfish-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = BF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct bf_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .ivsize = BF_BLOCK_SIZE, + .setkey = blowfish_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}; + +static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk) +{ + u8 *ctrblk = walk->iv; + u8 keystream[BF_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + blowfish_enc_blk(ctx, keystream, ctrblk); + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + + crypto_inc(ctrblk, BF_BLOCK_SIZE); +} + +static unsigned int __ctr_crypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = BF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); + __be64 ctrblocks[4]; + + /* Process four block batch */ + if (nbytes >= bsize * 4) { + do { + if (dst != src) { + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; + } + + /* create ctrblks for parallel encrypt */ + ctrblocks[0] = cpu_to_be64(ctrblk++); + ctrblocks[1] = cpu_to_be64(ctrblk++); + ctrblocks[2] = cpu_to_be64(ctrblk++); + ctrblocks[3] = cpu_to_be64(ctrblk++); + + blowfish_enc_blk_xor_4way(ctx, (u8 *)dst, + (u8 *)ctrblocks); + + src += 4; + dst += 4; + } while ((nbytes -= bsize * 4) >= bsize * 4); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (dst != src) + *dst = *src; + + ctrblocks[0] = cpu_to_be64(ctrblk++); + + blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks); + + src += 1; + dst += 1; + } while ((nbytes -= bsize) >= bsize); + +done: + *(__be64 *)walk->iv = cpu_to_be64(ctrblk); + return nbytes; +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE); + + while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) { + nbytes = __ctr_crypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + if (walk.nbytes) { + ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + +static struct crypto_alg blk_ctr_alg = { + .cra_name = "ctr(blowfish)", + .cra_driver_name = "ctr-blowfish-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = BF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct bf_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .ivsize = BF_BLOCK_SIZE, + .setkey = blowfish_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}; + +static int __init init(void) +{ + int err; + + err = crypto_register_alg(&bf_alg); + if (err) + goto bf_err; + err = crypto_register_alg(&blk_ecb_alg); + if (err) + goto ecb_err; + err = crypto_register_alg(&blk_cbc_alg); + if (err) + goto cbc_err; + err = crypto_register_alg(&blk_ctr_alg); + if (err) + goto ctr_err; + + return 0; + +ctr_err: + crypto_unregister_alg(&blk_cbc_alg); +cbc_err: + crypto_unregister_alg(&blk_ecb_alg); +ecb_err: + crypto_unregister_alg(&bf_alg); +bf_err: + return err; +} + +static void __exit fini(void) +{ + crypto_unregister_alg(&blk_ctr_alg); + crypto_unregister_alg(&blk_cbc_alg); + crypto_unregister_alg(&blk_ecb_alg); + crypto_unregister_alg(&bf_alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized"); +MODULE_ALIAS("blowfish"); +MODULE_ALIAS("blowfish-asm"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 108cb98e217..07637745e23 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -620,6 +620,21 @@ config CRYPTO_BLOWFISH_COMMON See also: +config CRYPTO_BLOWFISH_X86_64 + tristate "Blowfish cipher algorithm (x86_64)" + depends on (X86 || UML_X86) && 64BIT + select CRYPTO_ALGAPI + select CRYPTO_BLOWFISH_COMMON + help + Blowfish cipher algorithm (x86_64), by Bruce Schneier. + + This is a variable key length cipher which can use keys from 32 + bits to 448 bits in length. It's fast, simple and specifically + designed for use on "large microprocessors". + + See also: + + config CRYPTO_CAMELLIA tristate "Camellia cipher algorithms" depends on CRYPTO -- cgit v1.2.3 From 8280daad436edb7dd9e7e06fc13bcecb6b2a885c Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Mon, 26 Sep 2011 16:47:25 +0300 Subject: crypto: twofish - add 3-way parallel x86_64 assembler implemention Patch adds 3-way parallel x86_64 assembly implementation of twofish as new module. New assembler functions crypt data in three blocks chunks, improving cipher performance on out-of-order CPUs. Patch has been tested with tcrypt and automated filesystem tests. Summary of the tcrypt benchmarks: Twofish 3-way-asm vs twofish asm (128bit 8kb block ECB) encrypt: 1.3x speed decrypt: 1.3x speed Twofish 3-way-asm vs twofish asm (128bit 8kb block CBC) encrypt: 1.07x speed decrypt: 1.4x speed Twofish 3-way-asm vs twofish asm (128bit 8kb block CTR) encrypt: 1.4x speed Twofish 3-way-asm vs AES asm (128bit 8kb block ECB) encrypt: 1.0x speed decrypt: 1.0x speed Twofish 3-way-asm vs AES asm (128bit 8kb block CBC) encrypt: 0.84x speed decrypt: 1.09x speed Twofish 3-way-asm vs AES asm (128bit 8kb block CTR) encrypt: 1.15x speed Full output: http://koti.mbnet.fi/axh/kernel/crypto/tcrypt-speed-twofish-3way-asm-x86_64.txt http://koti.mbnet.fi/axh/kernel/crypto/tcrypt-speed-twofish-asm-x86_64.txt http://koti.mbnet.fi/axh/kernel/crypto/tcrypt-speed-aes-asm-x86_64.txt Tests were run on: vendor_id : AuthenticAMD cpu family : 16 model : 10 model name : AMD Phenom(tm) II X6 1055T Processor Also userspace test were run on: vendor_id : GenuineIntel cpu family : 6 model : 15 model name : Intel(R) Xeon(R) CPU E7330 @ 2.40GHz stepping : 11 Userspace test results: Encryption/decryption of twofish 3-way vs x86_64-asm on AMD Phenom II: encrypt: 1.27x decrypt: 1.25x Encryption/decryption of twofish 3-way vs x86_64-asm on Intel Xeon E7330: encrypt: 1.36x decrypt: 1.36x Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 2 + arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 316 ++++++++++++++++++ arch/x86/crypto/twofish_glue_3way.c | 472 +++++++++++++++++++++++++++ crypto/Kconfig | 20 ++ 4 files changed, 810 insertions(+) create mode 100644 arch/x86/crypto/twofish-x86_64-asm_64-3way.S create mode 100644 arch/x86/crypto/twofish_glue_3way.c (limited to 'crypto/Kconfig') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 725addfacf0..3537d4b91f7 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o +obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o @@ -23,6 +24,7 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o +twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S new file mode 100644 index 00000000000..5b012a2c511 --- /dev/null +++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S @@ -0,0 +1,316 @@ +/* + * Twofish Cipher 3-way parallel algorithm (x86_64) + * + * Copyright (C) 2011 Jussi Kivilinna + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "twofish-x86_64-asm-3way.S" +.text + +/* structure of crypto context */ +#define s0 0 +#define s1 1024 +#define s2 2048 +#define s3 3072 +#define w 4096 +#define k 4128 + +/********************************************************************** + 3-way twofish + **********************************************************************/ +#define CTX %rdi +#define RIO %rdx + +#define RAB0 %rax +#define RAB1 %rbx +#define RAB2 %rcx + +#define RAB0d %eax +#define RAB1d %ebx +#define RAB2d %ecx + +#define RAB0bh %ah +#define RAB1bh %bh +#define RAB2bh %ch + +#define RAB0bl %al +#define RAB1bl %bl +#define RAB2bl %cl + +#define RCD0 %r8 +#define RCD1 %r9 +#define RCD2 %r10 + +#define RCD0d %r8d +#define RCD1d %r9d +#define RCD2d %r10d + +#define RX0 %rbp +#define RX1 %r11 +#define RX2 %r12 + +#define RX0d %ebp +#define RX1d %r11d +#define RX2d %r12d + +#define RY0 %r13 +#define RY1 %r14 +#define RY2 %r15 + +#define RY0d %r13d +#define RY1d %r14d +#define RY2d %r15d + +#define RT0 %rdx +#define RT1 %rsi + +#define RT0d %edx +#define RT1d %esi + +#define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \ + movzbl ab ## bl, tmp2 ## d; \ + movzbl ab ## bh, tmp1 ## d; \ + rorq $(rot), ab; \ + op1##l T0(CTX, tmp2, 4), dst ## d; \ + op2##l T1(CTX, tmp1, 4), dst ## d; + +/* + * Combined G1 & G2 function. Reordered with help of rotates to have moves + * at begining. + */ +#define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \ + /* G1,1 && G2,1 */ \ + do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 0, ab ## 0, x ## 0); \ + do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 0, ab ## 0, y ## 0); \ + \ + do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 1, ab ## 1, x ## 1); \ + do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 1, ab ## 1, y ## 1); \ + \ + do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 2, ab ## 2, x ## 2); \ + do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 2, ab ## 2, y ## 2); \ + \ + /* G1,2 && G2,2 */ \ + do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \ + do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \ + xchgq cd ## 0, ab ## 0; \ + \ + do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \ + do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \ + xchgq cd ## 1, ab ## 1; \ + \ + do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \ + do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \ + xchgq cd ## 2, ab ## 2; + +#define enc_round_end(ab, x, y, n) \ + addl y ## d, x ## d; \ + addl x ## d, y ## d; \ + addl k+4*(2*(n))(CTX), x ## d; \ + xorl ab ## d, x ## d; \ + addl k+4*(2*(n)+1)(CTX), y ## d; \ + shrq $32, ab; \ + roll $1, ab ## d; \ + xorl y ## d, ab ## d; \ + shlq $32, ab; \ + rorl $1, x ## d; \ + orq x, ab; + +#define dec_round_end(ba, x, y, n) \ + addl y ## d, x ## d; \ + addl x ## d, y ## d; \ + addl k+4*(2*(n))(CTX), x ## d; \ + addl k+4*(2*(n)+1)(CTX), y ## d; \ + xorl ba ## d, y ## d; \ + shrq $32, ba; \ + roll $1, ba ## d; \ + xorl x ## d, ba ## d; \ + shlq $32, ba; \ + rorl $1, y ## d; \ + orq y, ba; + +#define encrypt_round3(ab, cd, n) \ + g1g2_3(ab, cd, s0, s1, s2, s3, s0, s1, s2, s3, RX, RY); \ + \ + enc_round_end(ab ## 0, RX0, RY0, n); \ + enc_round_end(ab ## 1, RX1, RY1, n); \ + enc_round_end(ab ## 2, RX2, RY2, n); + +#define decrypt_round3(ba, dc, n) \ + g1g2_3(ba, dc, s1, s2, s3, s0, s3, s0, s1, s2, RY, RX); \ + \ + dec_round_end(ba ## 0, RX0, RY0, n); \ + dec_round_end(ba ## 1, RX1, RY1, n); \ + dec_round_end(ba ## 2, RX2, RY2, n); + +#define encrypt_cycle3(ab, cd, n) \ + encrypt_round3(ab, cd, n*2); \ + encrypt_round3(ab, cd, (n*2)+1); + +#define decrypt_cycle3(ba, dc, n) \ + decrypt_round3(ba, dc, (n*2)+1); \ + decrypt_round3(ba, dc, (n*2)); + +#define inpack3(in, n, xy, m) \ + movq 4*(n)(in), xy ## 0; \ + xorq w+4*m(CTX), xy ## 0; \ + \ + movq 4*(4+(n))(in), xy ## 1; \ + xorq w+4*m(CTX), xy ## 1; \ + \ + movq 4*(8+(n))(in), xy ## 2; \ + xorq w+4*m(CTX), xy ## 2; + +#define outunpack3(op, out, n, xy, m) \ + xorq w+4*m(CTX), xy ## 0; \ + op ## q xy ## 0, 4*(n)(out); \ + \ + xorq w+4*m(CTX), xy ## 1; \ + op ## q xy ## 1, 4*(4+(n))(out); \ + \ + xorq w+4*m(CTX), xy ## 2; \ + op ## q xy ## 2, 4*(8+(n))(out); + +#define inpack_enc3() \ + inpack3(RIO, 0, RAB, 0); \ + inpack3(RIO, 2, RCD, 2); + +#define outunpack_enc3(op) \ + outunpack3(op, RIO, 2, RAB, 6); \ + outunpack3(op, RIO, 0, RCD, 4); + +#define inpack_dec3() \ + inpack3(RIO, 0, RAB, 4); \ + rorq $32, RAB0; \ + rorq $32, RAB1; \ + rorq $32, RAB2; \ + inpack3(RIO, 2, RCD, 6); \ + rorq $32, RCD0; \ + rorq $32, RCD1; \ + rorq $32, RCD2; + +#define outunpack_dec3() \ + rorq $32, RCD0; \ + rorq $32, RCD1; \ + rorq $32, RCD2; \ + outunpack3(mov, RIO, 0, RCD, 0); \ + rorq $32, RAB0; \ + rorq $32, RAB1; \ + rorq $32, RAB2; \ + outunpack3(mov, RIO, 2, RAB, 2); + +.align 8 +.global __twofish_enc_blk_3way +.type __twofish_enc_blk_3way,@function; + +__twofish_enc_blk_3way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src, RIO + * %rcx: bool, if true: xor output + */ + pushq %r15; + pushq %r14; + pushq %r13; + pushq %r12; + pushq %rbp; + pushq %rbx; + + pushq %rcx; /* bool xor */ + pushq %rsi; /* dst */ + + inpack_enc3(); + + encrypt_cycle3(RAB, RCD, 0); + encrypt_cycle3(RAB, RCD, 1); + encrypt_cycle3(RAB, RCD, 2); + encrypt_cycle3(RAB, RCD, 3); + encrypt_cycle3(RAB, RCD, 4); + encrypt_cycle3(RAB, RCD, 5); + encrypt_cycle3(RAB, RCD, 6); + encrypt_cycle3(RAB, RCD, 7); + + popq RIO; /* dst */ + popq %rbp; /* bool xor */ + + testb %bpl, %bpl; + jnz __enc_xor3; + + outunpack_enc3(mov); + + popq %rbx; + popq %rbp; + popq %r12; + popq %r13; + popq %r14; + popq %r15; + ret; + +__enc_xor3: + outunpack_enc3(xor); + + popq %rbx; + popq %rbp; + popq %r12; + popq %r13; + popq %r14; + popq %r15; + ret; + +.global twofish_dec_blk_3way +.type twofish_dec_blk_3way,@function; + +twofish_dec_blk_3way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src, RIO + */ + pushq %r15; + pushq %r14; + pushq %r13; + pushq %r12; + pushq %rbp; + pushq %rbx; + + pushq %rsi; /* dst */ + + inpack_dec3(); + + decrypt_cycle3(RAB, RCD, 7); + decrypt_cycle3(RAB, RCD, 6); + decrypt_cycle3(RAB, RCD, 5); + decrypt_cycle3(RAB, RCD, 4); + decrypt_cycle3(RAB, RCD, 3); + decrypt_cycle3(RAB, RCD, 2); + decrypt_cycle3(RAB, RCD, 1); + decrypt_cycle3(RAB, RCD, 0); + + popq RIO; /* dst */ + + outunpack_dec3(); + + popq %rbx; + popq %rbp; + popq %r12; + popq %r13; + popq %r14; + popq %r15; + ret; + diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c new file mode 100644 index 00000000000..0cbf9faea86 --- /dev/null +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -0,0 +1,472 @@ +/* + * Glue Code for 3-way parallel assembler optimized version of Twofish + * + * Copyright (c) 2011 Jussi Kivilinna + * + * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: + * Copyright (c) 2006 Herbert Xu + * CTR part based on code (crypto/ctr.c) by: + * (C) Copyright IBM Corp. 2007 - Joy Latten + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include +#include +#include +#include +#include +#include +#include + +/* regular block cipher functions from twofish_x86_64 module */ +asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); +asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); + +/* 3-way parallel cipher functions */ +asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src) +{ + __twofish_enc_blk_3way(ctx, dst, src, false); +} + +static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src) +{ + __twofish_enc_blk_3way(ctx, dst, src, true); +} + +static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, + void (*fn)(struct twofish_ctx *, u8 *, const u8 *), + void (*fn_3way)(struct twofish_ctx *, u8 *, const u8 *)) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes; + int err; + + err = blkcipher_walk_virt(desc, walk); + + while ((nbytes = walk->nbytes)) { + u8 *wsrc = walk->src.virt.addr; + u8 *wdst = walk->dst.virt.addr; + + /* Process three block batch */ + if (nbytes >= bsize * 3) { + do { + fn_3way(ctx, wdst, wsrc); + + wsrc += bsize * 3; + wdst += bsize * 3; + nbytes -= bsize * 3; + } while (nbytes >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + fn(ctx, wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + err = blkcipher_walk_done(desc, walk, nbytes); + } + + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, twofish_enc_blk, twofish_enc_blk_3way); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way); +} + +static struct crypto_alg blk_ecb_alg = { + .cra_name = "ecb(twofish)", + .cra_driver_name = "ecb-twofish-3way", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .setkey = twofish_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}; + +static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 *iv = (u128 *)walk->iv; + + do { + u128_xor(dst, src, iv); + twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); + return nbytes; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_encrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ivs[3 - 1]; + u128 last_iv; + + /* Start of the last block. */ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process three block batch */ + if (nbytes >= bsize * 3) { + do { + nbytes -= bsize * (3 - 1); + src -= 3 - 1; + dst -= 3 - 1; + + ivs[0] = src[0]; + ivs[1] = src[1]; + + twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); + + u128_xor(dst + 1, dst + 1, ivs + 0); + u128_xor(dst + 2, dst + 2, ivs + 1); + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + for (;;) { + twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } + +done: + u128_xor(dst, dst, (u128 *)walk->iv); + *(u128 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_decrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static struct crypto_alg blk_cbc_alg = { + .cra_name = "cbc(twofish)", + .cra_driver_name = "cbc-twofish-3way", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = twofish_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}; + +static inline void u128_to_be128(be128 *dst, const u128 *src) +{ + dst->a = cpu_to_be64(src->a); + dst->b = cpu_to_be64(src->b); +} + +static inline void be128_to_u128(u128 *dst, const be128 *src) +{ + dst->a = be64_to_cpu(src->a); + dst->b = be64_to_cpu(src->b); +} + +static inline void u128_inc(u128 *i) +{ + i->b++; + if (!i->b) + i->a++; +} + +static void ctr_crypt_final(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + u8 *ctrblk = walk->iv; + u8 keystream[TF_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + twofish_enc_blk(ctx, keystream, ctrblk); + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + + crypto_inc(ctrblk, TF_BLOCK_SIZE); +} + +static unsigned int __ctr_crypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ctrblk; + be128 ctrblocks[3]; + + be128_to_u128(&ctrblk, (be128 *)walk->iv); + + /* Process three block batch */ + if (nbytes >= bsize * 3) { + do { + if (dst != src) { + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + } + + /* create ctrblks for parallel encrypt */ + u128_to_be128(&ctrblocks[0], &ctrblk); + u128_inc(&ctrblk); + u128_to_be128(&ctrblocks[1], &ctrblk); + u128_inc(&ctrblk); + u128_to_be128(&ctrblocks[2], &ctrblk); + u128_inc(&ctrblk); + + twofish_enc_blk_xor_3way(ctx, (u8 *)dst, + (u8 *)ctrblocks); + + src += 3; + dst += 3; + nbytes -= bsize * 3; + } while (nbytes >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (dst != src) + *dst = *src; + + u128_to_be128(&ctrblocks[0], &ctrblk); + u128_inc(&ctrblk); + + twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); + u128_xor(dst, dst, (u128 *)ctrblocks); + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + u128_to_be128((be128 *)walk->iv, &ctrblk); + return nbytes; +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE); + + while ((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) { + nbytes = __ctr_crypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + if (walk.nbytes) { + ctr_crypt_final(desc, &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + +static struct crypto_alg blk_ctr_alg = { + .cra_name = "ctr(twofish)", + .cra_driver_name = "ctr-twofish-3way", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = twofish_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}; + +int __init init(void) +{ + int err; + + err = crypto_register_alg(&blk_ecb_alg); + if (err) + goto ecb_err; + err = crypto_register_alg(&blk_cbc_alg); + if (err) + goto cbc_err; + err = crypto_register_alg(&blk_ctr_alg); + if (err) + goto ctr_err; + + return 0; + +ctr_err: + crypto_unregister_alg(&blk_cbc_alg); +cbc_err: + crypto_unregister_alg(&blk_ecb_alg); +ecb_err: + return err; +} + +void __exit fini(void) +{ + crypto_unregister_alg(&blk_ctr_alg); + crypto_unregister_alg(&blk_cbc_alg); + crypto_unregister_alg(&blk_ecb_alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized"); +MODULE_ALIAS("twofish"); +MODULE_ALIAS("twofish-asm"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 07637745e23..404a846b1e4 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -828,6 +828,26 @@ config CRYPTO_TWOFISH_X86_64 See also: +config CRYPTO_TWOFISH_X86_64_3WAY + tristate "Twofish cipher algorithm (x86_64, 3-way parallel)" + depends on (X86 || UML_X86) && 64BIT + select CRYPTO_ALGAPI + select CRYPTO_TWOFISH_COMMON + select CRYPTO_TWOFISH_X86_64 + help + Twofish cipher algorithm (x86_64, 3-way parallel). + + Twofish was submitted as an AES (Advanced Encryption Standard) + candidate cipher by researchers at CounterPane Systems. It is a + 16 round block cipher supporting key sizes of 128, 192, and 256 + bits. + + This module provides Twofish cipher algorithm that processes three + blocks parallel, utilizing resources of out-of-order CPUs better. + + See also: + + comment "Compression" config CRYPTO_DEFLATE -- cgit v1.2.3 From a38f7907b926e4c6c7d389ad96cc38cec2e5a9e9 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 27 Sep 2011 07:23:50 +0200 Subject: crypto: Add userspace configuration API This patch adds a basic userspace configuration API for the crypto layer. With this it is possible to instantiate, remove and to show crypto algorithms from userspace. Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu --- crypto/Kconfig | 7 + crypto/Makefile | 1 + crypto/crypto_user.c | 372 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/cryptouser.h | 52 +++++++ include/linux/netlink.h | 1 + 5 files changed, 433 insertions(+) create mode 100644 crypto/crypto_user.c create mode 100644 include/linux/cryptouser.h (limited to 'crypto/Kconfig') diff --git a/crypto/Kconfig b/crypto/Kconfig index 404a846b1e4..a8442dca024 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -100,6 +100,13 @@ config CRYPTO_MANAGER2 select CRYPTO_BLKCIPHER2 select CRYPTO_PCOMP2 +config CRYPTO_USER + tristate "Userspace cryptographic algorithm configuration" + select CRYPTO_MANAGER + help + Userapace configuration for cryptographic instantiations such as + cbc(aes). + config CRYPTO_MANAGER_DISABLE_TESTS bool "Disable run-time self tests" default y diff --git a/crypto/Makefile b/crypto/Makefile index fa8cbbbca67..9e6eee2c05d 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -31,6 +31,7 @@ obj-$(CONFIG_CRYPTO_PCOMP2) += pcompress.o cryptomgr-y := algboss.o testmgr.o obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o +obj-$(CONFIG_CRYPTO_USER) += crypto_user.o obj-$(CONFIG_CRYPTO_HMAC) += hmac.o obj-$(CONFIG_CRYPTO_VMAC) += vmac.o obj-$(CONFIG_CRYPTO_XCBC) += xcbc.o diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c new file mode 100644 index 00000000000..513cfe7bc3b --- /dev/null +++ b/crypto/crypto_user.c @@ -0,0 +1,372 @@ +/* + * Crypto user configuration API. + * + * Copyright (C) 2011 secunet Security Networks AG + * Copyright (C) 2011 Steffen Klassert + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include +#include +#include +#include +#include "internal.h" + +DEFINE_MUTEX(crypto_cfg_mutex); + +/* The crypto netlink socket */ +static struct sock *crypto_nlsk; + +struct crypto_dump_info { + struct sk_buff *in_skb; + struct sk_buff *out_skb; + u32 nlmsg_seq; + u16 nlmsg_flags; +}; + +static struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact) +{ + int match; + struct crypto_alg *q, *alg = NULL; + + down_read(&crypto_alg_sem); + + if (list_empty(&crypto_alg_list)) + return NULL; + + list_for_each_entry(q, &crypto_alg_list, cra_list) { + + if ((q->cra_flags ^ p->cru_type) & p->cru_mask) + continue; + + if (strlen(p->cru_driver_name)) + match = !strcmp(q->cra_driver_name, + p->cru_driver_name); + else if (!exact) + match = !strcmp(q->cra_name, p->cru_name); + + if (match) { + alg = q; + break; + } + } + + up_read(&crypto_alg_sem); + + return alg; +} + +static int crypto_report_one(struct crypto_alg *alg, + struct crypto_user_alg *ualg, struct sk_buff *skb) +{ + memcpy(&ualg->cru_name, &alg->cra_name, sizeof(ualg->cru_name)); + memcpy(&ualg->cru_driver_name, &alg->cra_driver_name, + sizeof(ualg->cru_driver_name)); + memcpy(&ualg->cru_module_name, module_name(alg->cra_module), + CRYPTO_MAX_ALG_NAME); + + ualg->cru_flags = alg->cra_flags; + ualg->cru_refcnt = atomic_read(&alg->cra_refcnt); + + NLA_PUT_U32(skb, CRYPTOCFGA_PRIORITY_VAL, alg->cra_priority); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int crypto_report_alg(struct crypto_alg *alg, + struct crypto_dump_info *info) +{ + struct sk_buff *in_skb = info->in_skb; + struct sk_buff *skb = info->out_skb; + struct nlmsghdr *nlh; + struct crypto_user_alg *ualg; + int err = 0; + + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, info->nlmsg_seq, + CRYPTO_MSG_GETALG, sizeof(*ualg), info->nlmsg_flags); + if (!nlh) { + err = -EMSGSIZE; + goto out; + } + + ualg = nlmsg_data(nlh); + + err = crypto_report_one(alg, ualg, skb); + if (err) { + nlmsg_cancel(skb, nlh); + goto out; + } + + nlmsg_end(skb, nlh); + +out: + return err; +} + +static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, + struct nlattr **attrs) +{ + struct crypto_user_alg *p = nlmsg_data(in_nlh); + struct crypto_alg *alg; + struct sk_buff *skb; + struct crypto_dump_info info; + int err; + + if (!p->cru_driver_name) + return -EINVAL; + + alg = crypto_alg_match(p, 1); + if (!alg) + return -ENOENT; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + info.in_skb = in_skb; + info.out_skb = skb; + info.nlmsg_seq = in_nlh->nlmsg_seq; + info.nlmsg_flags = 0; + + err = crypto_report_alg(alg, &info); + if (err) + return err; + + return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).pid); +} + +static int crypto_dump_report(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct crypto_alg *alg; + struct crypto_dump_info info; + int err; + + if (cb->args[0]) + goto out; + + cb->args[0] = 1; + + info.in_skb = cb->skb; + info.out_skb = skb; + info.nlmsg_seq = cb->nlh->nlmsg_seq; + info.nlmsg_flags = NLM_F_MULTI; + + list_for_each_entry(alg, &crypto_alg_list, cra_list) { + err = crypto_report_alg(alg, &info); + if (err) + goto out_err; + } + +out: + return skb->len; +out_err: + return err; +} + +static int crypto_dump_report_done(struct netlink_callback *cb) +{ + return 0; +} + +static int crypto_update_alg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) +{ + struct crypto_alg *alg; + struct crypto_user_alg *p = nlmsg_data(nlh); + struct nlattr *priority = attrs[CRYPTOCFGA_PRIORITY_VAL]; + LIST_HEAD(list); + + if (priority && !strlen(p->cru_driver_name)) + return -EINVAL; + + alg = crypto_alg_match(p, 1); + if (!alg) + return -ENOENT; + + down_write(&crypto_alg_sem); + + crypto_remove_spawns(alg, &list, NULL); + + if (priority) + alg->cra_priority = nla_get_u32(priority); + + up_write(&crypto_alg_sem); + + crypto_remove_final(&list); + + return 0; +} + +static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) +{ + struct crypto_alg *alg; + struct crypto_user_alg *p = nlmsg_data(nlh); + + alg = crypto_alg_match(p, 1); + if (!alg) + return -ENOENT; + + /* We can not unregister core algorithms such as aes-generic. + * We would loose the reference in the crypto_alg_list to this algorithm + * if we try to unregister. Unregistering such an algorithm without + * removing the module is not possible, so we restrict to crypto + * instances that are build from templates. */ + if (!(alg->cra_flags & CRYPTO_ALG_INSTANCE)) + return -EINVAL; + + if (atomic_read(&alg->cra_refcnt) != 1) + return -EBUSY; + + return crypto_unregister_alg(alg); +} + +static int crypto_add_alg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) +{ + int exact; + const char *name; + struct crypto_alg *alg; + struct crypto_user_alg *p = nlmsg_data(nlh); + struct nlattr *priority = attrs[CRYPTOCFGA_PRIORITY_VAL]; + + if (strlen(p->cru_driver_name)) + exact = 1; + + if (priority && !exact) + return -EINVAL; + + alg = crypto_alg_match(p, exact); + if (alg) + return -EEXIST; + + if (strlen(p->cru_driver_name)) + name = p->cru_driver_name; + else + name = p->cru_name; + + alg = crypto_alg_mod_lookup(name, p->cru_type, p->cru_mask); + if (IS_ERR(alg)) + return PTR_ERR(alg); + + down_write(&crypto_alg_sem); + + if (priority) + alg->cra_priority = nla_get_u32(priority); + + up_write(&crypto_alg_sem); + + crypto_mod_put(alg); + + return 0; +} + +#define MSGSIZE(type) sizeof(struct type) + +static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = { + [CRYPTO_MSG_NEWALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), + [CRYPTO_MSG_DELALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), + [CRYPTO_MSG_UPDATEALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), + [CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), +}; + +static const struct nla_policy crypto_policy[CRYPTOCFGA_MAX+1] = { + [CRYPTOCFGA_PRIORITY_VAL] = { .type = NLA_U32}, +}; + +#undef MSGSIZE + +static struct crypto_link { + int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); + int (*dump)(struct sk_buff *, struct netlink_callback *); + int (*done)(struct netlink_callback *); +} crypto_dispatch[CRYPTO_NR_MSGTYPES] = { + [CRYPTO_MSG_NEWALG - CRYPTO_MSG_BASE] = { .doit = crypto_add_alg}, + [CRYPTO_MSG_DELALG - CRYPTO_MSG_BASE] = { .doit = crypto_del_alg}, + [CRYPTO_MSG_UPDATEALG - CRYPTO_MSG_BASE] = { .doit = crypto_update_alg}, + [CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE] = { .doit = crypto_report, + .dump = crypto_dump_report, + .done = crypto_dump_report_done}, +}; + +static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct nlattr *attrs[CRYPTOCFGA_MAX+1]; + struct crypto_link *link; + int type, err; + + type = nlh->nlmsg_type; + if (type > CRYPTO_MSG_MAX) + return -EINVAL; + + type -= CRYPTO_MSG_BASE; + link = &crypto_dispatch[type]; + + if (security_netlink_recv(skb, CAP_NET_ADMIN)) + return -EPERM; + + if ((type == (CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE) && + (nlh->nlmsg_flags & NLM_F_DUMP))) { + if (link->dump == NULL) + return -EINVAL; + + return netlink_dump_start(crypto_nlsk, skb, nlh, + link->dump, link->done, 0); + } + + err = nlmsg_parse(nlh, crypto_msg_min[type], attrs, CRYPTOCFGA_MAX, + crypto_policy); + if (err < 0) + return err; + + if (link->doit == NULL) + return -EINVAL; + + return link->doit(skb, nlh, attrs); +} + +static void crypto_netlink_rcv(struct sk_buff *skb) +{ + mutex_lock(&crypto_cfg_mutex); + netlink_rcv_skb(skb, &crypto_user_rcv_msg); + mutex_unlock(&crypto_cfg_mutex); +} + +static int __init crypto_user_init(void) +{ + crypto_nlsk = netlink_kernel_create(&init_net, NETLINK_CRYPTO, + 0, crypto_netlink_rcv, + NULL, THIS_MODULE); + if (!crypto_nlsk) + return -ENOMEM; + + return 0; +} + +static void __exit crypto_user_exit(void) +{ + netlink_kernel_release(crypto_nlsk); +} + +module_init(crypto_user_init); +module_exit(crypto_user_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Steffen Klassert "); +MODULE_DESCRIPTION("Crypto userspace configuration API"); diff --git a/include/linux/cryptouser.h b/include/linux/cryptouser.h new file mode 100644 index 00000000000..b874e3879d8 --- /dev/null +++ b/include/linux/cryptouser.h @@ -0,0 +1,52 @@ +/* + * Crypto user configuration API. + * + * Copyright (C) 2011 secunet Security Networks AG + * Copyright (C) 2011 Steffen Klassert + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* Netlink configuration messages. */ +enum { + CRYPTO_MSG_BASE = 0x10, + CRYPTO_MSG_NEWALG = 0x10, + CRYPTO_MSG_DELALG, + CRYPTO_MSG_UPDATEALG, + CRYPTO_MSG_GETALG, + __CRYPTO_MSG_MAX +}; +#define CRYPTO_MSG_MAX (__CRYPTO_MSG_MAX - 1) +#define CRYPTO_NR_MSGTYPES (CRYPTO_MSG_MAX + 1 - CRYPTO_MSG_BASE) + +#define CRYPTO_MAX_NAME CRYPTO_MAX_ALG_NAME + +/* Netlink message attributes. */ +enum crypto_attr_type_t { + CRYPTOCFGA_UNSPEC, + CRYPTOCFGA_PRIORITY_VAL, /* __u32 */ + __CRYPTOCFGA_MAX + +#define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1) +}; + +struct crypto_user_alg { + char cru_name[CRYPTO_MAX_ALG_NAME]; + char cru_driver_name[CRYPTO_MAX_ALG_NAME]; + char cru_module_name[CRYPTO_MAX_ALG_NAME]; + __u32 cru_type; + __u32 cru_mask; + __u32 cru_refcnt; + __u32 cru_flags; +}; diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 2e17c5dbdcb..464ace04283 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -25,6 +25,7 @@ #define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */ #define NETLINK_ECRYPTFS 19 #define NETLINK_RDMA 20 +#define NETLINK_CRYPTO 21 /* Crypto layer */ #define MAX_LINKS 32 -- cgit v1.2.3 From ea8bdfcff17599e5d80f93e2ae194fbbab7f8d5e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 26 Oct 2011 17:15:10 +0200 Subject: crypto: user - Add dependency on NET Since the configuration interface relies on netlink we need to select NET. Signed-off-by: Herbert Xu --- crypto/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'crypto/Kconfig') diff --git a/crypto/Kconfig b/crypto/Kconfig index a8442dca024..259dea9c6df 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -103,6 +103,7 @@ config CRYPTO_MANAGER2 config CRYPTO_USER tristate "Userspace cryptographic algorithm configuration" select CRYPTO_MANAGER + select NET help Userapace configuration for cryptographic instantiations such as cbc(aes). -- cgit v1.2.3 From 5db017aa2809c49ca0a43b0f3ed1267e6be60883 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 1 Nov 2011 12:12:43 +1100 Subject: crypto: user - Depend on NET instead of selecting it Selecting NET causes all sorts of issues, including a dependency loop involving bluetooth. This patch makes it a dependency instead. Signed-off-by: Herbert Xu --- crypto/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'crypto/Kconfig') diff --git a/crypto/Kconfig b/crypto/Kconfig index 259dea9c6df..527a857d10b 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -102,8 +102,8 @@ config CRYPTO_MANAGER2 config CRYPTO_USER tristate "Userspace cryptographic algorithm configuration" + depends on NET select CRYPTO_MANAGER - select NET help Userapace configuration for cryptographic instantiations such as cbc(aes). -- cgit v1.2.3