From e5a9e9239b0cf1001d9b91497b4ff3ab4a190b1f Mon Sep 17 00:00:00 2001 From: Mounir IDRASSI Date: Tue, 4 Oct 2016 13:21:48 +0200 Subject: Crypto: Use SIMD optimized Serpent implementation from Botan. 2.5x speed gain factor. Update credits and copyrights notice. --- src/Crypto/Crypto.vcxproj | 6 ++++-- src/Crypto/Crypto.vcxproj.filters | 18 ++++++++++++------ src/Crypto/Sources | 3 ++- src/Crypto/cpu.h | 38 ++++++++++++++++++++++++++++++++++++++ src/Crypto/misc.h | 8 ++++++++ 5 files changed, 64 insertions(+), 9 deletions(-) (limited to 'src/Crypto') diff --git a/src/Crypto/Crypto.vcxproj b/src/Crypto/Crypto.vcxproj index 3db1e789..7573f1ec 100644 --- a/src/Crypto/Crypto.vcxproj +++ b/src/Crypto/Crypto.vcxproj @@ -221,7 +221,8 @@ - + + @@ -239,7 +240,8 @@ - + + diff --git a/src/Crypto/Crypto.vcxproj.filters b/src/Crypto/Crypto.vcxproj.filters index 91e83c60..ad933b0a 100644 --- a/src/Crypto/Crypto.vcxproj.filters +++ b/src/Crypto/Crypto.vcxproj.filters @@ -30,9 +30,6 @@ Source Files - - Source Files - Source Files @@ -51,6 +48,12 @@ Source Files + + Source Files + + + Source Files + @@ -80,9 +83,6 @@ Header Files - - Header Files - Header Files @@ -101,6 +101,12 @@ Header Files + + Header Files + + + Header Files + diff --git a/src/Crypto/Sources b/src/Crypto/Sources index c7601a02..07e66b71 100644 --- a/src/Crypto/Sources +++ b/src/Crypto/Sources @@ -16,7 +16,8 @@ SOURCES = \ Aestab.c \ cpu.c \ Rmd160.c \ - Serpent.c \ + SerpentFast.c \ + SerpentFast_simd.cpp \ Sha2.c \ Twofish.c \ GostCipher.c \ diff --git a/src/Crypto/cpu.h b/src/Crypto/cpu.h index 2d26e927..1057a373 100644 --- a/src/Crypto/cpu.h +++ b/src/Crypto/cpu.h @@ -69,8 +69,29 @@ extern void _mm_store_si128(__m128i *_P, __m128i _B); extern __m64 _m_pxor(__m64 _MM1, __m64 _MM2); extern __m128i _mm_set_epi64(__m64 _Q1, __m64 _Q0); extern __m128i _mm_setr_epi32(int _I0, int _I1, int _I2, int _I3); +extern __m128i _mm_loadu_si128(__m128i const*_P); +extern __m128i _mm_set_epi32(int _I3, int _I2, int _I1, int _I0); +extern __m128i _mm_set1_epi32(int _I); +extern void _mm_storeu_si128(__m128i *_P, __m128i _B); +extern __m128i _mm_or_si128(__m128i _A, __m128i _B); +extern __m128i _mm_slli_epi32(__m128i _A, int _Count); +extern __m128i _mm_srli_epi32(__m128i _A, int _Count); +extern __m128i _mm_add_epi32(__m128i _A, __m128i _B); +extern __m128i _mm_sub_epi32(__m128i _A, __m128i _B); +extern __m128i _mm_or_si128(__m128i _A, __m128i _B); +extern __m128i _mm_and_si128(__m128i _A, __m128i _B); +extern __m128i _mm_andnot_si128(__m128i _A, __m128i _B); +extern __m128i _mm_shufflehi_epi16(__m128i _A, int _Imm); +extern __m128i _mm_shufflelo_epi16(__m128i _A, int _Imm); +extern __m128i _mm_unpacklo_epi32(__m128i _A, __m128i _B); +extern __m128i _mm_unpackhi_epi32(__m128i _A, __m128i _B); +extern __m128i _mm_unpackhi_epi64(__m128i _A, __m128i _B); +extern __m128i _mm_srli_epi16(__m128i _A, int _Count); +extern __m128i _mm_slli_epi16(__m128i _A, int _Count); #define _mm_xor_si64 _m_pxor #define _mm_empty _m_empty +#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) (((fp3) << 6) | ((fp2) << 4) | \ + ((fp1) << 2) | ((fp0))) #if defined(__cplusplus) } #endif @@ -396,4 +417,21 @@ extern int g_hasMMX; AS2( add outputPtr, increment*16) +#if defined(TC_WINDOWS_DRIVER) || defined (_UEFI) +#ifdef __cplusplus +extern "C" { +#endif +extern unsigned __int64 __cdecl _rotl64(unsigned __int64,int); +extern unsigned __int64 __cdecl _rotr64(unsigned __int64,int); +extern unsigned int __cdecl _rotl(unsigned int,int); +extern unsigned int __cdecl _rotr(unsigned int,int); +extern unsigned char _rotr8(unsigned char value, unsigned char shift); +extern unsigned short _rotr16(unsigned short value, unsigned char shift); +extern unsigned char _rotl8(unsigned char value, unsigned char shift); +extern unsigned short _rotl16(unsigned short value, unsigned char shift); +#ifdef __cplusplus +} +#endif +#endif + #endif diff --git a/src/Crypto/misc.h b/src/Crypto/misc.h index 31fa1187..47d0288a 100644 --- a/src/Crypto/misc.h +++ b/src/Crypto/misc.h @@ -12,6 +12,10 @@ #include "Tcdefs.h" #endif // !defined(_UEFI) +#ifdef __cplusplus +extern "C" { +#endif + #if defined(_MSC_VER) && !defined(_UEFI) #if _MSC_VER >= 1400 #if !defined(TC_WINDOWS_DRIVER) && !defined(_UEFI) @@ -175,4 +179,8 @@ VC_INLINE void CorrectEndianess(uint64 *out, const uint64 *in, size_t byteCount) #define IsAligned16(p) IsAlignedOn(p, GetAlignmentOf(uint64)) +#ifdef __cplusplus +} +#endif + #endif -- cgit v1.2.3