From 0c2c0a846d42b05b44d62a3c3fcaf410660f8a4d Mon Sep 17 00:00:00 2001 From: Mounir IDRASSI Date: Wed, 30 Dec 2015 06:57:34 +0100 Subject: Cryptography: Optimize Whirlpool implementation by using public domain assembly code developed by Wei Dai --- src/Crypto/cpu.c | 198 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 src/Crypto/cpu.c (limited to 'src/Crypto/cpu.c') diff --git a/src/Crypto/cpu.c b/src/Crypto/cpu.c new file mode 100644 index 00000000..6896240c --- /dev/null +++ b/src/Crypto/cpu.c @@ -0,0 +1,198 @@ +/* cpu.c - written and placed in the public domain by Wei Dai */ + +#include "cpu.h" +#include "misc.h" + +#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY +#include +#include +#endif + +#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE +#include +#endif + +#ifdef CRYPTOPP_CPUID_AVAILABLE + +#if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64 + +int CpuId(uint32 input, uint32 *output) +{ + __cpuid((int *)output, input); + return 1; +} + +#else + +#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY + +#if defined(__cplusplus) +extern "C" { +#endif + +typedef void (*SigHandler)(int); + +static jmp_buf s_jmpNoCPUID; +static void SigIllHandlerCPUID(int p) +{ + longjmp(s_jmpNoCPUID, 1); +} + +#if CRYPTOPP_BOOL_X64 == 0 +static jmp_buf s_jmpNoSSE2; +static void SigIllHandlerSSE2(int p) +{ + longjmp(s_jmpNoSSE2, 1); +} +#endif +#if defined(__cplusplus) +} +#endif +#endif + +int CpuId(uint32 input, uint32 *output) +{ +#ifdef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY + __try + { + __asm + { + mov eax, input + cpuid + mov edi, output + mov [edi], eax + mov [edi+4], ebx + mov [edi+8], ecx + mov [edi+12], edx + } + } + __except (1) + { + return 0; + } + return 1; +#else + SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID); + if (oldHandler == SIG_ERR) + return 0; + + int result = 1; + if (setjmp(s_jmpNoCPUID)) + result = 0; + else + { + asm + ( + // save ebx in case -fPIC is being used +#if CRYPTOPP_BOOL_X86 + "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx" +#else + "pushq %%rbx; cpuid; mov %%ebx, %%edi; popq %%rbx" +#endif + : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3]) + : "a" (input) + ); + } + + signal(SIGILL, oldHandler); + return result; +#endif +} + +#endif + +static int TrySSE2() +{ +#if CRYPTOPP_BOOL_X64 + return 1; +#elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) + __try + { +#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE + AS2(por xmm0, xmm0) // executing SSE2 instruction +#elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE + __m128i x = _mm_setzero_si128(); + return _mm_cvtsi128_si32(x) == 0; +#endif + } + __except (1) + { + return 0; + } + return 1; +#else + SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2); + if (oldHandler == SIG_ERR) + return 0; + + int result = 1; + if (setjmp(s_jmpNoSSE2)) + result = 0; + else + { +#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE + __asm __volatile ("por %xmm0, %xmm0"); +#elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE + __m128i x = _mm_setzero_si128(); + result = _mm_cvtsi128_si32(x) == 0; +#endif + } + + signal(SIGILL, oldHandler); + return result; +#endif +} + +int g_x86DetectionDone = 0; +int g_hasISSE = 0, g_hasSSE2 = 0, g_hasSSSE3 = 0, g_hasMMX = 0, g_hasAESNI = 0, g_hasCLMUL = 0, g_isP4 = 0; +uint32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; + +void DetectX86Features() +{ + uint32 cpuid[4], cpuid1[4], tmp; + if (!CpuId(0, cpuid)) + return; + if (!CpuId(1, cpuid1)) + return; + + g_hasMMX = (cpuid1[3] & (1 << 23)) != 0; + if ((cpuid1[3] & (1 << 26)) != 0) + g_hasSSE2 = TrySSE2(); + g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9)); + g_hasAESNI = g_hasSSE2 && (cpuid1[2] & (1<<25)); + g_hasCLMUL = g_hasSSE2 && (cpuid1[2] & (1<<1)); + + if ((cpuid1[3] & (1 << 25)) != 0) + g_hasISSE = 1; + else + { + uint32 cpuid2[4]; + CpuId(0x080000000, cpuid2); + if (cpuid2[0] >= 0x080000001) + { + CpuId(0x080000001, cpuid2); + g_hasISSE = (cpuid2[3] & (1 << 22)) != 0; + } + } + + tmp = cpuid[2]; + cpuid[2] = cpuid[3]; + cpuid[3] = tmp; + if (memcmp(cpuid+1, "GenuineIntel", 12) == 0) + { + g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf; + g_cacheLineSize = 8 * GETBYTE(cpuid1[1], 1); + } + else if (memcmp(cpuid+1, "AuthenticAMD", 12) == 0) + { + CpuId(0x80000005, cpuid); + g_cacheLineSize = GETBYTE(cpuid[2], 0); + } + + if (!g_cacheLineSize) + g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; + + g_x86DetectionDone = 1; +} + +#endif -- cgit v1.2.3