diff options
Diffstat (limited to 'src/Crypto')
33 files changed, 1853 insertions, 1440 deletions
diff --git a/src/Crypto/Aes.h b/src/Crypto/Aes.h index e12c6fc8..db1bed27 100644 --- a/src/Crypto/Aes.h +++ b/src/Crypto/Aes.h @@ -35,6 +35,11 @@ #include "Common/Tcdefs.h" +#ifdef WOLFCRYPT_BACKEND + #include <wolfssl/options.h> + #include <wolfssl/wolfcrypt/aes.h> +#endif + #ifndef EXIT_SUCCESS #define EXIT_SUCCESS 0 #define EXIT_FAILURE 1 @@ -93,11 +98,19 @@ typedef union typedef struct { uint_32t ks[KS_LENGTH]; aes_inf inf; +#ifdef WOLFCRYPT_BACKEND + XtsAes wc_enc_xts; + Aes wc_enc_aes; +#endif } aes_encrypt_ctx; typedef struct { uint_32t ks[KS_LENGTH]; aes_inf inf; +#ifdef WOLFCRYPT_BACKEND + XtsAes wc_dec_xts; + Aes wc_dec_aes; +#endif } aes_decrypt_ctx; /* This routine must be called before first use if non-static */ @@ -152,6 +165,13 @@ AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, const aes_de #endif +#ifdef WOLFCRYPT_BACKEND +AES_RETURN xts_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]); +AES_RETURN xts_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]); +AES_RETURN xts_encrypt(const unsigned char *in, unsigned char *out, word64 length, word64 sector, const aes_encrypt_ctx cx[1]); +AES_RETURN xts_decrypt(const unsigned char *in, unsigned char *out, word64 length, word64 sector, const aes_decrypt_ctx cx[1]); +#endif + #if defined(AES_MODES) /* Multiple calls to the following subroutines for multiple block */ diff --git a/src/Crypto/Crypto.vcxproj b/src/Crypto/Crypto.vcxproj index c6e0aac5..97a472f7 100644 --- a/src/Crypto/Crypto.vcxproj +++ b/src/Crypto/Crypto.vcxproj @@ -200,26 +200,19 @@ </Command> <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)\%(Filename).obj;%(Outputs)</Outputs> </CustomBuild> - <CustomBuild Include="Gost89_x64.asm"> - <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild> - <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">echo %(Filename)%(Extension) & nasm.exe -Xvc -f win64 -Ox -o 
"$(TargetDir)\%(Filename).obj" -l "$(TargetDir)\%(Filename).lst" "%(FullPath)" -</Command> - <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(TargetDir)\%(Filename).obj;%(Outputs)</Outputs> - <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild> - <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">echo %(Filename)%(Extension) & nasm.exe -Xvc -f win64 -Ox -o "$(TargetDir)\%(Filename).obj" -l "$(TargetDir)\%(Filename).lst" "%(FullPath)" -</Command> - <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)\%(Filename).obj;%(Outputs)</Outputs> - </CustomBuild> </ItemGroup> <ItemGroup> <ClCompile Include="Aeskey.c" /> <ClCompile Include="Aestab.c" /> + <ClCompile Include="blake2s.c" /> + <ClCompile Include="blake2s_SSE2.c" /> + <ClCompile Include="blake2s_SSE41.c" /> + <ClCompile Include="blake2s_SSSE3.c" /> <ClCompile Include="Camellia.c" /> <ClCompile Include="chacha-xmm.c" /> <ClCompile Include="chacha256.c" /> <ClCompile Include="chachaRng.c" /> <ClCompile Include="cpu.c" /> - <ClCompile Include="GostCipher.c" /> <ClCompile Include="jitterentropy-base.c"> <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Disabled</Optimization> <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Disabled</Optimization> @@ -227,7 +220,6 @@ <ClCompile Include="kuznyechik.c" /> <ClCompile Include="kuznyechik_simd.c" /> <ClCompile Include="rdrand.c" /> - <ClCompile Include="Rmd160.c" /> <ClCompile Include="SerpentFast.c" /> <ClCompile Include="SerpentFast_simd.cpp" /> <ClCompile Include="Sha2.c" /> @@ -243,6 +235,9 @@ <ClInclude Include="Aes_hw_cpu.h" /> <ClInclude Include="Aesopt.h" /> <ClInclude Include="Aestab.h" /> + <ClInclude Include="blake2s-load-sse2.h" /> + <ClInclude Include="blake2s-load-sse41.h" /> + <ClInclude Include="blake2s-round.h" /> <ClInclude Include="Camellia.h" /> <ClInclude Include="chacha256.h" /> <ClInclude 
Include="chachaRng.h" /> @@ -250,13 +245,11 @@ <ClInclude Include="chacha_u4.h" /> <ClInclude Include="config.h" /> <ClInclude Include="cpu.h" /> - <ClInclude Include="GostCipher.h" /> <ClInclude Include="jitterentropy-base-user.h" /> <ClInclude Include="jitterentropy.h" /> <ClInclude Include="kuznyechik.h" /> <ClInclude Include="misc.h" /> <ClInclude Include="rdrand.h" /> - <ClInclude Include="Rmd160.h" /> <ClInclude Include="SerpentFast.h" /> <ClInclude Include="SerpentFast_sbox.h" /> <ClInclude Include="Sha2.h" /> diff --git a/src/Crypto/Crypto.vcxproj.filters b/src/Crypto/Crypto.vcxproj.filters index 541a0867..5d149bdd 100644 --- a/src/Crypto/Crypto.vcxproj.filters +++ b/src/Crypto/Crypto.vcxproj.filters @@ -24,9 +24,6 @@ <ClCompile Include="cpu.c"> <Filter>Source Files</Filter> </ClCompile> - <ClCompile Include="Rmd160.c"> - <Filter>Source Files</Filter> - </ClCompile> <ClCompile Include="Sha2.c"> <Filter>Source Files</Filter> </ClCompile> @@ -36,9 +33,6 @@ <ClCompile Include="Whirlpool.c"> <Filter>Source Files</Filter> </ClCompile> - <ClCompile Include="GostCipher.c"> - <Filter>Source Files</Filter> - </ClCompile> <ClCompile Include="kuznyechik.c"> <Filter>Source Files</Filter> </ClCompile> @@ -81,6 +75,18 @@ <ClCompile Include="t1ha2_selfcheck.c"> <Filter>Source Files</Filter> </ClCompile> + <ClCompile Include="blake2s.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="blake2s_SSE2.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="blake2s_SSE41.c"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="blake2s_SSSE3.c"> + <Filter>Source Files</Filter> + </ClCompile> </ItemGroup> <ItemGroup> <ClInclude Include="Aes.h"> @@ -107,9 +113,6 @@ <ClInclude Include="misc.h"> <Filter>Header Files</Filter> </ClInclude> - <ClInclude Include="Rmd160.h"> - <Filter>Header Files</Filter> - </ClInclude> <ClInclude Include="Sha2.h"> <Filter>Header Files</Filter> </ClInclude> @@ -119,9 +122,6 @@ <ClInclude 
Include="Whirlpool.h"> <Filter>Header Files</Filter> </ClInclude> - <ClInclude Include="GostCipher.h"> - <Filter>Header Files</Filter> - </ClInclude> <ClInclude Include="kuznyechik.h"> <Filter>Header Files</Filter> </ClInclude> @@ -164,6 +164,15 @@ <ClInclude Include="t1ha_selfcheck.h"> <Filter>Header Files</Filter> </ClInclude> + <ClInclude Include="blake2s-load-sse2.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="blake2s-load-sse41.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="blake2s-round.h"> + <Filter>Header Files</Filter> + </ClInclude> </ItemGroup> <ItemGroup> <CustomBuild Include="Aes_hw_cpu.asm"> @@ -175,9 +184,6 @@ <CustomBuild Include="Aes_x86.asm"> <Filter>Source Files</Filter> </CustomBuild> - <CustomBuild Include="Gost89_x64.asm"> - <Filter>Source Files</Filter> - </CustomBuild> <CustomBuild Include="Twofish_x64.S"> <Filter>Source Files</Filter> </CustomBuild> diff --git a/src/Crypto/Crypto_vs2019.vcxproj b/src/Crypto/Crypto_vs2019.vcxproj index 8d9ce46e..ccd512b9 100644 --- a/src/Crypto/Crypto_vs2019.vcxproj +++ b/src/Crypto/Crypto_vs2019.vcxproj @@ -297,24 +297,6 @@ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)\%(Filename).obj;%(Outputs)</Outputs> <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">$(TargetDir)\%(Filename).obj;%(Outputs)</Outputs> </CustomBuild> - <CustomBuild Include="Gost89_x64.asm"> - <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild> - <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">echo %(Filename)%(Extension) & nasm.exe -Xvc -f win64 -Ox -o "$(TargetDir)\%(Filename).obj" -l "$(TargetDir)\%(Filename).lst" "%(FullPath)" -</Command> - <Command Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">echo %(Filename)%(Extension) & nasm.exe -Xvc -f win64 -Ox -o "$(TargetDir)\%(Filename).obj" -l "$(TargetDir)\%(Filename).lst" "%(FullPath)" -</Command> - <Outputs 
Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(TargetDir)\%(Filename).obj;%(Outputs)</Outputs> - <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">$(TargetDir)\%(Filename).obj;%(Outputs)</Outputs> - <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild> - <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">echo %(Filename)%(Extension) & nasm.exe -Xvc -f win64 -Ox -o "$(TargetDir)\%(Filename).obj" -l "$(TargetDir)\%(Filename).lst" "%(FullPath)" -</Command> - <Command Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">echo %(Filename)%(Extension) & nasm.exe -Xvc -f win64 -Ox -o "$(TargetDir)\%(Filename).obj" -l "$(TargetDir)\%(Filename).lst" "%(FullPath)" -</Command> - <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)\%(Filename).obj;%(Outputs)</Outputs> - <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">$(TargetDir)\%(Filename).obj;%(Outputs)</Outputs> - <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">true</ExcludedFromBuild> - <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">true</ExcludedFromBuild> - </CustomBuild> </ItemGroup> <ItemGroup> <ClCompile Include="Aescrypt.c"> @@ -325,12 +307,15 @@ </ClCompile> <ClCompile Include="Aeskey.c" /> <ClCompile Include="Aestab.c" /> + <ClCompile Include="blake2s.c" /> + <ClCompile Include="blake2s_SSE2.c" /> + <ClCompile Include="blake2s_SSE41.c" /> + <ClCompile Include="blake2s_SSSE3.c" /> <ClCompile Include="Camellia.c" /> <ClCompile Include="chacha-xmm.c" /> <ClCompile Include="chacha256.c" /> <ClCompile Include="chachaRng.c" /> <ClCompile Include="cpu.c" /> - <ClCompile Include="GostCipher.c" /> <ClCompile Include="jitterentropy-base.c"> <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Disabled</Optimization> <Optimization 
Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Disabled</Optimization> @@ -345,7 +330,6 @@ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">true</ExcludedFromBuild> </ClCompile> - <ClCompile Include="Rmd160.c" /> <ClCompile Include="SerpentFast.c" /> <ClCompile Include="SerpentFast_simd.cpp" /> <ClCompile Include="Sha2.c" /> @@ -368,13 +352,11 @@ <ClInclude Include="chacha_u4.h" /> <ClInclude Include="config.h" /> <ClInclude Include="cpu.h" /> - <ClInclude Include="GostCipher.h" /> <ClInclude Include="jitterentropy-base-user.h" /> <ClInclude Include="jitterentropy.h" /> <ClInclude Include="kuznyechik.h" /> <ClInclude Include="misc.h" /> <ClInclude Include="rdrand.h" /> - <ClInclude Include="Rmd160.h" /> <ClInclude Include="SerpentFast.h" /> <ClInclude Include="SerpentFast_sbox.h" /> <ClInclude Include="Sha2.h" /> diff --git a/src/Crypto/GostCipher.c b/src/Crypto/GostCipher.c deleted file mode 100644 index ddd649cd..00000000 --- a/src/Crypto/GostCipher.c +++ /dev/null @@ -1,265 +0,0 @@ -/** @file -GOST89 implementation - -Copyright (c) 2016. Disk Cryptography Services for EFI (DCS), Alex Kolotnikov - -This program and the accompanying materials -are licensed and made available under the terms and conditions -of the Apache License, Version 2.0. -The full text of the license may be found at -https://opensource.org/licenses/Apache-2.0 - -Dynamic SBOX idea is from GostCrypt project. 
Copyright (c) 2008-2011 TrueCrypt Developers Association -**/ - - - -#include "GostCipher.h" -#include "Streebog.h" -#include "cpu.h" - -#if defined(CIPHER_GOST89) - -// Crypto Pro -byte S_CryptoPro[8][16] = { - {0x1,0x3,0xA,0x9,0x5,0xB,0x4,0xF,0x8,0x6,0x7,0xE,0xD,0x0,0x2,0xC}, - {0xD,0xE,0x4,0x1,0x7,0x0,0x5,0xA,0x3,0xC,0x8,0xF,0x6,0x2,0x9,0xB}, - {0x7,0x6,0x2,0x4,0xD,0x9,0xF,0x0,0xA,0x1,0x5,0xB,0x8,0xE,0xC,0x3}, - {0x7,0x6,0x4,0xB,0x9,0xC,0x2,0xA,0x1,0x8,0x0,0xE,0xF,0xD,0x3,0x5}, - {0x4,0xA,0x7,0xC,0x0,0xF,0x2,0x8,0xE,0x1,0x6,0x5,0xD,0xB,0x9,0x3}, - {0x7,0xF,0xC,0xE,0x9,0x4,0x1,0x0,0x3,0xB,0x5,0x2,0x6,0xA,0x8,0xD}, - {0x5,0xF,0x4,0x0,0x2,0xD,0xB,0x9,0x1,0x7,0x6,0x3,0xC,0xE,0xA,0x8}, - {0xA,0x4,0x5,0x6,0x8,0x1,0x3,0x7,0xD,0xC,0xE,0x0,0x9,0x2,0xB,0xF} - }; - -// TC26 -byte S_TC26[8][16] = -{ - { 0xc, 0x4, 0x6, 0x2, 0xa, 0x5, 0xb, 0x9, 0xe, 0x8, 0xd, 0x7, 0x0, 0x3, 0xf, 0x1 }, - { 0x6, 0x8, 0x2, 0x3, 0x9, 0xa, 0x5, 0xc, 0x1, 0xe, 0x4, 0x7, 0xb, 0xd, 0x0, 0xf }, - { 0xb, 0x3, 0x5, 0x8, 0x2, 0xf, 0xa, 0xd, 0xe, 0x1, 0x7, 0x4, 0xc, 0x9, 0x6, 0x0 }, - { 0xc, 0x8, 0x2, 0x1, 0xd, 0x4, 0xf, 0x6, 0x7, 0x0, 0xa, 0x5, 0x3, 0xe, 0x9, 0xb }, - { 0x7, 0xf, 0x5, 0xa, 0x8, 0x1, 0x6, 0xd, 0x0, 0x9, 0x3, 0xe, 0xb, 0x4, 0x2, 0xc }, - { 0x5, 0xd, 0xf, 0x6, 0x9, 0x2, 0xc, 0xa, 0xb, 0x7, 0x8, 0x1, 0x4, 0x3, 0xe, 0x0 }, - { 0x8, 0xe, 0x2, 0x5, 0x6, 0x9, 0x1, 0xc, 0xf, 0x4, 0xb, 0x0, 0xd, 0xa, 0x3, 0x7 }, - { 0x1, 0x7, 0xe, 0xd, 0x0, 0x5, 0x8, 0x3, 0x4, 0xf, 0xa, 0x6, 0x9, 0xc, 0xb, 0x2 }, -}; - -void gost_prepare_kds(gost_kds* kds) { - uint32 i; - // Build substitution tables. 
- for (i = 0; i < 256; ++i) { - uint32 p; - p = kds->sbox[7][i >> 4] << 4 | kds->sbox[6][i & 15]; - p = p << 24; p = p << 11 | p >> 21; - kds->sbox_cvt[i] = p; // S87 - - p = kds->sbox[5][i >> 4] << 4 | kds->sbox[4][i & 15]; - p = p << 16; p = p << 11 | p >> 21; - kds->sbox_cvt[256 + i] = p; // S65 - - p = kds->sbox[3][i >> 4] << 4 | kds->sbox[2][i & 15]; - p = p << 8; p = p << 11 | p >> 21; - kds->sbox_cvt[256 * 2 + i] = p; // S43 - - p = kds->sbox[1][i >> 4] << 4 | kds->sbox[0][i & 15]; - p = p << 11 | p >> 21; - kds->sbox_cvt[256 * 3 + i] = p; // S21 - } -} - - -static void xor_s_box(byte s_box[8][16], byte *seed) -{ - int i; - for (i = 0; i < 16; i++) - { - s_box[0][i] ^= (seed[ (i * 4) + 0 ] ) & 0xF; - s_box[1][i] ^= (seed[ (i * 4) + 0 ]>>4) & 0xF; - s_box[2][i] ^= (seed[ (i * 4) + 1 ] ) & 0xF; - s_box[3][i] ^= (seed[ (i * 4) + 1 ]>>4) & 0xF; - s_box[4][i] ^= (seed[ (i * 4) + 2 ] ) & 0xF; - s_box[5][i] ^= (seed[ (i * 4) + 2 ]>>4) & 0xF; - s_box[6][i] ^= (seed[ (i * 4) + 3 ] ) & 0xF; - s_box[7][i] ^= (seed[ (i * 4) + 3 ]>>4) & 0xF; - } -} - -void gost_set_key(const byte *key, gost_kds *ks, int useDynamicSbox) -{ - memcpy(ks->key, key, GOST_KEYSIZE); - memcpy(ks->sbox, S_TC26, sizeof(ks->sbox)); - - if (useDynamicSbox) - { - STREEBOG_CTX sctx; - byte sbox_seed[64]; -#if defined (DEVICE_DRIVER) && !defined (_WIN64) - KFLOATING_SAVE floatingPointState; - NTSTATUS saveStatus = STATUS_INVALID_PARAMETER; - if (HasSSE2() || HasSSE41()) - saveStatus = KeSaveFloatingPointState (&floatingPointState); -#endif - //Generate pseudorandom data based on the key - STREEBOG_init(&sctx); - STREEBOG_add(&sctx, ks->key, 32); - STREEBOG_finalize(&sctx, sbox_seed); - -#if defined (DEVICE_DRIVER) && !defined (_WIN64) - if (NT_SUCCESS (saveStatus)) - KeRestoreFloatingPointState (&floatingPointState); -#endif - - xor_s_box(ks->sbox, sbox_seed); - } - - gost_prepare_kds(ks); -} - -static uint32 f(uint32 v, uint32* sbox){ - byte* x =(byte*) &v; - /* Do substitutions */ - return sbox[x[3]] 
| sbox[256 + x[2]] | sbox[256*2 + x[1]] | sbox[256*3 + x[0]]; -} - -void gost_encrypt_block(uint64 in_, uint64* out_, gost_kds* kds) { - uint32* in = (uint32*)&in_; - uint32* out = (uint32*)out_; - uint32* key = (uint32*)kds->key; - uint32* sbox = kds->sbox_cvt; - - // As named in the GOST - uint32 n1 = in[0]; - uint32 n2 = in[1]; - - n2 ^= f(n1+key[0], sbox); - n1 ^= f(n2+key[1], sbox); - n2 ^= f(n1+key[2], sbox); - n1 ^= f(n2+key[3], sbox); - n2 ^= f(n1+key[4], sbox); - n1 ^= f(n2+key[5], sbox); - n2 ^= f(n1+key[6], sbox); - n1 ^= f(n2+key[7], sbox); - - n2 ^= f(n1+key[0], sbox); - n1 ^= f(n2+key[1], sbox); - n2 ^= f(n1+key[2], sbox); - n1 ^= f(n2+key[3], sbox); - n2 ^= f(n1+key[4], sbox); - n1 ^= f(n2+key[5], sbox); - n2 ^= f(n1+key[6], sbox); - n1 ^= f(n2+key[7], sbox); - - n2 ^= f(n1+key[0], sbox); - n1 ^= f(n2+key[1], sbox); - n2 ^= f(n1+key[2], sbox); - n1 ^= f(n2+key[3], sbox); - n2 ^= f(n1+key[4], sbox); - n1 ^= f(n2+key[5], sbox); - n2 ^= f(n1+key[6], sbox); - n1 ^= f(n2+key[7], sbox); - - n2 ^= f(n1+key[7], sbox); - n1 ^= f(n2+key[6], sbox); - n2 ^= f(n1+key[5], sbox); - n1 ^= f(n2+key[4], sbox); - n2 ^= f(n1+key[3], sbox); - n1 ^= f(n2+key[2], sbox); - n2 ^= f(n1+key[1], sbox); - n1 ^= f(n2+key[0], sbox); - - // There is no swap after the last round - out[0] = n2; - out[1] = n1; -} - -void gost_decrypt_block(uint64 in_, uint64* out_, gost_kds* kds) { - uint32* in = (uint32*)&in_; - uint32* out = (uint32*)out_; - uint32* key = (uint32*)kds->key; - uint32* sbox = kds->sbox_cvt; - - // As named in the GOST - uint32 n1 = in[0]; - uint32 n2 = in[1]; - - n2 ^= f(n1+key[0], sbox); - n1 ^= f(n2+key[1], sbox); - n2 ^= f(n1+key[2], sbox); - n1 ^= f(n2+key[3], sbox); - n2 ^= f(n1+key[4], sbox); - n1 ^= f(n2+key[5], sbox); - n2 ^= f(n1+key[6], sbox); - n1 ^= f(n2+key[7], sbox); - - n2 ^= f(n1+key[7], sbox); - n1 ^= f(n2+key[6], sbox); - n2 ^= f(n1+key[5], sbox); - n1 ^= f(n2+key[4], sbox); - n2 ^= f(n1+key[3], sbox); - n1 ^= f(n2+key[2], sbox); - n2 ^= f(n1+key[1], 
sbox); - n1 ^= f(n2+key[0], sbox); - - n2 ^= f(n1+key[7], sbox); - n1 ^= f(n2+key[6], sbox); - n2 ^= f(n1+key[5], sbox); - n1 ^= f(n2+key[4], sbox); - n2 ^= f(n1+key[3], sbox); - n1 ^= f(n2+key[2], sbox); - n2 ^= f(n1+key[1], sbox); - n1 ^= f(n2+key[0], sbox); - - n2 ^= f(n1+key[7], sbox); - n1 ^= f(n2+key[6], sbox); - n2 ^= f(n1+key[5], sbox); - n1 ^= f(n2+key[4], sbox); - n2 ^= f(n1+key[3], sbox); - n1 ^= f(n2+key[2], sbox); - n2 ^= f(n1+key[1], sbox); - n1 ^= f(n2+key[0], sbox); - - out[0] = n2; - out[1] = n1; -} - -#if defined(_M_AMD64) -void gost_encrypt_128_CBC_asm(const byte *in, byte *out, gost_kds *ks, uint64 count); -void gost_decrypt_128_CBC_asm(const byte *in, byte *out, gost_kds *ks, uint64 count); -#endif - -void gost_encrypt(const byte *in, byte *out, gost_kds *ks, int count) { -#if defined(_M_AMD64) - gost_encrypt_128_CBC_asm(in, out, ks, (uint64)count); -#else - while (count > 0) { - // encrypt two blocks in CBC mode - gost_encrypt_block(*((uint64*)in), (uint64*)out, ks); - *((gst_udword*)(out + 8)) = *((gst_udword*)(in + 8)) ^ *((gst_udword*)(out)); - *((gst_udword*)(out + 12)) = *((gst_udword*)(in + 12)) ^ *((gst_udword*)(out + 4)); - gost_encrypt_block(*((uint64*)(out + 8)), (uint64*)(out + 8), ks); - count--; - in += 16; - out += 16; - } -#endif -} - -void gost_decrypt(const byte *in, byte *out, gost_kds *ks, int count) { -#if defined(_M_AMD64) - gost_decrypt_128_CBC_asm(in, out, ks, (uint64)count); -#else - while (count > 0) { - // decrypt two blocks in CBC mode - gost_decrypt_block(*((uint64*)(in + 8)), (uint64*)(out + 8), ks); - *((gst_udword*)(out + 8)) ^= *((gst_udword*)(in));; - *((gst_udword*)(out + 12)) ^= *((gst_udword*)(in + 4));; - gost_decrypt_block(*((uint64*)(in)), (uint64*)(out), ks); - count--; - in += 16; - out += 16; - } -#endif -} - -#endif diff --git a/src/Crypto/GostCipher.h b/src/Crypto/GostCipher.h deleted file mode 100644 index bcb77207..00000000 --- a/src/Crypto/GostCipher.h +++ /dev/null @@ -1,68 +0,0 @@ - -/* - 
Copyright (c) 2008-2011 TrueCrypt Developers Association. All rights reserved. - - Governed by the TrueCrypt License 3.0 the full text of which is contained in - the file License.txt included in TrueCrypt binary and source code distribution - packages. -*/ - - - -#ifndef GOST_CIPHER_H -#define GOST_CIPHER_H - -#include "Common/Tcdefs.h" -#include "config.h" -#include "misc.h" - -#ifdef __cplusplus -extern "C" { -#endif - -//In unsigned chars -#define GOST_KEYSIZE 32 -#define GOST_BLOCKSIZE 8 -#define GOST_SBOX_SIZE 16 - -//Production setting, but can be turned off to compare the algorithm with other implementations -#define CIPHER_GOST89 -#define GOST_DYNAMIC_SBOXES - -#if defined(CIPHER_GOST89) - -#ifdef GST_WINDOWS_BOOT -typedef int gst_word; -typedef long gst_dword; -typedef unsigned int gst_uword; -typedef unsigned long gst_udword; -#else -typedef short gst_word; -typedef int gst_dword; -typedef unsigned short gst_uword; -typedef unsigned int gst_udword; -#endif - -typedef struct gost_kds -{ - CRYPTOPP_ALIGN_DATA(16) byte key[32]; - gst_udword sbox_cvt[256 * 4]; - byte sbox[8][16]; -} gost_kds; - -#define GOST_KS (sizeof(gost_kds)) - -void gost_encrypt(const byte *in, byte *out, gost_kds *ks, int count); -void gost_decrypt(const byte *in, byte *out, gost_kds *ks, int count); -void gost_set_key(const byte *key, gost_kds *ks, int useDynamicSbox); - -#else -#define GOST_KS (0) -#endif - -#ifdef __cplusplus -} -#endif - - -#endif diff --git a/src/Crypto/Makefile.inc b/src/Crypto/Makefile.inc index c8d2dfdf..e05d02ca 100644 --- a/src/Crypto/Makefile.inc +++ b/src/Crypto/Makefile.inc @@ -16,9 +16,6 @@ VC_MLEXE = ml64.exe TC_ASM_ERR_LOG = ..\Driver\build_errors_asm.log -"$(OBJ_PATH)\$(O)\gost89_$(TC_ARCH).obj": gost89_$(TC_ARCH).asm - nasm.exe $(TC_ASFLAGS) -o "$@" -l "$(OBJ_PATH)\$(O)\gost89_$(TC_ARCH).lst" gost89_$(TC_ARCH).asm 2>$(TC_ASM_ERR_LOG) - "$(OBJ_PATH)\$(O)\Aes_$(TC_ARCH).obj": Aes_$(TC_ARCH).asm nasm.exe $(TC_ASFLAGS) -o "$@" -l 
"$(OBJ_PATH)\$(O)\Aes_$(TC_ARCH).lst" Aes_$(TC_ARCH).asm 2>$(TC_ASM_ERR_LOG) diff --git a/src/Crypto/Rmd160.c b/src/Crypto/Rmd160.c deleted file mode 100644 index 0c6ec839..00000000 --- a/src/Crypto/Rmd160.c +++ /dev/null @@ -1,500 +0,0 @@ -// RIPEMD-160 written and placed in the public domain by Wei Dai - -/* - * This code implements the MD4 message-digest algorithm. - * The algorithm is due to Ron Rivest. This code was - * written by Colin Plumb in 1993, no copyright is claimed. - * This code is in the public domain; do with it what you wish. - */ - -/* Adapted for TrueCrypt */ -/* Adapted for VeraCrypt */ -#if !defined(_UEFI) -#include <memory.h> -#endif // !defined(_UEFI) - -#include "Common/Tcdefs.h" -#include "Common/Endian.h" -#include "Rmd160.h" - -#define F(x, y, z) (x ^ y ^ z) -#define G(x, y, z) (z ^ (x & (y^z))) -#define H(x, y, z) (z ^ (x | ~y)) -#define I(x, y, z) (y ^ (z & (x^y))) -#define J(x, y, z) (x ^ (y | ~z)) - -#define PUT_64BIT_LE(cp, value) do { \ - (cp)[7] = (byte) ((value) >> 56); \ - (cp)[6] = (byte) ((value) >> 48); \ - (cp)[5] = (byte) ((value) >> 40); \ - (cp)[4] = (byte) ((value) >> 32); \ - (cp)[3] = (byte) ((value) >> 24); \ - (cp)[2] = (byte) ((value) >> 16); \ - (cp)[1] = (byte) ((value) >> 8); \ - (cp)[0] = (byte) (value); } while (0) - -#define PUT_32BIT_LE(cp, value) do { \ - (cp)[3] = (byte) ((value) >> 24); \ - (cp)[2] = (byte) ((value) >> 16); \ - (cp)[1] = (byte) ((value) >> 8); \ - (cp)[0] = (byte) (value); } while (0) - -#ifndef TC_MINIMIZE_CODE_SIZE - -static byte PADDING[64] = { - 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -#else - -static byte PADDING[64]; - -#endif - -void RMD160Init (RMD160_CTX *ctx) -{ - ctx->count = 0; - ctx->state[0] = 0x67452301; - ctx->state[1] = 0xefcdab89; - ctx->state[2] = 0x98badcfe; - ctx->state[3] = 0x10325476; - ctx->state[4] = 
0xc3d2e1f0; - PADDING[0] = 0x80; -} - -/* -* Update context to reflect the concatenation of another buffer full -* of bytes. -*/ -void RMD160Update (RMD160_CTX *ctx, const unsigned char *input, unsigned __int32 lenArg) -{ -#ifndef TC_WINDOWS_BOOT - uint64 len = lenArg; -#else - uint32 len = lenArg; -#endif - unsigned int have, need; - - /* Check how many bytes we already have and how many more we need. */ - have = (unsigned int) ((ctx->count) & (RIPEMD160_BLOCK_LENGTH - 1)); - need = RIPEMD160_BLOCK_LENGTH - have; - - /* Update bitcount */ - ctx->count += len; - - if (len >= need) { - if (have != 0) { - memcpy (ctx->buffer + have, input, (size_t) need); - RMD160Transform ((uint32 *) ctx->state, (const uint32 *) ctx->buffer); - input += need; - len -= need; - have = 0; - } - - /* Process data in RIPEMD160_BLOCK_LENGTH-byte chunks. */ - while (len >= RIPEMD160_BLOCK_LENGTH) { - RMD160Transform ((uint32 *) ctx->state, (const uint32 *) input); - input += RIPEMD160_BLOCK_LENGTH; - len -= RIPEMD160_BLOCK_LENGTH; - } - } - - /* Handle any remaining bytes of data. */ - if (len != 0) - memcpy (ctx->buffer + have, input, (size_t) len); -} - -/* -* Pad pad to 64-byte boundary with the bit pattern -* 1 0* (64-bit count of bits processed, MSB-first) -*/ -static void RMD160Pad(RMD160_CTX *ctx) -{ - byte count[8]; - uint32 padlen; - - /* Convert count to 8 bytes in little endian order. */ - -#ifndef TC_WINDOWS_BOOT - uint64 bitcount = ctx->count << 3; - PUT_64BIT_LE(count, bitcount); -#else - *(uint32 *) (count + 4) = 0; - *(uint32 *) (count + 0) = ctx->count << 3; -#endif - - /* Pad out to 56 mod 64. */ - padlen = RIPEMD160_BLOCK_LENGTH - - (uint32)((ctx->count) & (RIPEMD160_BLOCK_LENGTH - 1)); - if (padlen < 1 + 8) - padlen += RIPEMD160_BLOCK_LENGTH; - RMD160Update(ctx, PADDING, padlen - 8); /* padlen - 8 <= 64 */ - RMD160Update(ctx, count, 8); -} - -/* -* Final wrapup--call RMD160Pad, fill in digest and zero out ctx. 
-*/ -void RMD160Final(unsigned char *digest, RMD160_CTX *ctx) -{ - int i; - - RMD160Pad(ctx); - if (digest) { - for (i = 0; i < 5; i++) - PUT_32BIT_LE(digest + i * 4, ctx->state[i]); -#ifndef TC_WINDOWS_BOOT - burn (ctx, sizeof(*ctx)); -#endif - } -} - - -#ifndef TC_MINIMIZE_CODE_SIZE - -#define word32 unsigned __int32 - -#define k0 0 -#define k1 0x5a827999UL -#define k2 0x6ed9eba1UL -#define k3 0x8f1bbcdcUL -#define k4 0xa953fd4eUL -#define k5 0x50a28be6UL -#define k6 0x5c4dd124UL -#define k7 0x6d703ef3UL -#define k8 0x7a6d76e9UL -#define k9 0 - -static word32 rotlFixed (word32 x, unsigned int y) -{ - return (word32)((x<<y) | (x>>(sizeof(word32)*8-y))); -} - -#define Subround(f, a, b, c, d, e, x, s, k) \ - a += f(b, c, d) + x + k;\ - a = rotlFixed((word32)a, s) + e;\ - c = rotlFixed((word32)c, 10U) - -void RMD160Transform (unsigned __int32 *digest, const unsigned __int32 *data) -{ -#if BYTE_ORDER == LITTLE_ENDIAN - const word32 *X = data; -#else - word32 X[16]; - int i; -#endif - - word32 a1, b1, c1, d1, e1, a2, b2, c2, d2, e2; - a1 = a2 = digest[0]; - b1 = b2 = digest[1]; - c1 = c2 = digest[2]; - d1 = d2 = digest[3]; - e1 = e2 = digest[4]; - -#if BYTE_ORDER == BIG_ENDIAN - for (i = 0; i < 16; i++) - { - X[i] = LE32 (data[i]); - } -#endif - - Subround(F, a1, b1, c1, d1, e1, X[ 0], 11, k0); - Subround(F, e1, a1, b1, c1, d1, X[ 1], 14, k0); - Subround(F, d1, e1, a1, b1, c1, X[ 2], 15, k0); - Subround(F, c1, d1, e1, a1, b1, X[ 3], 12, k0); - Subround(F, b1, c1, d1, e1, a1, X[ 4], 5, k0); - Subround(F, a1, b1, c1, d1, e1, X[ 5], 8, k0); - Subround(F, e1, a1, b1, c1, d1, X[ 6], 7, k0); - Subround(F, d1, e1, a1, b1, c1, X[ 7], 9, k0); - Subround(F, c1, d1, e1, a1, b1, X[ 8], 11, k0); - Subround(F, b1, c1, d1, e1, a1, X[ 9], 13, k0); - Subround(F, a1, b1, c1, d1, e1, X[10], 14, k0); - Subround(F, e1, a1, b1, c1, d1, X[11], 15, k0); - Subround(F, d1, e1, a1, b1, c1, X[12], 6, k0); - Subround(F, c1, d1, e1, a1, b1, X[13], 7, k0); - Subround(F, b1, c1, d1, e1, a1, X[14], 9, 
k0); - Subround(F, a1, b1, c1, d1, e1, X[15], 8, k0); - - Subround(G, e1, a1, b1, c1, d1, X[ 7], 7, k1); - Subround(G, d1, e1, a1, b1, c1, X[ 4], 6, k1); - Subround(G, c1, d1, e1, a1, b1, X[13], 8, k1); - Subround(G, b1, c1, d1, e1, a1, X[ 1], 13, k1); - Subround(G, a1, b1, c1, d1, e1, X[10], 11, k1); - Subround(G, e1, a1, b1, c1, d1, X[ 6], 9, k1); - Subround(G, d1, e1, a1, b1, c1, X[15], 7, k1); - Subround(G, c1, d1, e1, a1, b1, X[ 3], 15, k1); - Subround(G, b1, c1, d1, e1, a1, X[12], 7, k1); - Subround(G, a1, b1, c1, d1, e1, X[ 0], 12, k1); - Subround(G, e1, a1, b1, c1, d1, X[ 9], 15, k1); - Subround(G, d1, e1, a1, b1, c1, X[ 5], 9, k1); - Subround(G, c1, d1, e1, a1, b1, X[ 2], 11, k1); - Subround(G, b1, c1, d1, e1, a1, X[14], 7, k1); - Subround(G, a1, b1, c1, d1, e1, X[11], 13, k1); - Subround(G, e1, a1, b1, c1, d1, X[ 8], 12, k1); - - Subround(H, d1, e1, a1, b1, c1, X[ 3], 11, k2); - Subround(H, c1, d1, e1, a1, b1, X[10], 13, k2); - Subround(H, b1, c1, d1, e1, a1, X[14], 6, k2); - Subround(H, a1, b1, c1, d1, e1, X[ 4], 7, k2); - Subround(H, e1, a1, b1, c1, d1, X[ 9], 14, k2); - Subround(H, d1, e1, a1, b1, c1, X[15], 9, k2); - Subround(H, c1, d1, e1, a1, b1, X[ 8], 13, k2); - Subround(H, b1, c1, d1, e1, a1, X[ 1], 15, k2); - Subround(H, a1, b1, c1, d1, e1, X[ 2], 14, k2); - Subround(H, e1, a1, b1, c1, d1, X[ 7], 8, k2); - Subround(H, d1, e1, a1, b1, c1, X[ 0], 13, k2); - Subround(H, c1, d1, e1, a1, b1, X[ 6], 6, k2); - Subround(H, b1, c1, d1, e1, a1, X[13], 5, k2); - Subround(H, a1, b1, c1, d1, e1, X[11], 12, k2); - Subround(H, e1, a1, b1, c1, d1, X[ 5], 7, k2); - Subround(H, d1, e1, a1, b1, c1, X[12], 5, k2); - - Subround(I, c1, d1, e1, a1, b1, X[ 1], 11, k3); - Subround(I, b1, c1, d1, e1, a1, X[ 9], 12, k3); - Subround(I, a1, b1, c1, d1, e1, X[11], 14, k3); - Subround(I, e1, a1, b1, c1, d1, X[10], 15, k3); - Subround(I, d1, e1, a1, b1, c1, X[ 0], 14, k3); - Subround(I, c1, d1, e1, a1, b1, X[ 8], 15, k3); - Subround(I, b1, c1, d1, e1, a1, X[12], 9, k3); - 
Subround(I, a1, b1, c1, d1, e1, X[ 4], 8, k3); - Subround(I, e1, a1, b1, c1, d1, X[13], 9, k3); - Subround(I, d1, e1, a1, b1, c1, X[ 3], 14, k3); - Subround(I, c1, d1, e1, a1, b1, X[ 7], 5, k3); - Subround(I, b1, c1, d1, e1, a1, X[15], 6, k3); - Subround(I, a1, b1, c1, d1, e1, X[14], 8, k3); - Subround(I, e1, a1, b1, c1, d1, X[ 5], 6, k3); - Subround(I, d1, e1, a1, b1, c1, X[ 6], 5, k3); - Subround(I, c1, d1, e1, a1, b1, X[ 2], 12, k3); - - Subround(J, b1, c1, d1, e1, a1, X[ 4], 9, k4); - Subround(J, a1, b1, c1, d1, e1, X[ 0], 15, k4); - Subround(J, e1, a1, b1, c1, d1, X[ 5], 5, k4); - Subround(J, d1, e1, a1, b1, c1, X[ 9], 11, k4); - Subround(J, c1, d1, e1, a1, b1, X[ 7], 6, k4); - Subround(J, b1, c1, d1, e1, a1, X[12], 8, k4); - Subround(J, a1, b1, c1, d1, e1, X[ 2], 13, k4); - Subround(J, e1, a1, b1, c1, d1, X[10], 12, k4); - Subround(J, d1, e1, a1, b1, c1, X[14], 5, k4); - Subround(J, c1, d1, e1, a1, b1, X[ 1], 12, k4); - Subround(J, b1, c1, d1, e1, a1, X[ 3], 13, k4); - Subround(J, a1, b1, c1, d1, e1, X[ 8], 14, k4); - Subround(J, e1, a1, b1, c1, d1, X[11], 11, k4); - Subround(J, d1, e1, a1, b1, c1, X[ 6], 8, k4); - Subround(J, c1, d1, e1, a1, b1, X[15], 5, k4); - Subround(J, b1, c1, d1, e1, a1, X[13], 6, k4); - - Subround(J, a2, b2, c2, d2, e2, X[ 5], 8, k5); - Subround(J, e2, a2, b2, c2, d2, X[14], 9, k5); - Subround(J, d2, e2, a2, b2, c2, X[ 7], 9, k5); - Subround(J, c2, d2, e2, a2, b2, X[ 0], 11, k5); - Subround(J, b2, c2, d2, e2, a2, X[ 9], 13, k5); - Subround(J, a2, b2, c2, d2, e2, X[ 2], 15, k5); - Subround(J, e2, a2, b2, c2, d2, X[11], 15, k5); - Subround(J, d2, e2, a2, b2, c2, X[ 4], 5, k5); - Subround(J, c2, d2, e2, a2, b2, X[13], 7, k5); - Subround(J, b2, c2, d2, e2, a2, X[ 6], 7, k5); - Subround(J, a2, b2, c2, d2, e2, X[15], 8, k5); - Subround(J, e2, a2, b2, c2, d2, X[ 8], 11, k5); - Subround(J, d2, e2, a2, b2, c2, X[ 1], 14, k5); - Subround(J, c2, d2, e2, a2, b2, X[10], 14, k5); - Subround(J, b2, c2, d2, e2, a2, X[ 3], 12, k5); - Subround(J, a2, 
b2, c2, d2, e2, X[12], 6, k5); - - Subround(I, e2, a2, b2, c2, d2, X[ 6], 9, k6); - Subround(I, d2, e2, a2, b2, c2, X[11], 13, k6); - Subround(I, c2, d2, e2, a2, b2, X[ 3], 15, k6); - Subround(I, b2, c2, d2, e2, a2, X[ 7], 7, k6); - Subround(I, a2, b2, c2, d2, e2, X[ 0], 12, k6); - Subround(I, e2, a2, b2, c2, d2, X[13], 8, k6); - Subround(I, d2, e2, a2, b2, c2, X[ 5], 9, k6); - Subround(I, c2, d2, e2, a2, b2, X[10], 11, k6); - Subround(I, b2, c2, d2, e2, a2, X[14], 7, k6); - Subround(I, a2, b2, c2, d2, e2, X[15], 7, k6); - Subround(I, e2, a2, b2, c2, d2, X[ 8], 12, k6); - Subround(I, d2, e2, a2, b2, c2, X[12], 7, k6); - Subround(I, c2, d2, e2, a2, b2, X[ 4], 6, k6); - Subround(I, b2, c2, d2, e2, a2, X[ 9], 15, k6); - Subround(I, a2, b2, c2, d2, e2, X[ 1], 13, k6); - Subround(I, e2, a2, b2, c2, d2, X[ 2], 11, k6); - - Subround(H, d2, e2, a2, b2, c2, X[15], 9, k7); - Subround(H, c2, d2, e2, a2, b2, X[ 5], 7, k7); - Subround(H, b2, c2, d2, e2, a2, X[ 1], 15, k7); - Subround(H, a2, b2, c2, d2, e2, X[ 3], 11, k7); - Subround(H, e2, a2, b2, c2, d2, X[ 7], 8, k7); - Subround(H, d2, e2, a2, b2, c2, X[14], 6, k7); - Subround(H, c2, d2, e2, a2, b2, X[ 6], 6, k7); - Subround(H, b2, c2, d2, e2, a2, X[ 9], 14, k7); - Subround(H, a2, b2, c2, d2, e2, X[11], 12, k7); - Subround(H, e2, a2, b2, c2, d2, X[ 8], 13, k7); - Subround(H, d2, e2, a2, b2, c2, X[12], 5, k7); - Subround(H, c2, d2, e2, a2, b2, X[ 2], 14, k7); - Subround(H, b2, c2, d2, e2, a2, X[10], 13, k7); - Subround(H, a2, b2, c2, d2, e2, X[ 0], 13, k7); - Subround(H, e2, a2, b2, c2, d2, X[ 4], 7, k7); - Subround(H, d2, e2, a2, b2, c2, X[13], 5, k7); - - Subround(G, c2, d2, e2, a2, b2, X[ 8], 15, k8); - Subround(G, b2, c2, d2, e2, a2, X[ 6], 5, k8); - Subround(G, a2, b2, c2, d2, e2, X[ 4], 8, k8); - Subround(G, e2, a2, b2, c2, d2, X[ 1], 11, k8); - Subround(G, d2, e2, a2, b2, c2, X[ 3], 14, k8); - Subround(G, c2, d2, e2, a2, b2, X[11], 14, k8); - Subround(G, b2, c2, d2, e2, a2, X[15], 6, k8); - Subround(G, a2, b2, c2, d2, 
e2, X[ 0], 14, k8); - Subround(G, e2, a2, b2, c2, d2, X[ 5], 6, k8); - Subround(G, d2, e2, a2, b2, c2, X[12], 9, k8); - Subround(G, c2, d2, e2, a2, b2, X[ 2], 12, k8); - Subround(G, b2, c2, d2, e2, a2, X[13], 9, k8); - Subround(G, a2, b2, c2, d2, e2, X[ 9], 12, k8); - Subround(G, e2, a2, b2, c2, d2, X[ 7], 5, k8); - Subround(G, d2, e2, a2, b2, c2, X[10], 15, k8); - Subround(G, c2, d2, e2, a2, b2, X[14], 8, k8); - - Subround(F, b2, c2, d2, e2, a2, X[12], 8, k9); - Subround(F, a2, b2, c2, d2, e2, X[15], 5, k9); - Subround(F, e2, a2, b2, c2, d2, X[10], 12, k9); - Subround(F, d2, e2, a2, b2, c2, X[ 4], 9, k9); - Subround(F, c2, d2, e2, a2, b2, X[ 1], 12, k9); - Subround(F, b2, c2, d2, e2, a2, X[ 5], 5, k9); - Subround(F, a2, b2, c2, d2, e2, X[ 8], 14, k9); - Subround(F, e2, a2, b2, c2, d2, X[ 7], 6, k9); - Subround(F, d2, e2, a2, b2, c2, X[ 6], 8, k9); - Subround(F, c2, d2, e2, a2, b2, X[ 2], 13, k9); - Subround(F, b2, c2, d2, e2, a2, X[13], 6, k9); - Subround(F, a2, b2, c2, d2, e2, X[14], 5, k9); - Subround(F, e2, a2, b2, c2, d2, X[ 0], 15, k9); - Subround(F, d2, e2, a2, b2, c2, X[ 3], 13, k9); - Subround(F, c2, d2, e2, a2, b2, X[ 9], 11, k9); - Subround(F, b2, c2, d2, e2, a2, X[11], 11, k9); - - c1 = digest[1] + c1 + d2; - digest[1] = digest[2] + d1 + e2; - digest[2] = digest[3] + e1 + a2; - digest[3] = digest[4] + a1 + b2; - digest[4] = digest[0] + b1 + c2; - digest[0] = c1; -} - -#else // TC_MINIMIZE_CODE_SIZE - -/* - Derived from source code of TrueCrypt 7.1a, which is - Copyright (c) 2008-2012 TrueCrypt Developers Association and which is governed - by the TrueCrypt License 3.0. - - Modifications and additions to the original source code (contained in this file) - and all other portions of this file are Copyright (c) 2013-2017 IDRIX - and are governed by the Apache License 2.0 the full text of which is - contained in the file License.txt included in VeraCrypt binary and source - code distribution packages. 
-*/ - -#pragma optimize ("tl", on) - -typedef unsigned __int32 uint32; -typedef unsigned __int8 byte; - -#include <stdlib.h> -#pragma intrinsic (_lrotl) - -static const byte OrderTab[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 7, 4, 13, 1, 10, 6, 15, 3, 12, 0, 9, 5, 2, 14, 11, 8, - 3, 10, 14, 4, 9, 15, 8, 1, 2, 7, 0, 6, 13, 11, 5, 12, - 1, 9, 11, 10, 0, 8, 12, 4, 13, 3, 7, 15, 14, 5, 6, 2, - 4, 0, 5, 9, 7, 12, 2, 10, 14, 1, 3, 8, 11, 6, 15, 13, - 5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12, - 6, 11, 3, 7, 0, 13, 5, 10, 14, 15, 8, 12, 4, 9, 1, 2, - 15, 5, 1, 3, 7, 14, 6, 9, 11, 8, 12, 2, 10, 0, 4, 13, - 8, 6, 4, 1, 3, 11, 15, 0, 5, 12, 2, 13, 9, 7, 10, 14, - 12, 15, 10, 4, 1, 5, 8, 7, 6, 2, 13, 14, 0, 3, 9, 11 -}; - -static const byte RolTab[] = { - 11, 14, 15, 12, 5, 8, 7, 9, 11, 13, 14, 15, 6, 7, 9, 8, - 7, 6, 8, 13, 11, 9, 7, 15, 7, 12, 15, 9, 11, 7, 13, 12, - 11, 13, 6, 7, 14, 9, 13, 15, 14, 8, 13, 6, 5, 12, 7, 5, - 11, 12, 14, 15, 14, 15, 9, 8, 9, 14, 5, 6, 8, 6, 5, 12, - 9, 15, 5, 11, 6, 8, 13, 12, 5, 12, 13, 14, 11, 8, 5, 6, - 8, 9, 9, 11, 13, 15, 15, 5, 7, 7, 8, 11, 14, 14, 12, 6, - 9, 13, 15, 7, 12, 8, 9, 11, 7, 7, 12, 7, 6, 15, 13, 11, - 9, 7, 15, 11, 8, 6, 6, 14, 12, 13, 5, 14, 13, 13, 7, 5, - 15, 5, 8, 11, 14, 14, 6, 14, 6, 9, 12, 9, 12, 5, 15, 8, - 8, 5, 12, 9, 12, 5, 14, 6, 8, 13, 6, 5, 15, 13, 11, 11 -}; - -static const uint32 KTab[] = { - 0x00000000UL, - 0x5A827999UL, - 0x6ED9EBA1UL, - 0x8F1BBCDCUL, - 0xA953FD4EUL, - 0x50A28BE6UL, - 0x5C4DD124UL, - 0x6D703EF3UL, - 0x7A6D76E9UL, - 0x00000000UL -}; - - -void RMD160Transform (unsigned __int32 *state, const unsigned __int32 *data) -{ - uint32 a, b, c, d, e; - uint32 a2, b2, c2, d2, e2; - byte pos; - uint32 tmp; - - a = state[0]; - b = state[1]; - c = state[2]; - d = state[3]; - e = state[4]; - - for (pos = 0; pos < 160; ++pos) - { - tmp = a + data[OrderTab[pos]] + KTab[pos >> 4]; - - switch (pos >> 4) - { - case 0: case 9: tmp += F (b, c, d); break; - case 1: case 8: tmp += G 
(b, c, d); break; - case 2: case 7: tmp += H (b, c, d); break; - case 3: case 6: tmp += I (b, c, d); break; - case 4: case 5: tmp += J (b, c, d); break; - } - - tmp = _lrotl (tmp, RolTab[pos]) + e; - a = e; - e = d; - d = _lrotl (c, 10); - c = b; - b = tmp; - - if (pos == 79) - { - a2 = a; - b2 = b; - c2 = c; - d2 = d; - e2 = e; - - a = state[0]; - b = state[1]; - c = state[2]; - d = state[3]; - e = state[4]; - } - } - - tmp = state[1] + c2 + d; - state[1] = state[2] + d2 + e; - state[2] = state[3] + e2 + a; - state[3] = state[4] + a2 + b; - state[4] = state[0] + b2 + c; - state[0] = tmp; -} - -#endif // TC_MINIMIZE_CODE_SIZE diff --git a/src/Crypto/Rmd160.h b/src/Crypto/Rmd160.h deleted file mode 100644 index 81b5d6f0..00000000 --- a/src/Crypto/Rmd160.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef TC_HEADER_Crypto_Ripemd160 -#define TC_HEADER_Crypto_Ripemd160 - -#include "Common/Tcdefs.h" - -#if defined(__cplusplus) -extern "C" -{ -#endif - -#define RIPEMD160_BLOCK_LENGTH 64 - -typedef struct RMD160Context -{ - unsigned __int32 state[5]; -#ifndef TC_WINDOWS_BOOT - uint64 count; -#else - uint32 count; -#endif - unsigned char buffer[RIPEMD160_BLOCK_LENGTH]; -} RMD160_CTX; - -void RMD160Init (RMD160_CTX *ctx); -void RMD160Transform (unsigned __int32 *state, const unsigned __int32 *data); -void RMD160Update (RMD160_CTX *ctx, const unsigned char *input, unsigned __int32 len); -void RMD160Final (unsigned char *digest, RMD160_CTX *ctx); - -#if defined(__cplusplus) -} -#endif - -#endif // TC_HEADER_Crypto_Ripemd160 diff --git a/src/Crypto/Sha2.h b/src/Crypto/Sha2.h index 7e90abff..1fbcb8d1 100644 --- a/src/Crypto/Sha2.h +++ b/src/Crypto/Sha2.h @@ -12,6 +12,13 @@ #include "Common/Endian.h" #include "Crypto/config.h" +#ifdef WOLFCRYPT_BACKEND + #include <wolfssl/options.h> + #include <wolfssl/wolfcrypt/sha256.h> + #include <wolfssl/wolfcrypt/sha512.h> + #include <wolfssl/wolfcrypt/hash.h> +#endif + #if defined(__cplusplus) extern "C" { #endif @@ -28,6 +35,10 @@ extern "C" { 
#define SHA2_ALIGN CRYPTOPP_ALIGN_DATA(16) #endif +#ifdef WOLFCRYPT_BACKEND +typedef struct wc_Sha512 sha512_ctx; +typedef struct wc_Sha256 sha256_ctx; +#else typedef struct { uint_64t count[2]; SHA2_ALIGN uint_64t hash[8]; @@ -39,6 +50,7 @@ typedef struct SHA2_ALIGN uint_32t hash[8]; SHA2_ALIGN uint_32t wbuf[16]; } sha256_ctx; +#endif void sha512_begin(sha512_ctx* ctx); diff --git a/src/Crypto/Sources b/src/Crypto/Sources index 9a1bef14..9542d4b6 100644 --- a/src/Crypto/Sources +++ b/src/Crypto/Sources @@ -7,7 +7,6 @@ NTTARGETFILES = \ "$(OBJ_PATH)\$(O)\Aes_$(TC_ARCH).obj" \ "$(OBJ_PATH)\$(O)\Aes_hw_cpu.obj" \ "$(OBJ_PATH)\$(O)\rdrand_ml.obj" \ - "$(OBJ_PATH)\$(O)\gost89_$(TC_ARCH).obj" \ "$(OBJ_PATH)\$(O)\Twofish_$(TC_ARCH).obj" \ "$(OBJ_PATH)\$(O)\Camellia_$(TC_ARCH).obj" \ "$(OBJ_PATH)\$(O)\Camellia_aesni_$(TC_ARCH).obj" \ @@ -22,19 +21,21 @@ NTTARGETFILES = \ SOURCES = \ Aes_$(TC_ARCH).asm \ - gost89_$(TC_ARCH).asm \ Aes_hw_cpu.asm \ rdrand_ml.asm \ rdseed_ml.asm \ Aeskey.c \ Aestab.c \ + blake2s.c \ + blake2s_SSE2.c \ + blake2s_SSE41.c \ + blake2s_SSSE3.c \ chacha-xmm.c \ chacha256.c \ chachaRng.c \ cpu.c \ jitterentropy-base.c \ rdrand.c \ - Rmd160.c \ SerpentFast.c \ SerpentFast_simd.cpp \ Sha2.c \ @@ -43,7 +44,6 @@ SOURCES = \ t1ha2_selfcheck.c \ Twofish.c \ Twofish_$(TC_ARCH).S \ - GostCipher.c \ Streebog.c \ kuznyechik.c \ kuznyechik_simd.c \ diff --git a/src/Crypto/Twofish.c b/src/Crypto/Twofish.c index f0906f15..ad93b66f 100644 --- a/src/Crypto/Twofish.c +++ b/src/Crypto/Twofish.c @@ -607,7 +607,7 @@ void twofish_set_key(TwofishInstance *instance, const u4byte in_key[]) byte S8[16]; uint32 S32[4]; } us; - int i; + unsigned int i; const byte* key = (const byte*) in_key; us.S32[0] = RS[0][key[0]] ^ RS[1][key[1]] ^ RS[2][key[2]] ^ RS[3][key[3]] ^ RS[4][key[4]] ^ RS[5][key[5]] ^ RS[6][key[6]] ^ RS[7][key[7]]; diff --git a/src/Crypto/Whirlpool.c b/src/Crypto/Whirlpool.c index 98ba318d..9af0cb32 100644 --- a/src/Crypto/Whirlpool.c +++ 
b/src/Crypto/Whirlpool.c @@ -894,7 +894,7 @@ static uint64 HashMultipleBlocks(WHIRLPOOL_CTX * const ctx, const uint64 *input, #if BYTE_ORDER == BIG_ENDIAN WhirlpoolTransform(ctx->state, input); #else - CorrectEndianess(dataBuf, input, 64); + CorrectEndianness(dataBuf, input, 64); WhirlpoolTransform(ctx->state, dataBuf); #endif input += 8; @@ -1008,7 +1008,7 @@ void WHIRLPOOL_finalize(WHIRLPOOL_CTX * const ctx, memset(data, 0, 32); } #if BYTE_ORDER == LITTLE_ENDIAN - CorrectEndianess(dataBuf, dataBuf, 32); + CorrectEndianness(dataBuf, dataBuf, 32); #endif dataBuf[4] = 0; @@ -1018,7 +1018,7 @@ void WHIRLPOOL_finalize(WHIRLPOOL_CTX * const ctx, WhirlpoolTransform(stateBuf, dataBuf); #if BYTE_ORDER == LITTLE_ENDIAN - CorrectEndianess(stateBuf, stateBuf, 64); + CorrectEndianness(stateBuf, stateBuf, 64); #endif memcpy(result, stateBuf, 64); } diff --git a/src/Crypto/blake2-impl.h b/src/Crypto/blake2-impl.h new file mode 100644 index 00000000..ad9d88fe --- /dev/null +++ b/src/Crypto/blake2-impl.h @@ -0,0 +1,86 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
+*/ +#ifndef BLAKE2_IMPL_H +#define BLAKE2_IMPL_H + +#define NATIVE_LITTLE_ENDIAN + +#include <stdint.h> +#include <string.h> + +#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L) + #if defined(_MSC_VER) + #define BLAKE2_INLINE __inline + #elif defined(__GNUC__) + #define BLAKE2_INLINE __inline__ + #else + #define BLAKE2_INLINE + #endif +#else + #define BLAKE2_INLINE inline +#endif + +static BLAKE2_INLINE uint32_t load32( const void *src ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + uint32_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = ( const uint8_t * )src; + return (( uint32_t )( p[0] ) << 0) | + (( uint32_t )( p[1] ) << 8) | + (( uint32_t )( p[2] ) << 16) | + (( uint32_t )( p[3] ) << 24) ; +#endif +} + +static BLAKE2_INLINE void store16( void *dst, uint16_t w ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; +#endif +} + +static BLAKE2_INLINE void store32( void *dst, uint32_t w ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); +#endif +} + +static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c ) +{ + return ( w >> c ) | ( w << ( 32 - c ) ); +} + +/* prevents compiler optimizing out memset() */ +static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n) +{ + static void *(*const volatile memset_v)(void *, int, size_t) = &memset; + memset_v(v, 0, n); +} + +#endif diff --git a/src/Crypto/blake2.h b/src/Crypto/blake2.h new file mode 100644 index 00000000..490ed43b --- /dev/null +++ b/src/Crypto/blake2.h @@ -0,0 +1,102 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. 
You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +/* Adapted for VeraCrypt */ + +#ifndef BLAKE2_H +#define BLAKE2_H +#include "Common/Tcdefs.h" + +#if defined(_MSC_VER) +#ifdef TC_WINDOWS_BOOT +#define BLAKE2_PACKED(x) x +#else +#define BLAKE2_PACKED(x) __pragma(pack(push, 1)) x __pragma(pack(pop)) +#endif + +#else +#define BLAKE2_PACKED(x) x __attribute__((packed)) +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + + enum blake2s_constant + { + BLAKE2S_BLOCKBYTES = 64, + BLAKE2S_OUTBYTES = 32, + BLAKE2S_KEYBYTES = 32, + BLAKE2S_SALTBYTES = 8, + BLAKE2S_PERSONALBYTES = 8 + }; + + typedef struct blake2s_state__ + { + uint32 h[8]; + uint32 t[2]; + uint32 f[2]; + uint8 buf[BLAKE2S_BLOCKBYTES]; + size_t buflen; + size_t outlen; + uint8 last_node; + } blake2s_state; + +#ifdef TC_WINDOWS_BOOT + #pragma pack(1) +#endif + + BLAKE2_PACKED(struct blake2s_param__ + { + uint8 digest_length; /* 1 */ + uint8 key_length; /* 2 */ + uint8 fanout; /* 3 */ + uint8 depth; /* 4 */ + uint32 leaf_length; /* 8 */ + uint32 node_offset; /* 12 */ + uint16 xof_length; /* 14 */ + uint8 node_depth; /* 15 */ + uint8 inner_length; /* 16 */ + /* uint8 reserved[0]; */ + uint8 salt[BLAKE2S_SALTBYTES]; /* 24 */ + uint8 personal[BLAKE2S_PERSONALBYTES]; /* 32 */ + }); + +#ifdef TC_WINDOWS_BOOT + #pragma pack() +#endif + + typedef struct blake2s_param__ blake2s_param; + + + /* Padded structs result in a compile-time error */ + enum { + BLAKE2_DUMMY_1 = 1/(int)(sizeof(blake2s_param) == BLAKE2S_OUTBYTES) + }; + + /* Streaming API */ + void blake2s_init( blake2s_state *S ); + void 
blake2s_init_param( blake2s_state *S, const blake2s_param *P ); + void blake2s_update( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final( blake2s_state *S, unsigned char *out ); + + /* Simple API */ + int blake2s( void *out, const void *in, size_t inlen ); + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/src/Crypto/blake2s-load-sse2.h b/src/Crypto/blake2s-load-sse2.h new file mode 100644 index 00000000..926eedda --- /dev/null +++ b/src/Crypto/blake2s-load-sse2.h @@ -0,0 +1,64 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
+*/ + +/* Adapted for VeraCrypt */ + + +#ifndef BLAKE2S_LOAD_SSE2_H +#define BLAKE2S_LOAD_SSE2_H + +#define LOAD_MSG_0_1(buf) buf = _mm_set_epi32(m6,m4,m2,m0) +#define LOAD_MSG_0_2(buf) buf = _mm_set_epi32(m7,m5,m3,m1) +#define LOAD_MSG_0_3(buf) buf = _mm_set_epi32(m12,m10,m8,m14) +#define LOAD_MSG_0_4(buf) buf = _mm_set_epi32(m13,m11,m9,m15) +#define LOAD_MSG_1_1(buf) buf = _mm_set_epi32(m13,m9,m4,m14) +#define LOAD_MSG_1_2(buf) buf = _mm_set_epi32(m6,m15,m8,m10) +#define LOAD_MSG_1_3(buf) buf = _mm_set_epi32(m11,m0,m1,m5) +#define LOAD_MSG_1_4(buf) buf = _mm_set_epi32(m7,m2,m12,m3) +#define LOAD_MSG_2_1(buf) buf = _mm_set_epi32(m15,m5,m12,m11) +#define LOAD_MSG_2_2(buf) buf = _mm_set_epi32(m13,m2,m0,m8) +#define LOAD_MSG_2_3(buf) buf = _mm_set_epi32(m7,m3,m10,m9) +#define LOAD_MSG_2_4(buf) buf = _mm_set_epi32(m1,m6,m14,m4) +#define LOAD_MSG_3_1(buf) buf = _mm_set_epi32(m11,m13,m3,m7) +#define LOAD_MSG_3_2(buf) buf = _mm_set_epi32(m14,m12,m1,m9) +#define LOAD_MSG_3_3(buf) buf = _mm_set_epi32(m4,m5,m2,m15) +#define LOAD_MSG_3_4(buf) buf = _mm_set_epi32(m0,m10,m6,m8) +#define LOAD_MSG_4_1(buf) buf = _mm_set_epi32(m10,m2,m5,m9) +#define LOAD_MSG_4_2(buf) buf = _mm_set_epi32(m15,m4,m7,m0) +#define LOAD_MSG_4_3(buf) buf = _mm_set_epi32(m6,m11,m14,m3) +#define LOAD_MSG_4_4(buf) buf = _mm_set_epi32(m8,m12,m1,m13) +#define LOAD_MSG_5_1(buf) buf = _mm_set_epi32(m8,m0,m6,m2) +#define LOAD_MSG_5_2(buf) buf = _mm_set_epi32(m3,m11,m10,m12) +#define LOAD_MSG_5_3(buf) buf = _mm_set_epi32(m15,m7,m4,m1) +#define LOAD_MSG_5_4(buf) buf = _mm_set_epi32(m14,m5,m13,m9) +#define LOAD_MSG_6_1(buf) buf = _mm_set_epi32(m4,m14,m1,m12) +#define LOAD_MSG_6_2(buf) buf = _mm_set_epi32(m10,m13,m15,m5) +#define LOAD_MSG_6_3(buf) buf = _mm_set_epi32(m9,m6,m0,m8) +#define LOAD_MSG_6_4(buf) buf = _mm_set_epi32(m2,m3,m7,m11) +#define LOAD_MSG_7_1(buf) buf = _mm_set_epi32(m3,m12,m7,m13) +#define LOAD_MSG_7_2(buf) buf = _mm_set_epi32(m9,m1,m14,m11) +#define LOAD_MSG_7_3(buf) buf = 
_mm_set_epi32(m8,m15,m5,m2) +#define LOAD_MSG_7_4(buf) buf = _mm_set_epi32(m6,m4,m0,m10) +#define LOAD_MSG_8_1(buf) buf = _mm_set_epi32(m0,m11,m14,m6) +#define LOAD_MSG_8_2(buf) buf = _mm_set_epi32(m8,m3,m9,m15) +#define LOAD_MSG_8_3(buf) buf = _mm_set_epi32(m1,m13,m12,m10) +#define LOAD_MSG_8_4(buf) buf = _mm_set_epi32(m4,m7,m2,m5) +#define LOAD_MSG_9_1(buf) buf = _mm_set_epi32(m1,m7,m8,m10) +#define LOAD_MSG_9_2(buf) buf = _mm_set_epi32(m5,m6,m4,m2) +#define LOAD_MSG_9_3(buf) buf = _mm_set_epi32(m3,m9,m15,m13) +#define LOAD_MSG_9_4(buf) buf = _mm_set_epi32(m12,m14,m11,m0) + + +#endif diff --git a/src/Crypto/blake2s-load-sse41.h b/src/Crypto/blake2s-load-sse41.h new file mode 100644 index 00000000..9ac3f52a --- /dev/null +++ b/src/Crypto/blake2s-load-sse41.h @@ -0,0 +1,240 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
+*/ + +/* Adapted for VeraCrypt */ + + +#ifndef BLAKE2S_LOAD_SSE41_H +#define BLAKE2S_LOAD_SSE41_H + +#define LOAD_MSG_0_1(buf) \ +buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(2,0,2,0))); + +#define LOAD_MSG_0_2(buf) \ +buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(3,1,3,1))); + +#define LOAD_MSG_0_3(buf) \ +t0 = _mm_shuffle_epi32(m2, _MM_SHUFFLE(3,2,0,1)); \ +t1 = _mm_shuffle_epi32(m3, _MM_SHUFFLE(0,1,3,2)); \ +buf = _mm_blend_epi16(t0, t1, 0xC3); + +#define LOAD_MSG_0_4(buf) \ +t0 = _mm_blend_epi16(t0, t1, 0x3C); \ +buf = _mm_shuffle_epi32(t0, _MM_SHUFFLE(2,3,0,1)); + +#define LOAD_MSG_1_1(buf) \ +t0 = _mm_blend_epi16(m1, m2, 0x0C); \ +t1 = _mm_slli_si128(m3, 4); \ +t2 = _mm_blend_epi16(t0, t1, 0xF0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,0,3)); + +#define LOAD_MSG_1_2(buf) \ +t0 = _mm_shuffle_epi32(m2,_MM_SHUFFLE(0,0,2,0)); \ +t1 = _mm_blend_epi16(m1,m3,0xC0); \ +t2 = _mm_blend_epi16(t0, t1, 0xF0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1)); + +#define LOAD_MSG_1_3(buf) \ +t0 = _mm_slli_si128(m1, 4); \ +t1 = _mm_blend_epi16(m2, t0, 0x30); \ +t2 = _mm_blend_epi16(m0, t1, 0xF0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,0,1,2)); + +#define LOAD_MSG_1_4(buf) \ +t0 = _mm_unpackhi_epi32(m0,m1); \ +t1 = _mm_slli_si128(m3, 4); \ +t2 = _mm_blend_epi16(t0, t1, 0x0C); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,0,1,2)); + +#define LOAD_MSG_2_1(buf) \ +t0 = _mm_unpackhi_epi32(m2,m3); \ +t1 = _mm_blend_epi16(m3,m1,0x0C); \ +t2 = _mm_blend_epi16(t0, t1, 0x0F); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2)); + +#define LOAD_MSG_2_2(buf) \ +t0 = _mm_unpacklo_epi32(m2,m0); \ +t1 = _mm_blend_epi16(t0, m0, 0xF0); \ +t2 = _mm_slli_si128(m3, 8); \ +buf = _mm_blend_epi16(t1, t2, 0xC0); + +#define LOAD_MSG_2_3(buf) \ +t0 = _mm_blend_epi16(m0, m2, 0x3C); \ +t1 = _mm_srli_si128(m1, 12); \ +t2 = _mm_blend_epi16(t0,t1,0x03); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(0,3,2,1)); + +#define LOAD_MSG_2_4(buf) \ +t0 = _mm_slli_si128(m3, 4); \ 
+t1 = _mm_blend_epi16(m0, m1, 0x33); \ +t2 = _mm_blend_epi16(t1, t0, 0xC0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,3,0)); + +#define LOAD_MSG_3_1(buf) \ +t0 = _mm_unpackhi_epi32(m0,m1); \ +t1 = _mm_unpackhi_epi32(t0, m2); \ +t2 = _mm_blend_epi16(t1, m3, 0x0C); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2)); + +#define LOAD_MSG_3_2(buf) \ +t0 = _mm_slli_si128(m2, 8); \ +t1 = _mm_blend_epi16(m3,m0,0x0C); \ +t2 = _mm_blend_epi16(t1, t0, 0xC0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3)); + +#define LOAD_MSG_3_3(buf) \ +t0 = _mm_blend_epi16(m0,m1,0x0F); \ +t1 = _mm_blend_epi16(t0, m3, 0xC0); \ +buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(0,1,2,3)); + +#define LOAD_MSG_3_4(buf) \ +t0 = _mm_alignr_epi8(m0, m1, 4); \ +buf = _mm_blend_epi16(t0, m2, 0x33); + +#define LOAD_MSG_4_1(buf) \ +t0 = _mm_unpacklo_epi64(m1,m2); \ +t1 = _mm_unpackhi_epi64(m0,m2); \ +t2 = _mm_blend_epi16(t0,t1,0x33); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3)); + +#define LOAD_MSG_4_2(buf) \ +t0 = _mm_unpackhi_epi64(m1,m3); \ +t1 = _mm_unpacklo_epi64(m0,m1); \ +buf = _mm_blend_epi16(t0,t1,0x33); + +#define LOAD_MSG_4_3(buf) \ +t0 = _mm_unpackhi_epi64(m3,m1); \ +t1 = _mm_unpackhi_epi64(m2,m0); \ +t2 = _mm_blend_epi16(t1,t0,0x33); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,0,3)); + +#define LOAD_MSG_4_4(buf) \ +t0 = _mm_blend_epi16(m0,m2,0x03); \ +t1 = _mm_slli_si128(t0, 8); \ +t2 = _mm_blend_epi16(t1,m3,0x0F); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,3,1)); + +#define LOAD_MSG_5_1(buf) \ +t0 = _mm_unpackhi_epi32(m0,m1); \ +t1 = _mm_unpacklo_epi32(m0,m2); \ +buf = _mm_unpacklo_epi64(t0,t1); + +#define LOAD_MSG_5_2(buf) \ +t0 = _mm_srli_si128(m2, 4); \ +t1 = _mm_blend_epi16(m0,m3,0x03); \ +buf = _mm_blend_epi16(t1,t0,0x3C); + +#define LOAD_MSG_5_3(buf) \ +t0 = _mm_blend_epi16(m1,m0,0x0C); \ +t1 = _mm_srli_si128(m3, 4); \ +t2 = _mm_blend_epi16(t0,t1,0x30); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1)); + +#define LOAD_MSG_5_4(buf) \ +t0 = 
_mm_unpacklo_epi64(m2,m1); \ +t1 = _mm_shuffle_epi32(m3, _MM_SHUFFLE(2,0,1,0)); \ +t2 = _mm_srli_si128(t0, 4); \ +buf = _mm_blend_epi16(t1,t2,0x33); + +#define LOAD_MSG_6_1(buf) \ +t0 = _mm_slli_si128(m1, 12); \ +t1 = _mm_blend_epi16(m0,m3,0x33); \ +buf = _mm_blend_epi16(t1,t0,0xC0); + +#define LOAD_MSG_6_2(buf) \ +t0 = _mm_blend_epi16(m3,m2,0x30); \ +t1 = _mm_srli_si128(m1, 4); \ +t2 = _mm_blend_epi16(t0,t1,0x03); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,3,0)); + +#define LOAD_MSG_6_3(buf) \ +t0 = _mm_unpacklo_epi64(m0,m2); \ +t1 = _mm_srli_si128(m1, 4); \ +t2 = _mm_blend_epi16(t0,t1,0x0C); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2)); + +#define LOAD_MSG_6_4(buf) \ +t0 = _mm_unpackhi_epi32(m1,m2); \ +t1 = _mm_unpackhi_epi64(m0,t0); \ +buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(0,1,2,3)); + +#define LOAD_MSG_7_1(buf) \ +t0 = _mm_unpackhi_epi32(m0,m1); \ +t1 = _mm_blend_epi16(t0,m3,0x0F); \ +buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(2,0,3,1)); + +#define LOAD_MSG_7_2(buf) \ +t0 = _mm_blend_epi16(m2,m3,0x30); \ +t1 = _mm_srli_si128(m0,4); \ +t2 = _mm_blend_epi16(t0,t1,0x03); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,2,3)); + +#define LOAD_MSG_7_3(buf) \ +t0 = _mm_unpackhi_epi64(m0,m3); \ +t1 = _mm_unpacklo_epi64(m1,m2); \ +t2 = _mm_blend_epi16(t0,t1,0x3C); \ +buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(2,3,1,0)); + +#define LOAD_MSG_7_4(buf) \ +t0 = _mm_unpacklo_epi32(m0,m1); \ +t1 = _mm_unpackhi_epi32(m1,m2); \ +t2 = _mm_unpacklo_epi64(t0,t1); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,0,3)); + +#define LOAD_MSG_8_1(buf) \ +t0 = _mm_unpackhi_epi32(m1,m3); \ +t1 = _mm_unpacklo_epi64(t0,m0); \ +t2 = _mm_blend_epi16(t1,m2,0xC0); \ +buf = _mm_shufflehi_epi16(t2,_MM_SHUFFLE(1,0,3,2)); + +#define LOAD_MSG_8_2(buf) \ +t0 = _mm_unpackhi_epi32(m0,m3); \ +t1 = _mm_blend_epi16(m2,t0,0xF0); \ +buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(0,2,1,3)); + +#define LOAD_MSG_8_3(buf) \ +t0 = _mm_unpacklo_epi64(m0,m3); \ +t1 = _mm_srli_si128(m2,8); \ +t2 = 
_mm_blend_epi16(t0,t1,0x03); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,3,2,0)); + +#define LOAD_MSG_8_4(buf) \ +t0 = _mm_blend_epi16(m1,m0,0x30); \ +buf = _mm_shuffle_epi32(t0,_MM_SHUFFLE(0,3,2,1)); + +#define LOAD_MSG_9_1(buf) \ +t0 = _mm_blend_epi16(m0,m2,0x03); \ +t1 = _mm_blend_epi16(m1,m2,0x30); \ +t2 = _mm_blend_epi16(t1,t0,0x0F); \ +buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(1,3,0,2)); + +#define LOAD_MSG_9_2(buf) \ +t0 = _mm_slli_si128(m0,4); \ +t1 = _mm_blend_epi16(m1,t0,0xC0); \ +buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(1,2,0,3)); + +#define LOAD_MSG_9_3(buf) \ +t0 = _mm_unpackhi_epi32(m0,m3); \ +t1 = _mm_unpacklo_epi32(m2,m3); \ +t2 = _mm_unpackhi_epi64(t0,t1); \ +buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,2,1,3)); + +#define LOAD_MSG_9_4(buf) \ +t0 = _mm_blend_epi16(m3,m2,0xC0); \ +t1 = _mm_unpacklo_epi32(m0,m3); \ +t2 = _mm_blend_epi16(t0,t1,0x0F); \ +buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(1,2,3,0)); + +#endif diff --git a/src/Crypto/blake2s-ref.c b/src/Crypto/blake2s-ref.c new file mode 100644 index 00000000..435630b9 --- /dev/null +++ b/src/Crypto/blake2s-ref.c @@ -0,0 +1,336 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
+*/ + +/* Adapted for VeraCrypt */ + +#include <stdlib.h> +#include <STRING.H> + +#include "blake2.h" + +#pragma optimize ("tl", on) + +#pragma intrinsic(_lrotr) +#pragma intrinsic( memcpy ) +#pragma intrinsic( memset ) + +static const uint32 blake2s_IV[8] = +{ + 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, + 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL +}; + +static const uint8 blake2s_sigma[10][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , +}; + +static void blake2s_set_lastnode( blake2s_state *S ) +{ + S->f[1] = (uint32)-1; +} + +/* Some helper functions, not necessarily useful */ +static int blake2s_is_lastblock( const blake2s_state *S ) +{ + return S->f[0] != 0; +} + +static void blake2s_set_lastblock( blake2s_state *S ) +{ + if( S->last_node ) blake2s_set_lastnode( S ); + + S->f[0] = (uint32)-1; +} + +static void blake2s_increment_counter( blake2s_state *S, const uint32 inc ) +{ + S->t[0] += inc; + S->t[1] += ( S->t[0] < inc ); +} + +static void blake2s_init0( blake2s_state *S ) +{ + size_t i; + memset( S, 0, sizeof( blake2s_state ) ); + + for( i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; +} + +/* init2 xors IV with input parameter block */ +void blake2s_init_param( blake2s_state *S, const blake2s_param *P ) +{ + const unsigned char *p = ( const unsigned char * )( P ); + size_t i; + uint32 w; + + blake2s_init0( S ); + + /* IV XOR ParamBlock */ + for( i = 0; i < 8; ++i ) + { + 
memcpy (&w, &p[i * 4], sizeof (w)); + S->h[i] ^= w; + } + + S->outlen = P->digest_length; +} + + +/* Sequential blake2s initialization */ +void blake2s_init( blake2s_state *S ) +{ + blake2s_param P[1]; + + P->digest_length = 32; + P->key_length = 0; + P->fanout = 1; + P->depth = 1; + P->leaf_length = 0; + P->node_offset = 0; + P->xof_length = 0; + P->node_depth = 0; + P->inner_length = 0; + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + blake2s_init_param( S, P ); +} + +#ifndef TC_MINIMIZE_CODE_SIZE +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2s_sigma[r][2*i+0]]; \ + d = _lrotr(d ^ a, 16); \ + c = c + d; \ + b = _lrotr(b ^ c, 12); \ + a = a + b + m[blake2s_sigma[r][2*i+1]]; \ + d = _lrotr(d ^ a, 8); \ + c = c + d; \ + b = _lrotr(b ^ c, 7); \ + } while(0) + +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) +#else +#define G_BASE(r,i,a,b,c,d) \ + do { \ + v[a] = v[a] + v[b] + m[blake2s_sigma[r][2*i+0]]; \ + v[d] = _lrotr(v[d] ^ v[a], 16); \ + v[c] = v[c] + v[d]; \ + v[b] = _lrotr(v[b] ^ v[c], 12); \ + v[a] = v[a] + v[b] + m[blake2s_sigma[r][2*i+1]]; \ + v[d] = _lrotr(v[d] ^ v[a], 8); \ + v[c] = v[c] + v[d]; \ + v[b] = _lrotr(v[b] ^ v[c], 7); \ + } while(0) + +static void G(unsigned char r, unsigned char i, uint32* m, uint32* v, unsigned char a, unsigned char b, unsigned char c, unsigned char d) +{ + G_BASE(r,i,a,b,c,d); +} + +static void round_base (unsigned char r, uint32* m, uint32* v) +{ + G(r,0,m,v, 0, 4, 8, 12); + G(r,1,m,v, 1, 5, 9,13); + G(r,2,m,v, 2, 6,10,14); + G(r,3,m,v, 3, 7,11,15); + G(r,4,m,v, 0, 5,10,15); + G(r,5,m,v, 1, 6,11,12); + G(r,6,m,v, 2, 7, 8,13); + 
G(r,7,m,v, 3, 4, 9,14); +} + +#define ROUND(r) round_base(r,m,v) +#endif +static void blake2s_compress( blake2s_state *S, const uint8 in[BLAKE2S_BLOCKBYTES] ) +{ + uint32 m[16]; + uint32 v[16]; + int i; + + for( i = 0; i < 16; ++i ) { + memcpy (&m[i], in + i * sizeof( m[i] ), sizeof(uint32)); + } + + for( i = 0; i < 8; ++i ) { + v[i] = S->h[i]; + } + + v[ 8] = blake2s_IV[0]; + v[ 9] = blake2s_IV[1]; + v[10] = blake2s_IV[2]; + v[11] = blake2s_IV[3]; + v[12] = S->t[0] ^ blake2s_IV[4]; + v[13] = S->t[1] ^ blake2s_IV[5]; + v[14] = S->f[0] ^ blake2s_IV[6]; + v[15] = S->f[1] ^ blake2s_IV[7]; + + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + + for( i = 0; i < 8; ++i ) { + S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + } +} + +#undef G +#undef ROUND + +void blake2s_update( blake2s_state *S, const void *pin, size_t inlen ) +{ + const unsigned char * in = (const unsigned char *)pin; + if( inlen > 0 ) + { + size_t left = S->buflen; + size_t fill = BLAKE2S_BLOCKBYTES - left; + if( inlen > fill ) + { + S->buflen = 0; + memcpy( S->buf + left, in, fill ); /* Fill buffer */ + blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); + blake2s_compress( S, S->buf ); /* Compress */ + in += fill; inlen -= fill; + while(inlen > BLAKE2S_BLOCKBYTES) { + blake2s_increment_counter(S, BLAKE2S_BLOCKBYTES); + blake2s_compress( S, in ); + in += BLAKE2S_BLOCKBYTES; + inlen -= BLAKE2S_BLOCKBYTES; + } + } + memcpy( S->buf + S->buflen, in, inlen ); + S->buflen += inlen; + } +} + +int blake2s_final( blake2s_state *S, unsigned char *out ) +{ + int i; + + if( blake2s_is_lastblock( S ) ) + return -1; + + blake2s_increment_counter( S, ( uint32 )S->buflen ); + blake2s_set_lastblock( S ); + memset( S->buf + S->buflen, 0, BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ + blake2s_compress( S, S->buf ); + + for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ + memcpy( out + sizeof( S->h[i] ) * i, &S->h[i], 
sizeof(uint32) ); + + return 0; +} + +#if defined(SUPERCOP) +int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) +{ + return blake2s( out, BLAKE2S_OUTBYTES, in, inlen, NULL, 0 ); +} +#endif + +#if defined(BLAKE2S_SELFTEST) +#include <string.h> +#include "blake2-kat.h" +int main( void ) +{ + uint8 key[BLAKE2S_KEYBYTES]; + uint8 buf[BLAKE2_KAT_LENGTH]; + size_t i, step; + + for( i = 0; i < BLAKE2S_KEYBYTES; ++i ) + key[i] = ( uint8 )i; + + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + buf[i] = ( uint8 )i; + + /* Test simple API */ + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + { + uint8 hash[BLAKE2S_OUTBYTES]; + blake2s( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES ); + + if( 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) ) + { + goto fail; + } + } + + /* Test streaming API */ + for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) { + for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) { + uint8 hash[BLAKE2S_OUTBYTES]; + blake2s_state S; + uint8 * p = buf; + size_t mlen = i; + int err = 0; + + if( (err = blake2s_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) { + goto fail; + } + + while (mlen >= step) { + if ( (err = blake2s_update(&S, p, step)) < 0 ) { + goto fail; + } + mlen -= step; + p += step; + } + if ( (err = blake2s_update(&S, p, mlen)) < 0) { + goto fail; + } + if ( (err = blake2s_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) { + goto fail; + } + + if (0 != memcmp(hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES)) { + goto fail; + } + } + } + + puts( "ok" ); + return 0; +fail: + puts("error"); + return -1; +} +#endif diff --git a/src/Crypto/blake2s-round.h b/src/Crypto/blake2s-round.h new file mode 100644 index 00000000..590540bb --- /dev/null +++ b/src/Crypto/blake2s-round.h @@ -0,0 +1,159 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. 
You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +/* Adapted for VeraCrypt */ + + +#ifndef BLAKE2S_ROUND_H +#define BLAKE2S_ROUND_H + +#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) ) +#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) + +#define TOF(reg) _mm_castsi128_ps((reg)) +#define TOI(reg) _mm_castps_si128((reg)) + +#define LIKELY(x) __builtin_expect((x),1) + + +/* Microarchitecture-specific macros */ +#ifndef HAVE_XOP +#ifdef HAVE_SSSE3 +#define _mm_roti_epi32(r, c) ( \ + (8==-(c)) ? _mm_shuffle_epi8(r,r8) \ + : (16==-(c)) ? _mm_shuffle_epi8(r,r16) \ + : _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) )) ) +#else +#define _mm_roti_epi32(r, c) _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) )) +#endif +#else +/* ... 
*/ +#endif + + +#define G1(row1,row2,row3,row4,buf) \ + row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \ + row4 = _mm_xor_si128( row4, row1 ); \ + row4 = _mm_roti_epi32(row4, -16); \ + row3 = _mm_add_epi32( row3, row4 ); \ + row2 = _mm_xor_si128( row2, row3 ); \ + row2 = _mm_roti_epi32(row2, -12); + +#define G2(row1,row2,row3,row4,buf) \ + row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \ + row4 = _mm_xor_si128( row4, row1 ); \ + row4 = _mm_roti_epi32(row4, -8); \ + row3 = _mm_add_epi32( row3, row4 ); \ + row2 = _mm_xor_si128( row2, row3 ); \ + row2 = _mm_roti_epi32(row2, -7); + +#define DIAGONALIZE(row1,row2,row3,row4) \ + row1 = _mm_shuffle_epi32( row1, _MM_SHUFFLE(2,1,0,3) ); \ + row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(1,0,3,2) ); \ + row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(0,3,2,1) ); + +#define UNDIAGONALIZE(row1,row2,row3,row4) \ + row1 = _mm_shuffle_epi32( row1, _MM_SHUFFLE(0,3,2,1) ); \ + row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(1,0,3,2) ); \ + row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(2,1,0,3) ); + +#if defined(HAVE_XOP) +#include "blake2s-load-xop.h" +#elif defined(HAVE_SSE41) +#include "blake2s-load-sse41.h" +#else +#include "blake2s-load-sse2.h" +#endif + +#define ROUND(r) \ + LOAD_MSG_ ##r ##_1(buf1); \ + G1(row1,row2,row3,row4,buf1); \ + LOAD_MSG_ ##r ##_2(buf2); \ + G2(row1,row2,row3,row4,buf2); \ + DIAGONALIZE(row1,row2,row3,row4); \ + LOAD_MSG_ ##r ##_3(buf3); \ + G1(row1,row2,row3,row4,buf3); \ + LOAD_MSG_ ##r ##_4(buf4); \ + G2(row1,row2,row3,row4,buf4); \ + UNDIAGONALIZE(row1,row2,row3,row4); \ + +// load32 is always called in SSE case which implies little endian +#define load32(x) *((uint32*) (x)) + +extern const uint32 blake2s_IV[8]; + +#if defined(HAVE_SSE41) +void blake2s_compress_sse41( blake2s_state *S, const uint8 block[BLAKE2S_BLOCKBYTES] ) +#elif defined (HAVE_SSSE3) +void blake2s_compress_ssse3( blake2s_state *S, const uint8 block[BLAKE2S_BLOCKBYTES] ) +#else +void blake2s_compress_sse2( blake2s_state 
*S, const uint8 block[BLAKE2S_BLOCKBYTES] ) +#endif +{ + __m128i row1, row2, row3, row4; + __m128i buf1, buf2, buf3, buf4; +#if defined(HAVE_SSE41) + __m128i t0, t1; +#if !defined(HAVE_XOP) + __m128i t2; +#endif +#endif + __m128i ff0, ff1; +#if defined(HAVE_SSSE3) && !defined(HAVE_XOP) + const __m128i r8 = _mm_set_epi8( 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1 ); + const __m128i r16 = _mm_set_epi8( 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2 ); +#endif +#if defined(HAVE_SSE41) + const __m128i m0 = LOADU( block + 00 ); + const __m128i m1 = LOADU( block + 16 ); + const __m128i m2 = LOADU( block + 32 ); + const __m128i m3 = LOADU( block + 48 ); +#else + const uint32 m0 = load32(block + 0 * sizeof(uint32)); + const uint32 m1 = load32(block + 1 * sizeof(uint32)); + const uint32 m2 = load32(block + 2 * sizeof(uint32)); + const uint32 m3 = load32(block + 3 * sizeof(uint32)); + const uint32 m4 = load32(block + 4 * sizeof(uint32)); + const uint32 m5 = load32(block + 5 * sizeof(uint32)); + const uint32 m6 = load32(block + 6 * sizeof(uint32)); + const uint32 m7 = load32(block + 7 * sizeof(uint32)); + const uint32 m8 = load32(block + 8 * sizeof(uint32)); + const uint32 m9 = load32(block + 9 * sizeof(uint32)); + const uint32 m10 = load32(block + 10 * sizeof(uint32)); + const uint32 m11 = load32(block + 11 * sizeof(uint32)); + const uint32 m12 = load32(block + 12 * sizeof(uint32)); + const uint32 m13 = load32(block + 13 * sizeof(uint32)); + const uint32 m14 = load32(block + 14 * sizeof(uint32)); + const uint32 m15 = load32(block + 15 * sizeof(uint32)); +#endif + row1 = ff0 = LOADU( &S->h[0] ); + row2 = ff1 = LOADU( &S->h[4] ); + row3 = _mm_loadu_si128( (__m128i const *)&blake2s_IV[0] ); + row4 = _mm_xor_si128( _mm_loadu_si128( (__m128i const *)&blake2s_IV[4] ), LOADU( &S->t[0] ) ); + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + STOREU( &S->h[0], _mm_xor_si128( ff0, 
_mm_xor_si128( row1, row3 ) ) ); + STOREU( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) ); +} + +#endif diff --git a/src/Crypto/blake2s.c b/src/Crypto/blake2s.c new file mode 100644 index 00000000..9850cae1 --- /dev/null +++ b/src/Crypto/blake2s.c @@ -0,0 +1,349 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +/* Adapted for VeraCrypt */ + +#include "blake2.h" +#include "Common/Endian.h" +#include "Crypto/config.h" +#include "Crypto/cpu.h" +#include "Crypto/misc.h" + +// load32 is always called in SSE case which implies little endian +#define load32(x) *((uint32*) (x)) + +const uint32 blake2s_IV[8] = +{ + 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, + 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL +}; + +static const uint8 blake2s_sigma[10][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , +}; + +/* Some helper functions */ +#define 
blake2s_set_lastnode(S) S->f[1] = (uint32)-1; + +#define blake2s_is_lastblock(S) (S->f[0] != 0) + +#define blake2s_set_lastblock(S) { \ + if( S->last_node ) blake2s_set_lastnode( S ); \ + S->f[0] = (uint32)-1; \ + } + +#define blake2s_increment_counter(S,inc) { \ + uint64 t = ( (( uint64 )S->t[1]) << 32 ) | S->t[0]; \ + t += (inc); \ + S->t[0] = ( uint32 )( t ); \ + S->t[1] = ( uint32 )( t >> 32 ); \ + } + +/* init2 xors IV with input parameter block */ +void blake2s_init_param( blake2s_state *S, const blake2s_param *P ) +{ + size_t i; + /*blake2s_init0( S ); */ + const uint8 * v = ( const uint8 * )( blake2s_IV ); + const uint8 * p = ( const uint8 * )( P ); + uint8 * h = ( uint8 * )( S->h ); + /* IV XOR ParamBlock */ + memset( S, 0, sizeof( blake2s_state ) ); + + for( i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; + + S->outlen = P->digest_length; +} + + +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2s_sigma[r][2*i+0]]; \ + d = rotr32(d ^ a, 16); \ + c = c + d; \ + b = rotr32(b ^ c, 12); \ + a = a + b + m[blake2s_sigma[r][2*i+1]]; \ + d = rotr32(d ^ a, 8); \ + c = c + d; \ + b = rotr32(b ^ c, 7); \ + } while(0) + +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + +typedef void (*blake2s_compressFn)( blake2s_state *S, const uint8 block[BLAKE2S_BLOCKBYTES] ); + +blake2s_compressFn blake2s_compress_func = NULL; +#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 +extern int blake2s_has_sse2(); +extern int blake2s_has_ssse3(); +extern int blake2s_has_sse41(); +extern void blake2s_compress_sse2( blake2s_state *S, const uint8 block[BLAKE2S_BLOCKBYTES] ); +extern void blake2s_compress_ssse3( blake2s_state *S, const uint8 block[BLAKE2S_BLOCKBYTES] ); +extern void 
blake2s_compress_sse41( blake2s_state *S, const uint8 block[BLAKE2S_BLOCKBYTES] ); +#endif + + +static void blake2s_compress_std( blake2s_state *S, const uint8 in[BLAKE2S_BLOCKBYTES] ) +{ + uint32 m[16]; + uint32 v[16]; + size_t i; + + for( i = 0; i < 16; ++i ) { + m[i] = *((uint32*) (in + i * sizeof( m[i] ))); + } + + for( i = 0; i < 8; ++i ) { + v[i] = S->h[i]; + } + + v[ 8] = blake2s_IV[0]; + v[ 9] = blake2s_IV[1]; + v[10] = blake2s_IV[2]; + v[11] = blake2s_IV[3]; + v[12] = S->t[0] ^ blake2s_IV[4]; + v[13] = S->t[1] ^ blake2s_IV[5]; + v[14] = S->f[0] ^ blake2s_IV[6]; + v[15] = S->f[1] ^ blake2s_IV[7]; + + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + + for( i = 0; i < 8; ++i ) { + S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + } +} + +#undef G +#undef ROUND + + +/* Some sort of default parameter block initialization, for sequential blake2s */ +void blake2s_init( blake2s_state *S ) +{ + blake2s_param P[1]; + + P->digest_length = BLAKE2S_OUTBYTES; + P->key_length = 0; + P->fanout = 1; + P->depth = 1; + P->leaf_length = 0; + P->node_offset = 0; + P->xof_length = 0; + P->node_depth = 0; + P->inner_length = 0; + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + blake2s_init_param( S, P ); + + if (!blake2s_compress_func) + { +#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 + if (HasSSE2() && blake2s_has_sse2()) + { + if (HasSSE41() && blake2s_has_sse41()) + { + blake2s_compress_func = blake2s_compress_sse41; + } + else + if (HasSSSE3() && blake2s_has_ssse3()) + { + blake2s_compress_func = blake2s_compress_ssse3; + } + else + blake2s_compress_func = blake2s_compress_sse2; + } + else +#endif + blake2s_compress_func = blake2s_compress_std; + } +} + +void blake2s_update( blake2s_state *S, const void *pin, size_t inlen ) +{ + const unsigned char * in = (const unsigned char 
*)pin; + if( inlen > 0 ) + { + size_t left = S->buflen; + size_t fill = BLAKE2S_BLOCKBYTES - left; + if( inlen > fill ) + { + S->buflen = 0; + memcpy( S->buf + left, in, fill ); /* Fill buffer */ + blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); + blake2s_compress_func( S, S->buf ); /* Compress */ + in += fill; inlen -= fill; + while(inlen > BLAKE2S_BLOCKBYTES) { + blake2s_increment_counter(S, BLAKE2S_BLOCKBYTES); + blake2s_compress_func( S, in ); + in += BLAKE2S_BLOCKBYTES; + inlen -= BLAKE2S_BLOCKBYTES; + } + } + memcpy( S->buf + S->buflen, in, inlen ); + S->buflen += inlen; + } +} + +int blake2s_final( blake2s_state *S, unsigned char *out ) +{ + size_t i; + + if( blake2s_is_lastblock( S ) ) + return -1; + + blake2s_increment_counter( S, (uint32)S->buflen ); + blake2s_set_lastblock( S ); + memset( S->buf + S->buflen, 0, BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ + blake2s_compress_func( S, S->buf ); + + for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ + { +#if BYTE_ORDER == LITTLE_ENDIAN + *((uint32*) out) = S->h[i]; +#else + uint32 w = S->h[i] ; + out[0] = (uint8)(w >> 0); + out[1] = (uint8)(w >> 8); + out[2] = (uint8)(w >> 16); + out[3] = (uint8)(w >> 24); +#endif + out += sizeof (uint32); + } + + return 0; +} + +/* inlen, at least, should be uint64. Others can be size_t. 
*/ +int blake2s( void *out, const void *in, size_t inlen) +{ + blake2s_state S[1]; + + /* Verify parameters */ + if ( NULL == in && inlen > 0 ) return -1; + + if ( NULL == out ) return -1; + + blake2s_init( S ); + + blake2s_update( S, ( const uint8 * )in, inlen ); + blake2s_final( S, (unsigned char*) out ); + return 0; +} + +#if defined(SUPERCOP) +int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) +{ + return blake2s( out, BLAKE2S_OUTBYTES, in, inlen, NULL, 0 ); +} +#endif + +#if defined(BLAKE2S_SELFTEST) +#include <string.h> +#include "blake2-kat.h" +int main( void ) +{ + uint8 key[BLAKE2S_KEYBYTES]; + uint8 buf[BLAKE2_KAT_LENGTH]; + size_t i, step; + + for( i = 0; i < BLAKE2S_KEYBYTES; ++i ) + key[i] = ( uint8 )i; + + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + buf[i] = ( uint8 )i; + + /* Test simple API */ + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + { + uint8 hash[BLAKE2S_OUTBYTES]; + blake2s( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES ); + + if( 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) ) + { + goto fail; + } + } + + /* Test streaming API */ + for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) { + for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) { + uint8 hash[BLAKE2S_OUTBYTES]; + blake2s_state S; + uint8 * p = buf; + size_t mlen = i; + int err = 0; + + if( (err = blake2s_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) { + goto fail; + } + + while (mlen >= step) { + if ( (err = blake2s_update(&S, p, step)) < 0 ) { + goto fail; + } + mlen -= step; + p += step; + } + if ( (err = blake2s_update(&S, p, mlen)) < 0) { + goto fail; + } + if ( (err = blake2s_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) { + goto fail; + } + + if (0 != memcmp(hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES)) { + goto fail; + } + } + } + + puts( "ok" ); + return 0; +fail: + puts("error"); + return -1; +} +#endif diff --git a/src/Crypto/blake2s_SSE2.c b/src/Crypto/blake2s_SSE2.c new file mode 100644 index 00000000..41ea0a6c --- 
/dev/null +++ b/src/Crypto/blake2s_SSE2.c @@ -0,0 +1,39 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +/* Adapted for VeraCrypt */ + +#include "blake2.h" +#include "Common/Endian.h" +#include "Crypto/config.h" +#include "Crypto/cpu.h" +#include "Crypto/misc.h" + +#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE + +#include "blake2s-round.h" + +int blake2s_has_sse2() +{ + return 1; +} + +#else +int blake2s_has_sse2() +{ + return 0; +} + +#endif diff --git a/src/Crypto/blake2s_SSE41.c b/src/Crypto/blake2s_SSE41.c new file mode 100644 index 00000000..99e394c1 --- /dev/null +++ b/src/Crypto/blake2s_SSE41.c @@ -0,0 +1,52 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
+*/ + +/* Adapted for VeraCrypt */ + +#include "blake2.h" +#include "Common/Endian.h" +#include "Crypto/config.h" +#include "Crypto/cpu.h" +#include "Crypto/misc.h" + +#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE +#if CRYPTOPP_BOOL_SSE41_INTRINSICS_AVAILABLE + +#define HAVE_SSE41 + +#if CRYPTOPP_SSSE3_AVAILABLE +#define HAVE_SSSE3 +#endif + +#include "blake2s-round.h" + +int blake2s_has_sse41() +{ + return 1; +} + +#else +int blake2s_has_sse41() +{ + return 0; +} + +#endif +#else +int blake2s_has_sse41() +{ + return 0; +} +#endif diff --git a/src/Crypto/blake2s_SSSE3.c b/src/Crypto/blake2s_SSSE3.c new file mode 100644 index 00000000..4f3252c3 --- /dev/null +++ b/src/Crypto/blake2s_SSSE3.c @@ -0,0 +1,47 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
+*/ + +/* Adapted for VeraCrypt */ + +#include "blake2.h" +#include "Common/Endian.h" +#include "Crypto/config.h" +#include "Crypto/cpu.h" +#include "Crypto/misc.h" + +#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE +#if CRYPTOPP_BOOL_SSSE3_INTRINSICS_AVAILABLE + +#define HAVE_SSSE3 + +#include "blake2s-round.h" + +int blake2s_has_ssse3() +{ + return 1; +} + +#else +int blake2s_has_ssse3() +{ + return 0; +} +#endif +#else +int blake2s_has_ssse3() +{ + return 0; +} +#endif diff --git a/src/Crypto/chacha-xmm.c b/src/Crypto/chacha-xmm.c index 198d0b5b..478de594 100644 --- a/src/Crypto/chacha-xmm.c +++ b/src/Crypto/chacha-xmm.c @@ -139,22 +139,15 @@ void chacha_ECRYPT_encrypt_bytes(size_t bytes, uint32* x, const uint8* m, uint8* #endif if (!bytes) return; - for (;;) { - salsa20_wordtobyte(output,x, r); - x[12] = PLUSONE(x[12]); - if (!x[12]) { - x[13] = PLUSONE(x[13]); - /* stopping at 2^70 bytes per nonce is user's responsibility */ - } - if (bytes <= 64) { - for (i = 0;i < bytes;++i) out[i] = m[i] ^ output[i]; - return; - } - for (i = 0;i < 64;++i) out[i] = m[i] ^ output[i]; - bytes -= 64; - out += 64; - m += 64; + // bytes is now guaranteed to be between 1 and 63 + salsa20_wordtobyte(output,x, r); + x[12] = PLUSONE(x[12]); + if (!x[12]) { + x[13] = PLUSONE(x[13]); + /* stopping at 2^70 bytes per nonce is user's responsibility */ } + + for (i = 0;i < bytes;++i) out[i] = m[i] ^ output[i]; } #endif diff --git a/src/Crypto/config.h b/src/Crypto/config.h index 8e1e41fc..867c13dd 100644 --- a/src/Crypto/config.h +++ b/src/Crypto/config.h @@ -113,13 +113,13 @@ #define CRYPTOPP_X64_ASM_AVAILABLE #endif -#if !defined(CRYPTOPP_DISABLE_SSE2) && (defined(CRYPTOPP_MSVC6PP_OR_LATER) || defined(__SSE2__)) && !defined(_M_ARM) && !defined(_M_ARM64) +#if !defined(CRYPTOPP_DISABLE_SSE2) && (defined(CRYPTOPP_MSVC6PP_OR_LATER) || defined(__SSE2__)) && !defined(_M_ARM) && !defined(_M_ARM64) && !defined(__arm__) && !defined(__aarch64__) && !defined(__arm64__) #define 
CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE 1 #else #define CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE 0 #endif -#if !defined(CRYPTOPP_DISABLE_ASM) && !defined(CRYPTOPP_DISABLE_SSSE3) && !defined(_M_ARM) && !defined(_M_ARM64) && ( \ +#if !defined(CRYPTOPP_DISABLE_ASM) && !defined(CRYPTOPP_DISABLE_SSSE3) && !defined(_M_ARM) && !defined(_M_ARM64) && !defined(__arm__) && !defined(__aarch64__) && !defined(__arm64__) && ( \ defined(__SSSE3__) || (_MSC_VER >= 1500) || \ (CRYPTOPP_GCC_VERSION >= 40300) || (__INTEL_COMPILER >= 1000) || (__SUNPRO_CC >= 0x5110) || \ (CRYPTOPP_LLVM_CLANG_VERSION >= 20300) || (CRYPTOPP_APPLE_CLANG_VERSION >= 40000)) @@ -128,6 +128,12 @@ #define CRYPTOPP_SSSE3_AVAILABLE 0 # endif +#if !defined(CRYPTOPP_DISABLE_SSSE3) && (defined(__SSSE3__) || (_MSC_VER >= 1500)) && !defined(_M_ARM) && !defined(_M_ARM64) && !defined(__arm__) && !defined(__aarch64__) && !defined(__arm64__) + #define CRYPTOPP_BOOL_SSSE3_INTRINSICS_AVAILABLE 1 +#else + #define CRYPTOPP_BOOL_SSSE3_INTRINSICS_AVAILABLE 0 +#endif + #if !defined(CRYPTOPP_DISABLE_SSSE3) && !defined(CRYPTOPP_DISABLE_AESNI) && CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE && (CRYPTOPP_GCC_VERSION >= 40400 || _MSC_FULL_VER >= 150030729 || __INTEL_COMPILER >= 1110 || defined(__AES__)) #define CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE 1 #else @@ -140,7 +146,7 @@ #define CRYPTOPP_BOOL_ALIGN16 0 #endif -#if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE && (defined(__SSE4_1__) || defined(__INTEL_COMPILER) || defined(_MSC_VER)) +#if CRYPTOPP_BOOL_SSSE3_INTRINSICS_AVAILABLE && (defined(__SSE4_1__) || defined(__INTEL_COMPILER) || defined(_MSC_VER)) #define CRYPTOPP_BOOL_SSE41_INTRINSICS_AVAILABLE 1 #else #define CRYPTOPP_BOOL_SSE41_INTRINSICS_AVAILABLE 0 diff --git a/src/Crypto/cpu.c b/src/Crypto/cpu.c index 99b81700..effde6ba 100644 --- a/src/Crypto/cpu.c +++ b/src/Crypto/cpu.c @@ -280,7 +280,7 @@ static int Detect_MS_HyperV_AES () // when Hyper-V is enabled on older versions of Windows Server (i.e. 
2008 R2), the AES-NI capability // gets masked out for all applications, even running on the host. // We try to detect Hyper-V virtual CPU and perform a dummy AES-NI operation to check its real presence - uint32 cpuid[4]; + uint32 cpuid[4] = {0}; char HvProductName[13]; CpuId(0x40000000, cpuid); @@ -330,7 +330,9 @@ void DetectX86Features() g_hasSSE42 = g_hasSSE2 && (cpuid1[2] & (1 << 20)); g_hasSSE41 = g_hasSSE2 && (cpuid1[2] & (1 << 19)); g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9)); +#ifndef CRYPTOPP_DISABLE_AESNI g_hasAESNI = g_hasSSE2 && (cpuid1[2] & (1<<25)); +#endif g_hasCLMUL = g_hasSSE2 && (cpuid1[2] & (1<<1)); #if !defined (_UEFI) && ((defined(__AES__) && defined(__PCLMUL__)) || defined(__INTEL_COMPILER) || CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE) @@ -346,7 +348,6 @@ void DetectX86Features() g_hasISSE = 1; else { - uint32 cpuid2[4]; CpuId(0x080000000, cpuid2); if (cpuid2[0] >= 0x080000001) { diff --git a/src/Crypto/cpu.h b/src/Crypto/cpu.h index e4e05a0c..2661bf1c 100644 --- a/src/Crypto/cpu.h +++ b/src/Crypto/cpu.h @@ -134,6 +134,11 @@ extern __m128i _mm_slli_epi16(__m128i _A, int _Count); extern __m128i _mm_shuffle_epi32 (__m128i a, int imm8); extern __m128i _mm_set_epi64x (__int64 e1, __int64 e0); extern __m128i _mm_set1_epi64x (__int64 a); +extern __m128i _mm_castps_si128(__m128); +extern __m128 _mm_castsi128_ps(__m128i); +extern __m128 _mm_shuffle_ps(__m128 _A, __m128 _B, unsigned int _Imm8); +extern __m128i _mm_srli_si128(__m128i _A, int _Imm); +extern __m128i _mm_slli_si128(__m128i _A, int _Imm); #define _mm_xor_si64 _m_pxor #define _mm_empty _m_empty #define _MM_SHUFFLE(fp3,fp2,fp1,fp0) (((fp3) << 6) | ((fp2) << 4) | \ @@ -147,13 +152,13 @@ extern __m128i _mm_set1_epi64x (__int64 a); #endif #endif -#if CRYPTOPP_SSSE3_AVAILABLE || defined(__INTEL_COMPILER) -#if defined (_MSC_VER) && !defined (TC_WINDOWS_BOOT) +#if CRYPTOPP_BOOL_SSSE3_INTRINSICS_AVAILABLE || defined(__INTEL_COMPILER) #if defined(TC_WINDOWS_DRIVER) || defined (_UEFI) #if 
defined(__cplusplus) extern "C" { #endif extern __m128i _mm_shuffle_epi8 (__m128i a, __m128i b); +extern __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int n); #if defined(__cplusplus) } #endif @@ -162,13 +167,14 @@ extern __m128i _mm_shuffle_epi8 (__m128i a, __m128i b); #endif #endif -#if defined(__SSE4_1__) || defined(__INTEL_COMPILER) || defined(_MSC_VER) +#if CRYPTOPP_BOOL_SSE41_INTRINSICS_AVAILABLE || defined(__INTEL_COMPILER) #if defined(TC_WINDOWS_DRIVER) || defined (_UEFI) #if defined(__cplusplus) extern "C" { #endif extern int _mm_extract_epi32(__m128i src, const int ndx); extern __m128i _mm_insert_epi32(__m128i dst, int s, const int ndx); +extern __m128i _mm_blend_epi16 (__m128i v1, __m128i v2, const int mask); #if defined(_M_X64) extern __m128i _mm_insert_epi64(__m128i dst, __int64 s, const int ndx); #endif @@ -200,7 +206,6 @@ extern __m128i _mm_aesdeclast_si128(__m128i v, __m128i rkey); #include <wmmintrin.h> #endif #endif -#endif #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 @@ -209,6 +214,9 @@ extern "C" { #endif #define CRYPTOPP_CPUID_AVAILABLE +#if !defined(CRYPTOPP_DISABLE_AESNI) && !defined(WOLFCRYPT_BACKEND) +#define TC_AES_HW_CPU +#endif // these should not be used directly extern volatile int g_x86DetectionDone; @@ -230,7 +238,7 @@ extern volatile int g_isIntel; extern volatile int g_isAMD; extern volatile uint32 g_cacheLineSize; void DetectX86Features(); // must be called at the start of the program/driver -int CpuId(uint32 input, uint32 *output); +int CpuId(uint32 input, uint32 output[4]); // disable all CPU extended features (e.g. SSE, AVX, AES) that may have // been enabled by DetectX86Features. void DisableCPUExtendedFeatures (); diff --git a/src/Crypto/gost89_x64.asm b/src/Crypto/gost89_x64.asm deleted file mode 100644 index 5f5892fe..00000000 --- a/src/Crypto/gost89_x64.asm +++ /dev/null @@ -1,483 +0,0 @@ -; -; GOST89 implementation x64 -; -; Copyright (c) 2016. 
Disk Cryptography Services for EFI (DCS), Alex Kolotnikov -; -; This program and the accompanying materials -; are licensed and made available under the terms and conditions -; of the Apache License, Version 2.0. -; -; The full text of the license may be found at -; https://opensource.org/licenses/Apache-2.0 -; -; Some ideas from article https://xakep.ru/2013/10/19/shifrovanie-gost-28147-89/ -; - -[section .bss align=16] - -;/////////////////////////////////////////////////////////////////// -;// Win64 registers to save -;/////////////////////////////////////////////////////////////////// -%macro SaveRegs 0 - sub rsp,8*8+10*16 - mov [rsp], rbx - mov [rsp+8], rbp - mov [rsp+8*2], rdi - mov [rsp+8*3], rsi - mov [rsp+8*4], r12 - mov [rsp+8*5], r13 - mov [rsp+8*6], r14 - mov [rsp+8*7], r15 -%endmacro - -%macro RestoreRegs 0 - mov rbx, [rsp] - mov rbp, [rsp+8] - mov rdi, [rsp+8*2] - mov rsi, [rsp+8*3] - mov r12, [rsp+8*4] - mov r13, [rsp+8*5] - mov r14, [rsp+8*6] - mov r15, [rsp+8*7] - add rsp,8*8+10*16 -%endmacro - -[section .text align=16] -;/////////////////////////////////////////////////////////////////// -;// Crypting 2 blocks -;/////////////////////////////////////////////////////////////////// -%macro gost_round2 2 ; 1 - pos1, 2 - pos2 - ; 1st - ; 1-2 byte - add ecx, r13d ; add key - movzx edi, cl - movzx esi, ch - xor r14d, dword [r8 + 32 + 256*3*4 + rdi*4] - xor r14d, dword [r8 + 32 + 256*2*4 + rsi*4] - shr ecx, 16 - ; 3-4 байт - movzx edi, cl - xor r14d, dword [r8 + 32 + 256*4 + rdi*4] - movzx esi, ch - xor r14d, dword [r8 + 32 + rsi*4] - mov edx, [r8 + %1*4] ; read key for second step - - ; 2nd - ; 1-2 byte - add eax, r10d ; read key - movzx r15d, al - movzx ebp, ah - xor r11d, dword [r8 + 32 + 256*3*4 + r15*4] - xor r11d, dword [r8 + 32 + 256*2*4 + rbp*4] - shr eax, 16 - ; 3-4 байт - movzx r15d, al - xor r11d, dword [r8 + 32 + 256*4 + r15*4] - movzx ebp, ah - xor r11d, dword [r8 + 32 + rbp*4] - mov ebx, [r8 + %1*4] ; read key for second step - - ; second 
step - ; 1st - ; 1-2 byte - add edx, r14d ; add key - movzx edi, dl - movzx esi, dh - xor r13d, dword [r8 + 32 + 256*3*4 + rdi*4] - xor r13d, dword [r8 + 32 + 256*2*4 + rsi*4] - shr edx, 16 - ; 3-4 байт - movzx edi, dl - xor r13d, dword [r8 + 32 + 256*4 + rdi*4] - movzx esi, dh - xor r13d, dword [r8 + 32 + rsi*4] - mov ecx, [r8 + %2*4] ; read key - - ; 2nd - ; 1-2 byte - add ebx, r11d; ; add key - movzx r15d, bl; - movzx ebp, bh; - xor r10d, dword [r8 + 32 + 256*3*4 + r15*4] - xor r10d, dword [r8 + 32 + 256*2*4 + rbp*4] - shr ebx, 16 - ; 3-4 байт - movzx r15d, bl - xor r10d, dword [r8 + 32 + 256*4 + r15*4] - movzx ebp, bh - xor r10d, dword [r8 + 32 + rbp*4] - mov eax, [r8 + %2*4] ; read key -%endmacro - -; input: r8 - &key, rcx - &IN -; returns: (r13) & (r10) -GostEncrypt2x64: - ; 1st - mov r13d, [rcx] - mov r14, [rcx] - shr r14, 32 - - ; 2nd - mov r10d, [rcx + 16] - mov r11, [rcx + 16] - shr r11, 32 - - mov ecx, [r8] - mov eax, ecx - - gost_round2 1, 2 - gost_round2 3, 4 - gost_round2 5, 6 - gost_round2 7, 0 - - gost_round2 1, 2 - gost_round2 3, 4 - gost_round2 5, 6 - gost_round2 7, 0 - - gost_round2 1, 2 - gost_round2 3, 4 - gost_round2 5, 6 - gost_round2 7, 7 - - gost_round2 6, 5 - gost_round2 4, 3 - gost_round2 2, 1 - gost_round2 0, 0 - - shl r13, 32 ; combine - or r13, r14 - - shl r10, 32 ; combine - or r10, r11 - ret - -; input: r8 - &key, rcx - &IN -; returns: (r13) & (r10) -GostDecrypt2x64: - ; 1st - mov r13d, [rcx] - mov r14, [rcx] - shr r14, 32 - - ; 2nd - mov r10d, [rcx + 16] - mov r11, [rcx + 16] - shr r11, 32 - - mov ecx, [r8] - mov eax, ecx - - gost_round2 1, 2 - gost_round2 3, 4 - gost_round2 5, 6 - gost_round2 7, 7 - - gost_round2 6, 5 - gost_round2 4, 3 - gost_round2 2, 1 - gost_round2 0, 7 - - gost_round2 6, 5 - gost_round2 4, 3 - gost_round2 2, 1 - gost_round2 0, 7 - - gost_round2 6, 5 - gost_round2 4, 3 - gost_round2 2, 1 - gost_round2 0, 0 - - shl r13, 32 ; combine - or r13, r14 - - shl r10, 32 ; combine - or r10, r11 -ret - 
-;/////////////////////////////////////////////////////////////////// -;// Crypting 1 block -;/////////////////////////////////////////////////////////////////// -%macro gost_round1 2 ; 1 - pos1, 2 - pos2 - ; 1-2 byte - add ecx, r13d ; add key - movzx edi, cl - movzx esi, ch - xor r14d, dword [r8 + 32 + 256*3*4 + rdi*4] - xor r14d, dword [r8 + 32 + 256*2*4 + rsi*4] - shr ecx, 16 - ; 3-4 байт - movzx edi, cl - xor r14d, dword [r8 + 32 + 256*4 + rdi*4] - movzx esi, ch - xor r14d, dword [r8 + 32 + rsi*4] - mov edx, [r8 + %1*4] ; read key for second step - - ; second step - ; 1-2 byte - add edx, r14d ; add key - movzx edi, dl - movzx esi, dh - xor r13d, dword [r8 + 32 + 256*3*4 + rdi*4] - xor r13d, dword [r8 + 32 + 256*2*4 + rsi*4] - shr edx, 16 - ; 3-4 байт - movzx edi, dl - xor r13d, dword [r8 + 32 + 256*4 + rdi*4] - movzx esi, dh - xor r13d, dword [r8 + 32 + rsi*4] - mov ecx, [r8 + %2*4] ; read key -%endmacro - -; input: r8 - &gost_kds rcx - &IN -; returns: r13 -GostEncrypt1x64: - mov r13d, [rcx] - mov r14, [rcx] - shr r14, 32 - mov ecx, [r8] - - gost_round1 1, 2 - gost_round1 3, 4 - gost_round1 5, 6 - gost_round1 7, 0 - - gost_round1 1, 2 - gost_round1 3, 4 - gost_round1 5, 6 - gost_round1 7, 0 - - gost_round1 1, 2 - gost_round1 3, 4 - gost_round1 5, 6 - gost_round1 7, 7 - - gost_round1 6, 5 - gost_round1 4, 3 - gost_round1 2, 1 - gost_round1 0, 0 - - shl r13, 32 ; combine - or r13, r14 -ret - -; input: r8 - &gost_kds rcx - IN -; returns: r13 -GostDecrypt1x64: - mov r13d, [rcx] - mov r14, [rcx] - shr r14, 32 - mov ecx, [r8] - - gost_round1 1, 2 - gost_round1 3, 4 - gost_round1 5, 6 - gost_round1 7, 7 - - gost_round1 6, 5 - gost_round1 4, 3 - gost_round1 2, 1 - gost_round1 0, 7 - - gost_round1 6, 5 - gost_round1 4, 3 - gost_round1 2, 1 - gost_round1 0, 7 - - gost_round1 6, 5 - gost_round1 4, 3 - gost_round1 2, 1 - gost_round1 0, 0 - - shl r13, 32 ; combine - or r13, r14 -ret - -global gost_encrypt_128_CBC_asm ; gost_encrypt_128_CBC_asm(uint64* in, uint64* out, 
gost_kds* kds, uint64 count); -; rcx - &in -; rdx - &out -; r8 - &gost_kds -; r9 - count -gost_encrypt_128_CBC_asm: - SaveRegs ; Saving - - sub rsp, 32 - mov [rsp], rdx ; Save out addr - mov [rsp + 8], rcx ; Save in addr - mov [rsp + 16], r8 ; key addr - -.do: - mov [rsp + 24], r9 ; Save count - cmp r9, 2 - jge .blk2 - cmp r9, 1 - jge .blk1 - jmp .end - -; One 128 block encryption -.blk1: - mov rcx, [rsp + 8] ; set in addr - call GostEncrypt1x64 - - mov rdx, [rsp] ; Restore out - mov rcx, [rsp + 8] ; restore in - - mov [rdx], r13 - mov rax, [rcx + 8] - xor rax, r13 ; CBC - - add rdx, 8 ;next 8 bytes - mov [rdx], rax - - mov rcx, rdx - call GostEncrypt1x64 - - mov rdx, [rsp] ; Restore out addr - mov rcx, [rsp+8] ; Restore in addr - - mov [rdx + 8], r13 - - add rdx,16 - mov [rsp], rdx - - add rcx, 16 - mov [rsp+8], rcx - - mov r9, [rsp + 24] - dec r9 - - jmp .do - -.blk2: - mov rcx, [rsp + 8] ; set in addr - call GostEncrypt2x64 - - mov rdx, [rsp] ; Restore out - mov rcx, [rsp + 8] ; restore in - - mov [rdx], r13 - - mov rax, [rcx + 8] - xor rax, r13 ; CBC - - mov [rdx + 16], r10 - - mov rbx, [rcx + 24] - xor rbx, r10 ; CBC - - mov [rdx + 8], rax - mov [rdx + 24], rbx - - add rdx, 8 ;next 8 bytes - - mov rcx, rdx - call GostEncrypt2x64 - - mov rdx, [rsp] ; Restore out addr - mov rcx, [rsp+8] ; Restore in addr - - mov [rdx + 8], r13 - mov [rdx + 24], r10 - - add rdx,32 - mov [rsp], rdx - - add rcx, 32 - mov [rsp+8], rcx - - mov r9, [rsp + 24] - sub r9, 2 - - jmp .do - -.end: - add rsp, 32 ; Load out addr - RestoreRegs ; Load -ret - -global gost_decrypt_128_CBC_asm ; gost_decrypt_128_CBC_asm(uint64* in, uint64* out, const gost_kds* kds, uint64 count); -; rcx - &in -; rdx - &out -; r8 - &gost_kds -; r9 - count -gost_decrypt_128_CBC_asm: - SaveRegs ; Saving - - sub rsp, 32 - mov [rsp], rdx ; Save out addr - mov [rsp+8], rcx ; Save in addr - mov [rsp+16], r8 ; key addr - -.do: - mov [rsp + 24], r9 ; Save count - cmp r9, 2 - jge .blk2 - cmp r9, 1 - jge .blk1 - jmp .end - 
-; One 128 block decryption -.blk1: - add rcx, 8 - call GostDecrypt1x64 - mov rdx, [rsp] ; Restore out - mov rcx, [rsp + 8] ; Restore in - mov rax, [rcx] - xor rax, r13 ; CBC - mov [rdx + 8], rax - - call GostDecrypt1x64 - - mov rdx, [rsp] ; Restore out addr - mov rcx, [rsp+8] ; Restore in addr - - mov [rdx], r13 - - add rdx,16 - mov [rsp], rdx - - add rcx, 16 - mov [rsp+8], rcx - - mov r9, [rsp + 24] - dec r9 - - jmp .do - -.blk2: - add rcx, 8 - call GostDecrypt2x64 - mov rdx, [rsp] ; Restore out - mov rcx, [rsp + 8] ; Restore in - - mov rax, [rcx] - xor rax, r13 ; CBC - mov [rdx + 8], rax - - mov rbx, [rcx+16] - xor rbx, r10 ; CBC - mov [rdx + 24], rbx - - call GostDecrypt2x64 - - mov rdx, [rsp] ; Restore out addr - mov rcx, [rsp+8] ; Restore in addr - - mov [rdx], r13 - mov [rdx+16], r10 - - add rdx,32 - mov [rsp], rdx - - add rcx,32 - mov [rsp+8], rcx - - mov r9, [rsp + 24] - sub r9, 2 - - jmp .do - -.end: - add rsp, 32 ; Load out addr - RestoreRegs ; Load -ret diff --git a/src/Crypto/jitterentropy-base-user.h b/src/Crypto/jitterentropy-base-user.h index 3a33dcd6..aaefb41a 100644 --- a/src/Crypto/jitterentropy-base-user.h +++ b/src/Crypto/jitterentropy-base-user.h @@ -79,12 +79,14 @@ static VC_INLINE void jent_get_nstime(uint64 *out) #endif * out = v.QuadPart; #else - *out = __rdtsc();; + *out = __rdtsc(); #endif } #else +#include <sys/types.h> + #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 /* taken from Linux kernel */ diff --git a/src/Crypto/jitterentropy-base.c b/src/Crypto/jitterentropy-base.c index c3856b41..b7512532 100644 --- a/src/Crypto/jitterentropy-base.c +++ b/src/Crypto/jitterentropy-base.c @@ -53,7 +53,7 @@ #ifdef TC_WINDOWS_DRIVER -#define UINT64_MAX 0xffffffffffffffffU +#define UINT64_MAX 0xffffffffffffffffui64 #else #include <stdint.h> #endif @@ -345,7 +345,7 @@ static uint64_t jent_loop_shuffle(struct rand_data *ec, * We add a lower boundary value to ensure we have a minimum * RNG loop count. 
*/ - return (shuffle + (1<<min)); + return (shuffle + (1ULL<<min)); } /** diff --git a/src/Crypto/misc.h b/src/Crypto/misc.h index 47d0288a..25313d1d 100644 --- a/src/Crypto/misc.h +++ b/src/Crypto/misc.h @@ -151,7 +151,7 @@ VC_INLINE uint64 ByteReverseWord64(uint64 value) #endif } -VC_INLINE void CorrectEndianess(uint64 *out, const uint64 *in, size_t byteCount) +VC_INLINE void CorrectEndianness(uint64 *out, const uint64 *in, size_t byteCount) { size_t i, count = byteCount/sizeof(uint64); diff --git a/src/Crypto/wolfCrypt.c b/src/Crypto/wolfCrypt.c new file mode 100644 index 00000000..39ab93a7 --- /dev/null +++ b/src/Crypto/wolfCrypt.c @@ -0,0 +1,243 @@ +/* See src/Crypto/wolfCrypt.md */ + +#include "Aes.h" +#include "Sha2.h" +#include "../Common/Crypto.h" +#include <wolfssl/wolfcrypt/hmac.h> + + +AES_RETURN aes_init() +{ +#if defined( AES_ERR_CHK ) + return EXIT_SUCCESS; +#else + return; +#endif +} + +AES_RETURN aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1]) +{ + int ret = 0; + + ret = wc_AesInit(&cx->wc_enc_aes, NULL, INVALID_DEVID); + + if (key_len == 128 || key_len == 192 || key_len == 256) + key_len = key_len/8; + + if (ret == 0) { + ret = wc_AesSetKey(&cx->wc_enc_aes, key, key_len, NULL, AES_ENCRYPTION); + } + +#if defined( AES_ERR_CHK ) + return ret ? EXIT_FAILURE : EXIT_SUCCESS; +#else + return; +#endif +} + +AES_RETURN aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1]) +{ + int ret = 0; + + ret = wc_AesInit(&cx->wc_dec_aes, NULL, INVALID_DEVID); + + if (key_len == 128 || key_len == 192 || key_len == 256) + key_len = key_len/8; + + if (ret == 0) { + ret = wc_AesSetKey(&cx->wc_dec_aes, key, key_len, NULL, AES_DECRYPTION); + } + +#if defined( AES_ERR_CHK ) + return ret ? 
EXIT_FAILURE : EXIT_SUCCESS; +#else + return; +#endif +} + +AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]) +{ + return aes_encrypt_key(key, 128, cx); +} + +AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]) +{ + return aes_encrypt_key(key, 192, cx); +} + +AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]) +{ + return aes_encrypt_key(key, 256, cx); +} + +AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]) +{ + return aes_decrypt_key(key, 128, cx); +} + +AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]) +{ + return aes_decrypt_key(key, 192, cx); +} + +AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]) +{ + return aes_decrypt_key(key, 256, cx); +} + +AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1]) +{ + int ret = wc_AesEncryptDirect(&cx->wc_enc_aes, out, in); +#if defined( AES_ERR_CHK ) + return ret ? EXIT_FAILURE : EXIT_SUCCESS; +#else + return; +#endif + +} + +AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]) +{ + int ret = wc_AesDecryptDirect(&cx->wc_dec_aes, out, in); +#if defined( AES_ERR_CHK ) + return ret ? EXIT_FAILURE : EXIT_SUCCESS; +#else + return; +#endif + +} + +AES_RETURN xts_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1]) +{ + int ret = 0; + + cx->wc_enc_xts.aes = cx->wc_enc_aes; + + ret = wc_AesInit(&cx->wc_enc_xts.tweak, NULL, INVALID_DEVID); + + if (key_len == 128 || key_len == 192 || key_len == 256) + key_len = key_len/8; + + if (ret == 0) { + ret = wc_AesSetKey(&cx->wc_enc_xts.tweak, key, key_len, NULL, AES_ENCRYPTION); + } +#if defined( AES_ERR_CHK ) + return ret ? 
EXIT_FAILURE : EXIT_SUCCESS; +#else + return; +#endif +} + +AES_RETURN xts_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1]) +{ + int ret = 0; + + cx->wc_dec_xts.aes = cx->wc_dec_aes; + + ret = wc_AesInit(&cx->wc_dec_xts.tweak, NULL, INVALID_DEVID); + + if (key_len == 128 || key_len == 192 || key_len == 256) + key_len = key_len/8; + + if (ret == 0) { + ret = wc_AesSetKey(&cx->wc_dec_xts.tweak, key, key_len, NULL, AES_ENCRYPTION); + } + +#if defined( AES_ERR_CHK ) + return ret ? EXIT_FAILURE : EXIT_SUCCESS; +#else + return; +#endif +} + +AES_RETURN xts_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]) +{ + return xts_encrypt_key(key, 256, cx); +} + +AES_RETURN xts_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]) +{ + return xts_decrypt_key(key, 256, cx); +} + +AES_RETURN xts_encrypt(const unsigned char *in, unsigned char *out, word64 length, word64 sector, const aes_encrypt_ctx cx[1]) +{ + int ret = wc_AesXtsEncryptConsecutiveSectors(&cx->wc_enc_xts, out, in, length, sector, ENCRYPTION_DATA_UNIT_SIZE); + +#if defined( AES_ERR_CHK ) + return ret ? EXIT_FAILURE : EXIT_SUCCESS; +#else + return; +#endif + +} + +AES_RETURN xts_decrypt(const unsigned char *in, unsigned char *out, word64 length, word64 sector, const aes_decrypt_ctx cx[1]) +{ + int ret = wc_AesXtsDecryptConsecutiveSectors(&cx->wc_dec_xts, out, in, length, sector, ENCRYPTION_DATA_UNIT_SIZE); + +#if defined( AES_ERR_CHK ) + return ret ? 
EXIT_FAILURE : EXIT_SUCCESS; +#else + return; +#endif +} + + +void sha256_begin(sha256_ctx* ctx) +{ + wc_InitSha256(ctx); +} + +void sha256_hash(const unsigned char * source, uint_32t sourceLen, sha256_ctx *ctx) +{ + wc_Sha256Update(ctx, source, sourceLen); +} + +void sha256_end(unsigned char * result, sha256_ctx* ctx) +{ + wc_Sha256Final(ctx, result); +} + +void sha256(unsigned char * result, const unsigned char* source, uint_32t sourceLen) +{ + wc_Sha256 sha256; + wc_InitSha256(&sha256); + wc_Sha256Update(&sha256, source, sourceLen); + wc_Sha256Final(&sha256, result); + wc_Sha256Free(&sha256); +} + +void sha512_begin(sha512_ctx* ctx) +{ + wc_InitSha512(ctx); +} + +void sha512_hash(const unsigned char * source, uint_64t sourceLen, sha512_ctx *ctx) +{ + wc_Sha512Update(ctx, source, sourceLen); +} + +void sha512_end(unsigned char * result, sha512_ctx* ctx) +{ + wc_Sha512Final(ctx, result); +} + +void sha512(unsigned char * result, const unsigned char* source, uint_64t sourceLen) +{ + wc_Sha512 sha512; + wc_InitSha512(&sha512); + wc_Sha512Update(&sha512, source, sourceLen); + wc_Sha512Final(&sha512, result); + wc_Sha512Free(&sha512); +} + +void derive_key_sha512 (char *pwd, int pwd_len, char *salt, int salt_len, uint32 iterations, char *dk, int dklen) { + (void) iterations; + wc_HKDF(WC_SHA512, (byte*)pwd, (word32)pwd_len, (byte*)salt, (word32)salt_len, NULL, 0, (byte*)dk, (word32)dklen); +} + +void derive_key_sha256 (char *pwd, int pwd_len, char *salt, int salt_len, uint32 iterations, char *dk, int dklen) { + (void) iterations; + wc_HKDF(WC_SHA256, (byte*)pwd, (word32)pwd_len, (byte*)salt, (word32)salt_len, NULL, 0, (byte*)dk, (word32)dklen); +} diff --git a/src/Crypto/wolfCrypt.md b/src/Crypto/wolfCrypt.md new file mode 100644 index 00000000..32ccf242 --- /dev/null +++ b/src/Crypto/wolfCrypt.md @@ -0,0 +1,25 @@ +# wolfSSL as crypto provider for VeraCrypt + +[wolfCrypt](https://www.wolfssl.com/products/wolfcrypt/) is wolfSSL's cutting edge crypto engine and a 
+potential FIPS solution for users of VeraCrypt. Follow the steps below to set up VeraCrypt with wolfCrypt. + +## Building wolfSSL + +Clone wolfSSL and build it as shown below. + +``` +git clone https://github.com/wolfssl/wolfssl && cd wolfssl +./autogen.sh +./configure --enable-xts CFLAGS="-DNO_OLD_WC_NAMES" +make +sudo make install +``` + +## Building VeraCrypt with wolfSSL + +Build VeraCrypt with the `WOLFCRYPT` command-line option. + +``` +make WXSTATIC=1 wxbuild && make WXSTATIC=1 clean && make WXSTATIC=1 WOLFCRYPT=1 && make WXSTATIC=1 WOLFCRYPT=1 package +``` + |