From a239789317646e3b0fa69a48af97144c49d67c79 Mon Sep 17 00:00:00 2001 From: Mounir IDRASSI Date: Mon, 21 Mar 2022 00:23:48 +0100 Subject: Add missing Blake2s source files --- src/Crypto/blake2-impl.h | 86 ++++++++++ src/Crypto/blake2.h | 102 ++++++++++++ src/Crypto/blake2s-load-sse2.h | 64 ++++++++ src/Crypto/blake2s-load-sse41.h | 240 +++++++++++++++++++++++++++ src/Crypto/blake2s-ref.c | 336 ++++++++++++++++++++++++++++++++++++++ src/Crypto/blake2s-round.h | 159 ++++++++++++++++++ src/Crypto/blake2s.c | 349 ++++++++++++++++++++++++++++++++++++++++ src/Crypto/blake2s_SSE2.c | 39 +++++ src/Crypto/blake2s_SSE41.c | 52 ++++++ src/Crypto/blake2s_SSSE3.c | 47 ++++++ 10 files changed, 1474 insertions(+) create mode 100644 src/Crypto/blake2-impl.h create mode 100644 src/Crypto/blake2.h create mode 100644 src/Crypto/blake2s-load-sse2.h create mode 100644 src/Crypto/blake2s-load-sse41.h create mode 100644 src/Crypto/blake2s-ref.c create mode 100644 src/Crypto/blake2s-round.h create mode 100644 src/Crypto/blake2s.c create mode 100644 src/Crypto/blake2s_SSE2.c create mode 100644 src/Crypto/blake2s_SSE41.c create mode 100644 src/Crypto/blake2s_SSSE3.c diff --git a/src/Crypto/blake2-impl.h b/src/Crypto/blake2-impl.h new file mode 100644 index 00000000..ad9d88fe --- /dev/null +++ b/src/Crypto/blake2-impl.h @@ -0,0 +1,86 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#ifndef BLAKE2_IMPL_H +#define BLAKE2_IMPL_H + +#define NATIVE_LITTLE_ENDIAN + +#include +#include + +#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L) + #if defined(_MSC_VER) + #define BLAKE2_INLINE __inline + #elif defined(__GNUC__) + #define BLAKE2_INLINE __inline__ + #else + #define BLAKE2_INLINE + #endif +#else + #define BLAKE2_INLINE inline +#endif + +static BLAKE2_INLINE uint32_t load32( const void *src ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + uint32_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = ( const uint8_t * )src; + return (( uint32_t )( p[0] ) << 0) | + (( uint32_t )( p[1] ) << 8) | + (( uint32_t )( p[2] ) << 16) | + (( uint32_t )( p[3] ) << 24) ; +#endif +} + +static BLAKE2_INLINE void store16( void *dst, uint16_t w ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; +#endif +} + +static BLAKE2_INLINE void store32( void *dst, uint32_t w ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); +#endif +} + +static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c ) +{ + return ( w >> c ) | ( w << ( 32 - c ) ); +} + +/* prevents compiler optimizing out memset() */ +static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n) +{ + static void *(*const volatile memset_v)(void *, int, size_t) = &memset; + memset_v(v, 0, n); +} + +#endif diff --git a/src/Crypto/blake2.h b/src/Crypto/blake2.h new file mode 100644 index 00000000..490ed43b --- /dev/null +++ b/src/Crypto/blake2.h @@ -0,0 +1,102 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +/* Adapted for VeraCrypt */ + +#ifndef BLAKE2_H +#define BLAKE2_H +#include "Common/Tcdefs.h" + +#if defined(_MSC_VER) +#ifdef TC_WINDOWS_BOOT +#define BLAKE2_PACKED(x) x +#else +#define BLAKE2_PACKED(x) __pragma(pack(push, 1)) x __pragma(pack(pop)) +#endif + +#else +#define BLAKE2_PACKED(x) x __attribute__((packed)) +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + + enum blake2s_constant + { + BLAKE2S_BLOCKBYTES = 64, + BLAKE2S_OUTBYTES = 32, + BLAKE2S_KEYBYTES = 32, + BLAKE2S_SALTBYTES = 8, + BLAKE2S_PERSONALBYTES = 8 + }; + + typedef struct blake2s_state__ + { + uint32 h[8]; + uint32 t[2]; + uint32 f[2]; + uint8 buf[BLAKE2S_BLOCKBYTES]; + size_t buflen; + size_t outlen; + uint8 last_node; + } blake2s_state; + +#ifdef TC_WINDOWS_BOOT + #pragma pack(1) +#endif + + BLAKE2_PACKED(struct blake2s_param__ + { + uint8 digest_length; /* 1 */ + uint8 key_length; /* 2 */ + uint8 fanout; /* 3 */ + uint8 depth; /* 4 */ + uint32 leaf_length; /* 8 */ + uint32 node_offset; /* 12 */ + uint16 xof_length; /* 14 */ + uint8 node_depth; /* 15 */ + uint8 inner_length; /* 16 */ + /* uint8 reserved[0]; */ + uint8 salt[BLAKE2S_SALTBYTES]; /* 24 */ + uint8 personal[BLAKE2S_PERSONALBYTES]; /* 32 */ + }); + +#ifdef TC_WINDOWS_BOOT + #pragma pack() +#endif + + typedef struct blake2s_param__ blake2s_param; + + + /* Padded structs result in a compile-time error */ + enum { + BLAKE2_DUMMY_1 = 1/(int)(sizeof(blake2s_param) == BLAKE2S_OUTBYTES) + }; + + /* Streaming API */ + void blake2s_init( blake2s_state *S ); + void blake2s_init_param( blake2s_state *S, const blake2s_param *P ); + void blake2s_update( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final( blake2s_state *S, unsigned char *out ); + + /* Simple API */ + int blake2s( void *out, const void *in, size_t inlen ); + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/src/Crypto/blake2s-load-sse2.h b/src/Crypto/blake2s-load-sse2.h new file mode 100644 index 00000000..926eedda --- /dev/null +++ b/src/Crypto/blake2s-load-sse2.h @@ -0,0 +1,64 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +/* Adapted for VeraCrypt */ + + +#ifndef BLAKE2S_LOAD_SSE2_H +#define BLAKE2S_LOAD_SSE2_H + +#define LOAD_MSG_0_1(buf) buf = _mm_set_epi32(m6,m4,m2,m0) +#define LOAD_MSG_0_2(buf) buf = _mm_set_epi32(m7,m5,m3,m1) +#define LOAD_MSG_0_3(buf) buf = _mm_set_epi32(m12,m10,m8,m14) +#define LOAD_MSG_0_4(buf) buf = _mm_set_epi32(m13,m11,m9,m15) +#define LOAD_MSG_1_1(buf) buf = _mm_set_epi32(m13,m9,m4,m14) +#define LOAD_MSG_1_2(buf) buf = _mm_set_epi32(m6,m15,m8,m10) +#define LOAD_MSG_1_3(buf) buf = _mm_set_epi32(m11,m0,m1,m5) +#define LOAD_MSG_1_4(buf) buf = _mm_set_epi32(m7,m2,m12,m3) +#define LOAD_MSG_2_1(buf) buf = _mm_set_epi32(m15,m5,m12,m11) +#define LOAD_MSG_2_2(buf) buf = _mm_set_epi32(m13,m2,m0,m8) +#define LOAD_MSG_2_3(buf) buf = _mm_set_epi32(m7,m3,m10,m9) +#define LOAD_MSG_2_4(buf) buf = _mm_set_epi32(m1,m6,m14,m4) +#define LOAD_MSG_3_1(buf) buf = _mm_set_epi32(m11,m13,m3,m7) +#define LOAD_MSG_3_2(buf) buf = _mm_set_epi32(m14,m12,m1,m9) +#define LOAD_MSG_3_3(buf) buf = _mm_set_epi32(m4,m5,m2,m15) +#define LOAD_MSG_3_4(buf) buf = _mm_set_epi32(m0,m10,m6,m8) +#define LOAD_MSG_4_1(buf) buf = _mm_set_epi32(m10,m2,m5,m9) +#define LOAD_MSG_4_2(buf) buf = _mm_set_epi32(m15,m4,m7,m0) +#define LOAD_MSG_4_3(buf) buf = _mm_set_epi32(m6,m11,m14,m3) +#define LOAD_MSG_4_4(buf) buf = _mm_set_epi32(m8,m12,m1,m13) +#define LOAD_MSG_5_1(buf) buf = _mm_set_epi32(m8,m0,m6,m2) +#define LOAD_MSG_5_2(buf) buf = _mm_set_epi32(m3,m11,m10,m12) +#define LOAD_MSG_5_3(buf) buf = _mm_set_epi32(m15,m7,m4,m1) +#define LOAD_MSG_5_4(buf) buf = _mm_set_epi32(m14,m5,m13,m9) +#define LOAD_MSG_6_1(buf) buf = _mm_set_epi32(m4,m14,m1,m12) +#define LOAD_MSG_6_2(buf) buf = _mm_set_epi32(m10,m13,m15,m5) +#define LOAD_MSG_6_3(buf) buf = _mm_set_epi32(m9,m6,m0,m8) +#define LOAD_MSG_6_4(buf) buf = _mm_set_epi32(m2,m3,m7,m11) +#define LOAD_MSG_7_1(buf) buf = _mm_set_epi32(m3,m12,m7,m13) +#define LOAD_MSG_7_2(buf) buf = _mm_set_epi32(m9,m1,m14,m11) +#define LOAD_MSG_7_3(buf) buf = _mm_set_epi32(m8,m15,m5,m2) +#define LOAD_MSG_7_4(buf) buf = _mm_set_epi32(m6,m4,m0,m10) +#define LOAD_MSG_8_1(buf) buf = _mm_set_epi32(m0,m11,m14,m6) +#define LOAD_MSG_8_2(buf) buf = _mm_set_epi32(m8,m3,m9,m15) +#define LOAD_MSG_8_3(buf) buf = _mm_set_epi32(m1,m13,m12,m10) +#define LOAD_MSG_8_4(buf) buf = _mm_set_epi32(m4,m7,m2,m5) +#define LOAD_MSG_9_1(buf) buf = _mm_set_epi32(m1,m7,m8,m10) +#define LOAD_MSG_9_2(buf) buf = _mm_set_epi32(m5,m6,m4,m2) +#define LOAD_MSG_9_3(buf) buf = _mm_set_epi32(m3,m9,m15,m13) +#define LOAD_MSG_9_4(buf) buf = _mm_set_epi32(m12,m14,m11,m0) + + +#endif diff --git a/src/Crypto/blake2s-load-sse41.h b/src/Crypto/blake2s-load-sse41.h new file mode 100644 index 00000000..9ac3f52a --- /dev/null +++ b/src/Crypto/blake2s-load-sse41.h @@ -0,0 +1,240 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +/* Adapted for VeraCrypt */ + + +#ifndef BLAKE2S_LOAD_SSE41_H +#define BLAKE2S_LOAD_SSE41_H + +#define LOAD_MSG_0_1(buf) \ +buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(2,0,2,0))); + +#define LOAD_MSG_0_2(buf) \ +buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(3,1,3,1))); + +#define LOAD_MSG_0_3(buf) \ +t0 = _mm_shuffle_epi32(m2, _MM_SHUFFLE(3,2,0,1)); \ +t1 = _mm_shuffle_epi32(m3, _MM_SHUFFLE(0,1,3,2)); \ +buf = _mm_blend_epi16(t0, t1, 0xC3); + +#define LOAD_MSG_0_4(buf) \ +t0 = _mm_blend_epi16(t0, t1, 0x3C); \ +buf = _mm_shuffle_epi32(t0, _MM_SHUFFLE(2,3,0,1)); + +#define LOAD_MSG_1_1(buf) \ +t0 = _mm_blend_epi16(m1, m2, 0x0C); \ +t1 = _mm_slli_si128(m3, 4); \ +t2 = _mm_blend_epi16(t0, t1, 0xF0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,0,3)); + +#define LOAD_MSG_1_2(buf) \ +t0 = _mm_shuffle_epi32(m2,_MM_SHUFFLE(0,0,2,0)); \ +t1 = _mm_blend_epi16(m1,m3,0xC0); \ +t2 = _mm_blend_epi16(t0, t1, 0xF0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1)); + +#define LOAD_MSG_1_3(buf) \ +t0 = _mm_slli_si128(m1, 4); \ +t1 = _mm_blend_epi16(m2, t0, 0x30); \ +t2 = _mm_blend_epi16(m0, t1, 0xF0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,0,1,2)); + +#define LOAD_MSG_1_4(buf) \ +t0 = _mm_unpackhi_epi32(m0,m1); \ +t1 = _mm_slli_si128(m3, 4); \ +t2 = _mm_blend_epi16(t0, t1, 0x0C); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,0,1,2)); + +#define LOAD_MSG_2_1(buf) \ +t0 = _mm_unpackhi_epi32(m2,m3); \ +t1 = _mm_blend_epi16(m3,m1,0x0C); \ +t2 = _mm_blend_epi16(t0, t1, 0x0F); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2)); + +#define LOAD_MSG_2_2(buf) \ +t0 = _mm_unpacklo_epi32(m2,m0); \ +t1 = _mm_blend_epi16(t0, m0, 0xF0); \ +t2 = _mm_slli_si128(m3, 8); \ +buf = _mm_blend_epi16(t1, t2, 0xC0); + +#define LOAD_MSG_2_3(buf) \ +t0 = _mm_blend_epi16(m0, m2, 0x3C); \ +t1 = _mm_srli_si128(m1, 12); \ +t2 = _mm_blend_epi16(t0,t1,0x03); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(0,3,2,1)); + +#define LOAD_MSG_2_4(buf) \ +t0 = _mm_slli_si128(m3, 4); \ +t1 = _mm_blend_epi16(m0, m1, 0x33); \ +t2 = _mm_blend_epi16(t1, t0, 0xC0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,3,0)); + +#define LOAD_MSG_3_1(buf) \ +t0 = _mm_unpackhi_epi32(m0,m1); \ +t1 = _mm_unpackhi_epi32(t0, m2); \ +t2 = _mm_blend_epi16(t1, m3, 0x0C); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2)); + +#define LOAD_MSG_3_2(buf) \ +t0 = _mm_slli_si128(m2, 8); \ +t1 = _mm_blend_epi16(m3,m0,0x0C); \ +t2 = _mm_blend_epi16(t1, t0, 0xC0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3)); + +#define LOAD_MSG_3_3(buf) \ +t0 = _mm_blend_epi16(m0,m1,0x0F); \ +t1 = _mm_blend_epi16(t0, m3, 0xC0); \ +buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(0,1,2,3)); + +#define LOAD_MSG_3_4(buf) \ +t0 = _mm_alignr_epi8(m0, m1, 4); \ +buf = _mm_blend_epi16(t0, m2, 0x33); + +#define LOAD_MSG_4_1(buf) \ +t0 = _mm_unpacklo_epi64(m1,m2); \ +t1 = _mm_unpackhi_epi64(m0,m2); \ +t2 = _mm_blend_epi16(t0,t1,0x33); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3)); + +#define LOAD_MSG_4_2(buf) \ +t0 = _mm_unpackhi_epi64(m1,m3); \ +t1 = _mm_unpacklo_epi64(m0,m1); \ +buf = _mm_blend_epi16(t0,t1,0x33); + +#define LOAD_MSG_4_3(buf) \ +t0 = _mm_unpackhi_epi64(m3,m1); \ +t1 = _mm_unpackhi_epi64(m2,m0); \ +t2 = _mm_blend_epi16(t1,t0,0x33); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,0,3)); + +#define LOAD_MSG_4_4(buf) \ +t0 = _mm_blend_epi16(m0,m2,0x03); \ +t1 = _mm_slli_si128(t0, 8); \ +t2 = _mm_blend_epi16(t1,m3,0x0F); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,3,1)); + +#define LOAD_MSG_5_1(buf) \ +t0 = _mm_unpackhi_epi32(m0,m1); \ +t1 = _mm_unpacklo_epi32(m0,m2); \ +buf = _mm_unpacklo_epi64(t0,t1); + +#define LOAD_MSG_5_2(buf) \ +t0 = _mm_srli_si128(m2, 4); \ +t1 = _mm_blend_epi16(m0,m3,0x03); \ +buf = _mm_blend_epi16(t1,t0,0x3C); + +#define LOAD_MSG_5_3(buf) \ +t0 = _mm_blend_epi16(m1,m0,0x0C); \ +t1 = _mm_srli_si128(m3, 4); \ +t2 = _mm_blend_epi16(t0,t1,0x30); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1)); + +#define LOAD_MSG_5_4(buf) \ +t0 = _mm_unpacklo_epi64(m2,m1); \ +t1 = _mm_shuffle_epi32(m3, _MM_SHUFFLE(2,0,1,0)); \ +t2 = _mm_srli_si128(t0, 4); \ +buf = _mm_blend_epi16(t1,t2,0x33); + +#define LOAD_MSG_6_1(buf) \ +t0 = _mm_slli_si128(m1, 12); \ +t1 = _mm_blend_epi16(m0,m3,0x33); \ +buf = _mm_blend_epi16(t1,t0,0xC0); + +#define LOAD_MSG_6_2(buf) \ +t0 = _mm_blend_epi16(m3,m2,0x30); \ +t1 = _mm_srli_si128(m1, 4); \ +t2 = _mm_blend_epi16(t0,t1,0x03); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,3,0)); + +#define LOAD_MSG_6_3(buf) \ +t0 = _mm_unpacklo_epi64(m0,m2); \ +t1 = _mm_srli_si128(m1, 4); \ +t2 = _mm_blend_epi16(t0,t1,0x0C); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2)); + +#define LOAD_MSG_6_4(buf) \ +t0 = _mm_unpackhi_epi32(m1,m2); \ +t1 = _mm_unpackhi_epi64(m0,t0); \ +buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(0,1,2,3)); + +#define LOAD_MSG_7_1(buf) \ +t0 = _mm_unpackhi_epi32(m0,m1); \ +t1 = _mm_blend_epi16(t0,m3,0x0F); \ +buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(2,0,3,1)); + +#define LOAD_MSG_7_2(buf) \ +t0 = _mm_blend_epi16(m2,m3,0x30); \ +t1 = _mm_srli_si128(m0,4); \ +t2 = _mm_blend_epi16(t0,t1,0x03); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,2,3)); + +#define LOAD_MSG_7_3(buf) \ +t0 = _mm_unpackhi_epi64(m0,m3); \ +t1 = _mm_unpacklo_epi64(m1,m2); \ +t2 = _mm_blend_epi16(t0,t1,0x3C); \ +buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(2,3,1,0)); + +#define LOAD_MSG_7_4(buf) \ +t0 = _mm_unpacklo_epi32(m0,m1); \ +t1 = _mm_unpackhi_epi32(m1,m2); \ +t2 = _mm_unpacklo_epi64(t0,t1); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,0,3)); + +#define LOAD_MSG_8_1(buf) \ +t0 = _mm_unpackhi_epi32(m1,m3); \ +t1 = _mm_unpacklo_epi64(t0,m0); \ +t2 = _mm_blend_epi16(t1,m2,0xC0); \ +buf = _mm_shufflehi_epi16(t2,_MM_SHUFFLE(1,0,3,2)); + +#define LOAD_MSG_8_2(buf) \ +t0 = _mm_unpackhi_epi32(m0,m3); \ +t1 = _mm_blend_epi16(m2,t0,0xF0); \ +buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(0,2,1,3)); + +#define LOAD_MSG_8_3(buf) \ +t0 = _mm_unpacklo_epi64(m0,m3); \ +t1 = _mm_srli_si128(m2,8); \ +t2 = _mm_blend_epi16(t0,t1,0x03); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,3,2,0)); + +#define LOAD_MSG_8_4(buf) \ +t0 = _mm_blend_epi16(m1,m0,0x30); \ +buf = _mm_shuffle_epi32(t0,_MM_SHUFFLE(0,3,2,1)); + +#define LOAD_MSG_9_1(buf) \ +t0 = _mm_blend_epi16(m0,m2,0x03); \ +t1 = _mm_blend_epi16(m1,m2,0x30); \ +t2 = _mm_blend_epi16(t1,t0,0x0F); \ +buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(1,3,0,2)); + +#define LOAD_MSG_9_2(buf) \ +t0 = _mm_slli_si128(m0,4); \ +t1 = _mm_blend_epi16(m1,t0,0xC0); \ +buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(1,2,0,3)); + +#define LOAD_MSG_9_3(buf) \ +t0 = _mm_unpackhi_epi32(m0,m3); \ +t1 = _mm_unpacklo_epi32(m2,m3); \ +t2 = _mm_unpackhi_epi64(t0,t1); \ +buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,2,1,3)); + +#define LOAD_MSG_9_4(buf) \ +t0 = _mm_blend_epi16(m3,m2,0xC0); \ +t1 = _mm_unpacklo_epi32(m0,m3); \ +t2 = _mm_blend_epi16(t0,t1,0x0F); \ +buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(1,2,3,0)); + +#endif diff --git a/src/Crypto/blake2s-ref.c b/src/Crypto/blake2s-ref.c new file mode 100644 index 00000000..435630b9 --- /dev/null +++ b/src/Crypto/blake2s-ref.c @@ -0,0 +1,336 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +/* Adapted for VeraCrypt */ + +#include +#include + +#include "blake2.h" + +#pragma optimize ("tl", on) + +#pragma intrinsic(_lrotr) +#pragma intrinsic( memcpy ) +#pragma intrinsic( memset ) + +static const uint32 blake2s_IV[8] = +{ + 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, + 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL +}; + +static const uint8 blake2s_sigma[10][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , +}; + +static void blake2s_set_lastnode( blake2s_state *S ) +{ + S->f[1] = (uint32)-1; +} + +/* Some helper functions, not necessarily useful */ +static int blake2s_is_lastblock( const blake2s_state *S ) +{ + return S->f[0] != 0; +} + +static void blake2s_set_lastblock( blake2s_state *S ) +{ + if( S->last_node ) blake2s_set_lastnode( S ); + + S->f[0] = (uint32)-1; +} + +static void blake2s_increment_counter( blake2s_state *S, const uint32 inc ) +{ + S->t[0] += inc; + S->t[1] += ( S->t[0] < inc ); +} + +static void blake2s_init0( blake2s_state *S ) +{ + size_t i; + memset( S, 0, sizeof( blake2s_state ) ); + + for( i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; +} + +/* init2 xors IV with input parameter block */ +void blake2s_init_param( blake2s_state *S, const blake2s_param *P ) +{ + const unsigned char *p = ( const unsigned char * )( P ); + size_t i; + uint32 w; + + blake2s_init0( S ); + + /* IV XOR ParamBlock */ + for( i = 0; i < 8; ++i ) + { + memcpy (&w, &p[i * 4], sizeof (w)); + S->h[i] ^= w; + } + + S->outlen = P->digest_length; +} + + +/* Sequential blake2s initialization */ +void blake2s_init( blake2s_state *S ) +{ + blake2s_param P[1]; + + P->digest_length = 32; + P->key_length = 0; + P->fanout = 1; + P->depth = 1; + P->leaf_length = 0; + P->node_offset = 0; + P->xof_length = 0; + P->node_depth = 0; + P->inner_length = 0; + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + blake2s_init_param( S, P ); +} + +#ifndef TC_MINIMIZE_CODE_SIZE +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2s_sigma[r][2*i+0]]; \ + d = _lrotr(d ^ a, 16); \ + c = c + d; \ + b = _lrotr(b ^ c, 12); \ + a = a + b + m[blake2s_sigma[r][2*i+1]]; \ + d = _lrotr(d ^ a, 8); \ + c = c + d; \ + b = _lrotr(b ^ c, 7); \ + } while(0) + +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) +#else +#define G_BASE(r,i,a,b,c,d) \ + do { \ + v[a] = v[a] + v[b] + m[blake2s_sigma[r][2*i+0]]; \ + v[d] = _lrotr(v[d] ^ v[a], 16); \ + v[c] = v[c] + v[d]; \ + v[b] = _lrotr(v[b] ^ v[c], 12); \ + v[a] = v[a] + v[b] + m[blake2s_sigma[r][2*i+1]]; \ + v[d] = _lrotr(v[d] ^ v[a], 8); \ + v[c] = v[c] + v[d]; \ + v[b] = _lrotr(v[b] ^ v[c], 7); \ + } while(0) + +static void G(unsigned char r, unsigned char i, uint32* m, uint32* v, unsigned char a, unsigned char b, unsigned char c, unsigned char d) +{ + G_BASE(r,i,a,b,c,d); +} + +static void round_base (unsigned char r, uint32* m, uint32* v) +{ + G(r,0,m,v, 0, 4, 8, 12); + G(r,1,m,v, 1, 5, 9,13); + G(r,2,m,v, 2, 6,10,14); + G(r,3,m,v, 3, 7,11,15); + G(r,4,m,v, 0, 5,10,15); + G(r,5,m,v, 1, 6,11,12); + G(r,6,m,v, 2, 7, 8,13); + G(r,7,m,v, 3, 4, 9,14); +} + +#define ROUND(r) round_base(r,m,v) +#endif +static void blake2s_compress( blake2s_state *S, const uint8 in[BLAKE2S_BLOCKBYTES] ) +{ + uint32 m[16]; + uint32 v[16]; + int i; + + for( i = 0; i < 16; ++i ) { + memcpy (&m[i], in + i * sizeof( m[i] ), sizeof(uint32)); + } + + for( i = 0; i < 8; ++i ) { + v[i] = S->h[i]; + } + + v[ 8] = blake2s_IV[0]; + v[ 9] = blake2s_IV[1]; + v[10] = blake2s_IV[2]; + v[11] = blake2s_IV[3]; + v[12] = S->t[0] ^ blake2s_IV[4]; + v[13] = S->t[1] ^ blake2s_IV[5]; + v[14] = S->f[0] ^ blake2s_IV[6]; + v[15] = S->f[1] ^ blake2s_IV[7]; + + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + + for( i = 0; i < 8; ++i ) { + S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + } +} + +#undef G +#undef ROUND + +void blake2s_update( blake2s_state *S, const void *pin, size_t inlen ) +{ + const unsigned char * in = (const unsigned char *)pin; + if( inlen > 0 ) + { + size_t left = S->buflen; + size_t fill = BLAKE2S_BLOCKBYTES - left; + if( inlen > fill ) + { + S->buflen = 0; + memcpy( S->buf + left, in, fill ); /* Fill buffer */ + blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); + blake2s_compress( S, S->buf ); /* Compress */ + in += fill; inlen -= fill; + while(inlen > BLAKE2S_BLOCKBYTES) { + blake2s_increment_counter(S, BLAKE2S_BLOCKBYTES); + blake2s_compress( S, in ); + in += BLAKE2S_BLOCKBYTES; + inlen -= BLAKE2S_BLOCKBYTES; + } + } + memcpy( S->buf + S->buflen, in, inlen ); + S->buflen += inlen; + } +} + +int blake2s_final( blake2s_state *S, unsigned char *out ) +{ + int i; + + if( blake2s_is_lastblock( S ) ) + return -1; + + blake2s_increment_counter( S, ( uint32 )S->buflen ); + blake2s_set_lastblock( S ); + memset( S->buf + S->buflen, 0, BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ + blake2s_compress( S, S->buf ); + + for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ + memcpy( out + sizeof( S->h[i] ) * i, &S->h[i], sizeof(uint32) ); + + return 0; +} + +#if defined(SUPERCOP) +int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) +{ + return blake2s( out, BLAKE2S_OUTBYTES, in, inlen, NULL, 0 ); +} +#endif + +#if defined(BLAKE2S_SELFTEST) +#include +#include "blake2-kat.h" +int main( void ) +{ + uint8 key[BLAKE2S_KEYBYTES]; + uint8 buf[BLAKE2_KAT_LENGTH]; + size_t i, step; + + for( i = 0; i < BLAKE2S_KEYBYTES; ++i ) + key[i] = ( uint8 )i; + + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + buf[i] = ( uint8 )i; + + /* Test simple API */ + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + { + uint8 hash[BLAKE2S_OUTBYTES]; + blake2s( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES ); + + if( 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) ) + { + goto fail; + } + } + + /* Test streaming API */ + for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) { + for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) { + uint8 hash[BLAKE2S_OUTBYTES]; + blake2s_state S; + uint8 * p = buf; + size_t mlen = i; + int err = 0; + + if( (err = blake2s_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) { + goto fail; + } + + while (mlen >= step) { + if ( (err = blake2s_update(&S, p, step)) < 0 ) { + goto fail; + } + mlen -= step; + p += step; + } + if ( (err = blake2s_update(&S, p, mlen)) < 0) { + goto fail; + } + if ( (err = blake2s_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) { + goto fail; + } + + if (0 != memcmp(hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES)) { + goto fail; + } + } + } + + puts( "ok" ); + return 0; +fail: + puts("error"); + return -1; +} +#endif diff --git a/src/Crypto/blake2s-round.h b/src/Crypto/blake2s-round.h new file mode 100644 index 00000000..590540bb --- /dev/null +++ b/src/Crypto/blake2s-round.h @@ -0,0 +1,159 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +/* Adapted for VeraCrypt */ + + +#ifndef BLAKE2S_ROUND_H +#define BLAKE2S_ROUND_H + +#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) ) +#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) + +#define TOF(reg) _mm_castsi128_ps((reg)) +#define TOI(reg) _mm_castps_si128((reg)) + +#define LIKELY(x) __builtin_expect((x),1) + + +/* Microarchitecture-specific macros */ +#ifndef HAVE_XOP +#ifdef HAVE_SSSE3 +#define _mm_roti_epi32(r, c) ( \ + (8==-(c)) ? _mm_shuffle_epi8(r,r8) \ + : (16==-(c)) ? _mm_shuffle_epi8(r,r16) \ + : _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) )) ) +#else +#define _mm_roti_epi32(r, c) _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) )) +#endif +#else +/* ... */ +#endif + + +#define G1(row1,row2,row3,row4,buf) \ + row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \ + row4 = _mm_xor_si128( row4, row1 ); \ + row4 = _mm_roti_epi32(row4, -16); \ + row3 = _mm_add_epi32( row3, row4 ); \ + row2 = _mm_xor_si128( row2, row3 ); \ + row2 = _mm_roti_epi32(row2, -12); + +#define G2(row1,row2,row3,row4,buf) \ + row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \ + row4 = _mm_xor_si128( row4, row1 ); \ + row4 = _mm_roti_epi32(row4, -8); \ + row3 = _mm_add_epi32( row3, row4 ); \ + row2 = _mm_xor_si128( row2, row3 ); \ + row2 = _mm_roti_epi32(row2, -7); + +#define DIAGONALIZE(row1,row2,row3,row4) \ + row1 = _mm_shuffle_epi32( row1, _MM_SHUFFLE(2,1,0,3) ); \ + row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(1,0,3,2) ); \ + row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(0,3,2,1) ); + +#define UNDIAGONALIZE(row1,row2,row3,row4) \ + row1 = _mm_shuffle_epi32( row1, _MM_SHUFFLE(0,3,2,1) ); \ + row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(1,0,3,2) ); \ + row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(2,1,0,3) ); + +#if defined(HAVE_XOP) +#include "blake2s-load-xop.h" +#elif defined(HAVE_SSE41) +#include "blake2s-load-sse41.h" +#else +#include "blake2s-load-sse2.h" +#endif + +#define ROUND(r) \ + LOAD_MSG_ ##r ##_1(buf1); \ + G1(row1,row2,row3,row4,buf1); \ + LOAD_MSG_ ##r ##_2(buf2); \ + G2(row1,row2,row3,row4,buf2); \ + DIAGONALIZE(row1,row2,row3,row4); \ + LOAD_MSG_ ##r ##_3(buf3); \ + G1(row1,row2,row3,row4,buf3); \ + LOAD_MSG_ ##r ##_4(buf4); \ + G2(row1,row2,row3,row4,buf4); \ + UNDIAGONALIZE(row1,row2,row3,row4); \ + +// load32 is always called in SSE case which implies little endian +#define load32(x) *((uint32*) (x)) + +extern const uint32 blake2s_IV[8]; + +#if defined(HAVE_SSE41) +void blake2s_compress_sse41( blake2s_state *S, const uint8 block[BLAKE2S_BLOCKBYTES] ) +#elif defined (HAVE_SSSE3) +void blake2s_compress_ssse3( blake2s_state *S, const uint8 block[BLAKE2S_BLOCKBYTES] ) +#else +void blake2s_compress_sse2( blake2s_state *S, const uint8 block[BLAKE2S_BLOCKBYTES] ) +#endif +{ + __m128i row1, row2, row3, row4; + __m128i buf1, buf2, buf3, buf4; +#if defined(HAVE_SSE41) + __m128i t0, t1; +#if !defined(HAVE_XOP) + __m128i t2; +#endif +#endif + __m128i ff0, ff1; +#if defined(HAVE_SSSE3) && !defined(HAVE_XOP) + const __m128i r8 = _mm_set_epi8( 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1 ); + const __m128i r16 = _mm_set_epi8( 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2 ); +#endif +#if defined(HAVE_SSE41) + const __m128i m0 = LOADU( block + 00 ); + const __m128i m1 = LOADU( block + 16 ); + const __m128i m2 = LOADU( block + 32 ); + const __m128i m3 = LOADU( block + 48 ); +#else + const uint32 m0 = load32(block + 0 * sizeof(uint32)); + const uint32 m1 = load32(block + 1 * sizeof(uint32)); + const uint32 m2 = load32(block + 2 * sizeof(uint32)); + const uint32 m3 = load32(block + 3 * sizeof(uint32)); + const uint32 m4 = load32(block + 4 * sizeof(uint32)); + const uint32 m5 = load32(block + 5 * sizeof(uint32)); + const uint32 m6 = load32(block + 6 * sizeof(uint32)); + const uint32 m7 = load32(block + 7 * sizeof(uint32)); + const uint32 m8 = load32(block + 8 * sizeof(uint32)); + const uint32 m9 = load32(block + 9 * sizeof(uint32)); + const uint32 m10 = load32(block + 10 * sizeof(uint32)); + const uint32 m11 = load32(block + 11 * sizeof(uint32)); + const uint32 m12 = load32(block + 12 * sizeof(uint32)); + const uint32 m13 = load32(block + 13 * sizeof(uint32)); + const uint32 m14 = load32(block + 14 * sizeof(uint32)); + const uint32 m15 = load32(block + 15 * sizeof(uint32)); +#endif + row1 = ff0 = LOADU( &S->h[0] ); + row2 = ff1 = LOADU( &S->h[4] ); + row3 = _mm_loadu_si128( (__m128i const *)&blake2s_IV[0] ); + row4 = _mm_xor_si128( _mm_loadu_si128( (__m128i const *)&blake2s_IV[4] ), LOADU( &S->t[0] ) ); + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + STOREU( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) ); + STOREU( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) ); +} + +#endif diff --git a/src/Crypto/blake2s.c b/src/Crypto/blake2s.c new file mode 100644 index 00000000..9850cae1 --- /dev/null +++ b/src/Crypto/blake2s.c @@ -0,0 +1,349 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +/* Adapted for VeraCrypt */ + +#include "blake2.h" +#include "Common/Endian.h" +#include "Crypto/config.h" +#include "Crypto/cpu.h" +#include "Crypto/misc.h" + +// load32 is always called in SSE case which implies little endian +#define load32(x) *((uint32*) (x)) + +const uint32 blake2s_IV[8] = +{ + 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, + 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL +}; + +static const uint8 blake2s_sigma[10][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , +}; + +/* Some helper functions */ +#define blake2s_set_lastnode(S) S->f[1] = (uint32)-1; + +#define blake2s_is_lastblock(S) (S->f[0] != 0) + +#define blake2s_set_lastblock(S) { \ + if( S->last_node ) blake2s_set_lastnode( S ); \ + S->f[0] = (uint32)-1; \ + } + +#define blake2s_increment_counter(S,inc) { \ + uint64 t = ( (( uint64 )S->t[1]) << 32 ) | S->t[0]; \ + t += (inc); \ + S->t[0] = ( uint32 )( t ); \ + S->t[1] = ( uint32 )( t >> 32 ); \ + } + +/* init2 xors IV with input parameter block */ +void blake2s_init_param( blake2s_state *S, const blake2s_param *P ) +{ + size_t i; + /*blake2s_init0( S ); */ + const uint8 * v = ( const uint8 * )( blake2s_IV ); + const uint8 * p = ( const uint8 * )( P ); + uint8 * h = ( uint8 * )( S->h ); + /* IV XOR ParamBlock */ + memset( S, 0, sizeof( blake2s_state ) ); + + for( i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; + + S->outlen = P->digest_length; +} + + +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2s_sigma[r][2*i+0]]; \ + d = rotr32(d ^ a, 16); \ + c = c + d; \ + b = rotr32(b ^ c, 12); \ + a = a + b + m[blake2s_sigma[r][2*i+1]]; \ + d = rotr32(d ^ a, 8); \ + c = c + d; \ + b = rotr32(b ^ c, 7); \ + } while(0) + +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + +typedef void (*blake2s_compressFn)( blake2s_state *S, const uint8 block[BLAKE2S_BLOCKBYTES] ); + +blake2s_compressFn blake2s_compress_func = NULL; +#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 +extern int blake2s_has_sse2(); +extern int blake2s_has_ssse3(); +extern int blake2s_has_sse41(); +extern void blake2s_compress_sse2( blake2s_state *S, const uint8 block[BLAKE2S_BLOCKBYTES] ); +extern void blake2s_compress_ssse3( blake2s_state *S, const uint8 block[BLAKE2S_BLOCKBYTES] ); +extern void blake2s_compress_sse41( blake2s_state *S, const uint8 block[BLAKE2S_BLOCKBYTES] ); +#endif + + +static void blake2s_compress_std( blake2s_state *S, const uint8 in[BLAKE2S_BLOCKBYTES] ) +{ + uint32 m[16]; + uint32 v[16]; + size_t i; + + for( i = 0; i < 16; ++i ) { + m[i] = *((uint32*) (in + i * sizeof( m[i] ))); + } + + for( i = 0; i < 8; ++i ) { + v[i] = S->h[i]; + } + + v[ 8] = blake2s_IV[0]; + v[ 9] = blake2s_IV[1]; + v[10] = blake2s_IV[2]; + v[11] = blake2s_IV[3]; + v[12] = S->t[0] ^ blake2s_IV[4]; + v[13] = S->t[1] ^ blake2s_IV[5]; + v[14] = S->f[0] ^ blake2s_IV[6]; + v[15] = S->f[1] ^ blake2s_IV[7]; + + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + + for( i = 0; i < 8; ++i ) { + S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + } +} + +#undef G +#undef ROUND + + +/* Some sort of default parameter block initialization, for sequential blake2s */ +void blake2s_init( blake2s_state *S ) +{ + blake2s_param P[1]; + + P->digest_length = BLAKE2S_OUTBYTES; + P->key_length = 0; + P->fanout = 1; + P->depth = 1; + P->leaf_length = 0; + P->node_offset = 0; + P->xof_length = 0; + P->node_depth = 0; + P->inner_length = 0; + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + blake2s_init_param( S, P ); + + if (!blake2s_compress_func) + { +#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 + if (HasSSE2() && blake2s_has_sse2()) + { + if (HasSSE41() && blake2s_has_sse41()) + { + blake2s_compress_func = blake2s_compress_sse41; + } + else + if (HasSSSE3() && blake2s_has_ssse3()) + { + blake2s_compress_func = blake2s_compress_ssse3; + } + else + blake2s_compress_func = blake2s_compress_sse2; + } + else +#endif + blake2s_compress_func = blake2s_compress_std; + } +} + +void blake2s_update( blake2s_state *S, const void *pin, size_t inlen ) +{ + const unsigned char * in = (const unsigned char *)pin; + if( inlen > 0 ) + { + size_t left = S->buflen; + size_t fill = BLAKE2S_BLOCKBYTES - left; + if( inlen > fill ) + { + S->buflen = 0; + memcpy( S->buf + left, in, fill ); /* Fill buffer */ + blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); + blake2s_compress_func( S, S->buf ); /* Compress */ + in += fill; inlen -= fill; + while(inlen > BLAKE2S_BLOCKBYTES) { + blake2s_increment_counter(S, BLAKE2S_BLOCKBYTES); + blake2s_compress_func( S, in ); + in += BLAKE2S_BLOCKBYTES; + inlen -= BLAKE2S_BLOCKBYTES; + } + } + memcpy( S->buf + S->buflen, in, inlen ); + S->buflen += inlen; + } +} + +int blake2s_final( blake2s_state *S, unsigned char *out ) +{ + size_t i; + + if( blake2s_is_lastblock( S ) ) + return -1; + + blake2s_increment_counter( S, (uint32)S->buflen ); + blake2s_set_lastblock( S ); + memset( S->buf + S->buflen, 0, BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ + blake2s_compress_func( S, S->buf ); + + for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ + { +#if BYTE_ORDER == LITTLE_ENDIAN + *((uint32*) out) = S->h[i]; +#else + uint32 w = S->h[i] ; + out[0] = (uint8)(w >> 0); + out[1] = (uint8)(w >> 8); + out[2] = (uint8)(w >> 16); + out[3] = (uint8)(w >> 24); +#endif + out += sizeof (uint32); + } + + return 0; +} + +/* inlen, at least, should be uint64. Others can be size_t. */ +int blake2s( void *out, const void *in, size_t inlen) +{ + blake2s_state S[1]; + + /* Verify parameters */ + if ( NULL == in && inlen > 0 ) return -1; + + if ( NULL == out ) return -1; + + blake2s_init( S ); + + blake2s_update( S, ( const uint8 * )in, inlen ); + blake2s_final( S, (unsigned char*) out ); + return 0; +} + +#if defined(SUPERCOP) +int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) +{ + return blake2s( out, BLAKE2S_OUTBYTES, in, inlen, NULL, 0 ); +} +#endif + +#if defined(BLAKE2S_SELFTEST) +#include +#include "blake2-kat.h" +int main( void ) +{ + uint8 key[BLAKE2S_KEYBYTES]; + uint8 buf[BLAKE2_KAT_LENGTH]; + size_t i, step; + + for( i = 0; i < BLAKE2S_KEYBYTES; ++i ) + key[i] = ( uint8 )i; + + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + buf[i] = ( uint8 )i; + + /* Test simple API */ + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + { + uint8 hash[BLAKE2S_OUTBYTES]; + blake2s( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES ); + + if( 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) ) + { + goto fail; + } + } + + /* Test streaming API */ + for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) { + for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) { + uint8 hash[BLAKE2S_OUTBYTES]; + blake2s_state S; + uint8 * p = buf; + size_t mlen = i; + int err = 0; + + if( (err = blake2s_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) { + goto fail; + } + + while (mlen >= step) { + if ( (err = blake2s_update(&S, p, step)) < 0 ) { + goto fail; + } + mlen -= step; + p += step; + } + if ( (err = blake2s_update(&S, p, mlen)) < 0) { + goto fail; + } + if ( (err = blake2s_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) { + goto fail; + } + + if (0 != memcmp(hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES)) { + goto fail; + } + } + } + + puts( "ok" ); + return 0; +fail: + puts("error"); + return -1; +} +#endif diff --git a/src/Crypto/blake2s_SSE2.c b/src/Crypto/blake2s_SSE2.c new file mode 100644 index 00000000..41ea0a6c --- /dev/null +++ b/src/Crypto/blake2s_SSE2.c @@ -0,0 +1,39 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +/* Adapted for VeraCrypt */ + +#include "blake2.h" +#include "Common/Endian.h" +#include "Crypto/config.h" +#include "Crypto/cpu.h" +#include "Crypto/misc.h" + +#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE + +#include "blake2s-round.h" + +int blake2s_has_sse2() +{ + return 1; +} + +#else +int blake2s_has_sse2() +{ + return 0; +} + +#endif diff --git a/src/Crypto/blake2s_SSE41.c b/src/Crypto/blake2s_SSE41.c new file mode 100644 index 00000000..99e394c1 --- /dev/null +++ b/src/Crypto/blake2s_SSE41.c @@ -0,0 +1,52 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +/* Adapted for VeraCrypt */ + +#include "blake2.h" +#include "Common/Endian.h" +#include "Crypto/config.h" +#include "Crypto/cpu.h" +#include "Crypto/misc.h" + +#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE +#if CRYPTOPP_BOOL_SSE41_INTRINSICS_AVAILABLE + +#define HAVE_SSE41 + +#if CRYPTOPP_SSSE3_AVAILABLE +#define HAVE_SSSE3 +#endif + +#include "blake2s-round.h" + +int blake2s_has_sse41() +{ + return 1; +} + +#else +int blake2s_has_sse41() +{ + return 0; +} + +#endif +#else +int blake2s_has_sse41() +{ + return 0; +} +#endif diff --git a/src/Crypto/blake2s_SSSE3.c b/src/Crypto/blake2s_SSSE3.c new file mode 100644 index 00000000..4f3252c3 --- /dev/null +++ b/src/Crypto/blake2s_SSSE3.c @@ -0,0 +1,47 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +/* Adapted for VeraCrypt */ + +#include "blake2.h" +#include "Common/Endian.h" +#include "Crypto/config.h" +#include "Crypto/cpu.h" +#include "Crypto/misc.h" + +#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE +#if CRYPTOPP_BOOL_SSSE3_INTRINSICS_AVAILABLE + +#define HAVE_SSSE3 + +#include "blake2s-round.h" + +int blake2s_has_ssse3() +{ + return 1; +} + +#else +int blake2s_has_ssse3() +{ + return 0; +} +#endif +#else +int blake2s_has_ssse3() +{ + return 0; +} +#endif -- cgit v1.2.3