From ba5da0946c3abaa93d1161ca512c3c326cda3736 Mon Sep 17 00:00:00 2001 From: Mounir IDRASSI Date: Fri, 8 Feb 2019 01:48:12 +0100 Subject: Windows: Add implementation of ChaCha20 based random generator. Use it for driver need of random bytes (currently only wipe bytes but more to come later). --- src/Crypto/chacha_u1.h | 102 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 src/Crypto/chacha_u1.h (limited to 'src/Crypto/chacha_u1.h') diff --git a/src/Crypto/chacha_u1.h b/src/Crypto/chacha_u1.h new file mode 100644 index 00000000..e77bc1ea --- /dev/null +++ b/src/Crypto/chacha_u1.h @@ -0,0 +1,102 @@ +/* +u1.h version $Date: 2014/09/08 17:44:28 $ +D. J. Bernstein +Romain Dolbeau +Public domain. +*/ + +// Modified by kerukuro for use in cppcrypto. + +// if (!bytes) return; + while (bytes >=64) { + __m128i x_0, x_1, x_2, x_3; + __m128i t_1; + const __m128i rot16 = _mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2); + const __m128i rot8 = _mm_set_epi8(14,13,12,15,10,9,8,11,6,5,4,7,2,1,0,3); + uint32 in12, in13; + + x_0 = _mm_load_si128((__m128i*)(x + 0)); + x_1 = _mm_load_si128((__m128i*)(x + 4)); + x_2 = _mm_load_si128((__m128i*)(x + 8)); + x_3 = _mm_load_si128((__m128i*)(x + 12)); + + for (i = 0 ; i < r ; ++i) { + x_0 = _mm_add_epi32(x_0, x_1); + x_3 = _mm_xor_si128(x_3, x_0); + x_3 = _mm_shuffle_epi8(x_3, rot16); + + x_2 = _mm_add_epi32(x_2, x_3); + x_1 = _mm_xor_si128(x_1, x_2); + + t_1 = x_1; + x_1 = _mm_slli_epi32(x_1, 12); + t_1 = _mm_srli_epi32(t_1, 20); + x_1 = _mm_xor_si128(x_1, t_1); + + x_0 = _mm_add_epi32(x_0, x_1); + x_3 = _mm_xor_si128(x_3, x_0); + x_0 = _mm_shuffle_epi32(x_0, 0x93); + x_3 = _mm_shuffle_epi8(x_3, rot8); + + x_2 = _mm_add_epi32(x_2, x_3); + x_3 = _mm_shuffle_epi32(x_3, 0x4e); + x_1 = _mm_xor_si128(x_1, x_2); + x_2 = _mm_shuffle_epi32(x_2, 0x39); + + t_1 = x_1; + x_1 = _mm_slli_epi32(x_1, 7); + t_1 = _mm_srli_epi32(t_1, 25); + x_1 = _mm_xor_si128(x_1, t_1); + + x_0 = _mm_add_epi32(x_0, x_1); + x_3 = _mm_xor_si128(x_3, x_0); + x_3 = _mm_shuffle_epi8(x_3, rot16); + + x_2 = _mm_add_epi32(x_2, x_3); + x_1 = _mm_xor_si128(x_1, x_2); + + t_1 = x_1; + x_1 = _mm_slli_epi32(x_1, 12); + t_1 = _mm_srli_epi32(t_1, 20); + x_1 = _mm_xor_si128(x_1, t_1); + + x_0 = _mm_add_epi32(x_0, x_1); + x_3 = _mm_xor_si128(x_3, x_0); + x_0 = _mm_shuffle_epi32(x_0, 0x39); + x_3 = _mm_shuffle_epi8(x_3, rot8); + + x_2 = _mm_add_epi32(x_2, x_3); + x_3 = _mm_shuffle_epi32(x_3, 0x4e); + x_1 = _mm_xor_si128(x_1, x_2); + x_2 = _mm_shuffle_epi32(x_2, 0x93); + + t_1 = x_1; + x_1 = _mm_slli_epi32(x_1, 7); + t_1 = _mm_srli_epi32(t_1, 25); + x_1 = _mm_xor_si128(x_1, t_1); + } + x_0 = _mm_add_epi32(x_0, _mm_loadu_si128((__m128i*)(x + 0))); + x_1 = _mm_add_epi32(x_1, _mm_loadu_si128((__m128i*)(x + 4))); + x_2 = _mm_add_epi32(x_2, _mm_loadu_si128((__m128i*)(x + 8))); + x_3 = _mm_add_epi32(x_3, _mm_loadu_si128((__m128i*)(x + 12))); + x_0 = _mm_xor_si128(x_0, _mm_loadu_si128((__m128i*)(m + 0))); + x_1 = _mm_xor_si128(x_1, _mm_loadu_si128((__m128i*)(m + 16))); + x_2 = _mm_xor_si128(x_2, _mm_loadu_si128((__m128i*)(m + 32))); + x_3 = _mm_xor_si128(x_3, _mm_loadu_si128((__m128i*)(m + 48))); + _mm_storeu_si128((__m128i*)(out + 0), x_0); + _mm_storeu_si128((__m128i*)(out + 16), x_1); + _mm_storeu_si128((__m128i*)(out + 32), x_2); + _mm_storeu_si128((__m128i*)(out + 48), x_3); + + in12 = x[12]; + in13 = x[13]; + in12 ++; + if (in12 == 0) + in13 ++; + x[12] = in12; + x[13] = in13; + + bytes -= 64; + out += 64; + m += 64; + } -- cgit v1.2.3