VeraCrypt
aboutsummaryrefslogtreecommitdiff
path: root/src/Crypto
diff options
context:
space:
mode:
author: Mounir IDRASSI <mounir.idrassi@idrix.fr> 2019-02-26 01:50:27 +0100
committer: Mounir IDRASSI <mounir.idrassi@idrix.fr> 2019-03-01 00:35:13 +0100
commit: cf48b532b447faa969347fef183c6e8921c4ded2 (patch)
tree: df0d4539325b6a7a89d0dac0b19c5a1c17293cc6 /src/Crypto
parent: 29b749bdd96668d1aa9259b70be436e807b46af2 (diff)
download: VeraCrypt-cf48b532b447faa969347fef183c6e8921c4ded2.tar.gz
VeraCrypt-cf48b532b447faa969347fef183c6e8921c4ded2.zip
Windows: Implement RAM encryption for keys on 64-bit machines using ChaCha12 cipher and t1ha non-cryptographic fast hash (https://github.com/leo-yuriev/t1ha)
Diffstat (limited to 'src/Crypto')
-rw-r--r-- src/Crypto/Sources | 3
-rw-r--r-- src/Crypto/t1ha.h | 261
-rw-r--r-- src/Crypto/t1ha2.c | 323
-rw-r--r-- src/Crypto/t1ha2_selfcheck.c | 186
-rw-r--r-- src/Crypto/t1ha_bits.h | 904
-rw-r--r-- src/Crypto/t1ha_selfcheck.c | 99
-rw-r--r-- src/Crypto/t1ha_selfcheck.h | 76
7 files changed, 1852 insertions, 0 deletions
diff --git a/src/Crypto/Sources b/src/Crypto/Sources
index 36fa89e7..2db68a7a 100644
--- a/src/Crypto/Sources
+++ b/src/Crypto/Sources
@@ -37,6 +37,9 @@ SOURCES = \
SerpentFast.c \
SerpentFast_simd.cpp \
Sha2.c \
+ t1ha_selfcheck.c \
+ t1ha2.c \
+ t1ha2_selfcheck.c \
Twofish.c \
Twofish_$(TC_ARCH).S \
GostCipher.c \
diff --git a/src/Crypto/t1ha.h b/src/Crypto/t1ha.h
new file mode 100644
index 00000000..97615b51
--- /dev/null
+++ b/src/Crypto/t1ha.h
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com,
+ * Fast Positive Hash.
+ *
+ * Portions Copyright (c) 2010-2018 Leonid Yuriev <leo@yuriev.ru>,
+ * The 1Hippeus project (t1h).
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgement in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" }
+ * by [Positive Technologies](https://www.ptsecurity.ru)
+ *
+ * Briefly, it is a 64-bit Hash Function:
+ * 1. Created for 64-bit little-endian platforms, predominantly for x86_64,
+ * but portable: it can run without penalties on any 64-bit CPU.
+ * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash
+ * and all other portable hash functions (which do not use specific
+ * hardware tricks).
+ * 3. Not suitable for cryptography.
+ *
+ * The Future will Positive. Всё будет хорошо.
+ *
+ * ACKNOWLEDGEMENT:
+ * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев)
+ * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta!
+ */
+
+#pragma once
+
+/*****************************************************************************
+ *
+ * PLEASE PAY ATTENTION TO THE FOLLOWING NOTES
+ * about macros definitions which controls t1ha behaviour and/or performance.
+ *
+ *
+ * 1) T1HA_SYS_UNALIGNED_ACCESS = Defines the system/platform/CPU/architecture
+ * abilities for unaligned data access.
+ *
+ * By default, when T1HA_SYS_UNALIGNED_ACCESS is not defined,
+ * it will be defined on the basis of hardcoded knowledge about the
+ * capabilities of the most common CPU architectures. But you can override
+ * this default behavior when building the t1ha library itself:
+ *
+ * // To disable unaligned access at all.
+ * #define T1HA_SYS_UNALIGNED_ACCESS 0
+ *
+ * // To enable unaligned access, but indicate that it is significantly slow.
+ * #define T1HA_SYS_UNALIGNED_ACCESS 1
+ *
+ * // To enable unaligned access, and indicate that it is efficient.
+ * #define T1HA_SYS_UNALIGNED_ACCESS 2
+ *
+ *
+ * 2) T1HA_USE_FAST_ONESHOT_READ = Controls the data reads at the end of buffer.
+ *
+ * When defined to non-zero, t1ha will use the 'one shot' method for reading
+ * up to 8 bytes at the end of data. In this case just one 64-bit read
+ * will be performed even when fewer than 8 bytes are available.
+ *
+ * This is a little bit faster than switching by the length of the data tail.
+ * Unfortunately this will trigger false-positive alarms from Valgrind,
+ * AddressSanitizer and other similar tools.
+ *
+ * By default, t1ha defines it to 1, but you can override this
+ * default behavior when building the t1ha library itself:
+ *
+ * // For little bit faster and small code.
+ * #define T1HA_USE_FAST_ONESHOT_READ 1
+ *
+ * // For calmness if doubt.
+ * #define T1HA_USE_FAST_ONESHOT_READ 0
+ *
+ *
+ * 3) T1HA0_RUNTIME_SELECT = Controls choice fastest function in runtime.
+ *
+ * The t1ha library offers the t1ha0() function as the fastest for the
+ * current CPU. But actual CPU features/capabilities may differ significantly,
+ * especially on the x86 platform. Therefore, internally, t1ha0() may require
+ * dynamic dispatching to choose the best implementation.
+ *
+ * By default, t1ha enables such runtime choice and (possibly) corresponding
+ * indirect calls if it is reasonable, but you can override this default
+ * behavior when building the t1ha library itself:
+ *
+ * // To enable runtime choice of fastest implementation.
+ * #define T1HA0_RUNTIME_SELECT 1
+ *
+ * // To disable runtime choice of fastest implementation.
+ * #define T1HA0_RUNTIME_SELECT 0
+ *
+ * When T1HA0_RUNTIME_SELECT is nonzero the t1ha0_resolve() function can
+ * be used to get the actual t1ha0() implementation address at runtime. This
+ * is useful in two cases:
+ * - calling through a local pointer-to-function is usually a little
+ * bit faster (less overhead) than via a PLT through the DSO boundary.
+ * - GNU Indirect functions (see below) are not supported by the environment
+ * and calling by t1ha0_funcptr is not available and/or expensive.
+ *
+ * 4) T1HA_USE_INDIRECT_FUNCTIONS = Controls usage of GNU Indirect functions.
+ *
+ * As a continuation of T1HA0_RUNTIME_SELECT, T1HA_USE_INDIRECT_FUNCTIONS
+ * controls usage of the ELF indirect functions feature. In general, when
+ * available, this reduces the overhead of indirect function calls through
+ * a DSO boundary (https://sourceware.org/glibc/wiki/GNU_IFUNC).
+ *
+ * By default, t1ha engages GNU Indirect functions when they are available
+ * and useful, but you can override this default behavior when building
+ * the t1ha library itself:
+ *
+ * // To enable use of GNU ELF Indirect functions.
+ * #define T1HA_USE_INDIRECT_FUNCTIONS 1
+ *
+ * // To disable use of GNU ELF Indirect functions. This may be useful
+ * // if the actual toolchain or the system's loader don't support ones.
+ * #define T1HA_USE_INDIRECT_FUNCTIONS 0
+ *
+ * 5) T1HA0_AESNI_AVAILABLE = Controls AES-NI detection and dispatching on x86.
+ *
+ * As a continuation of T1HA0_RUNTIME_SELECT, T1HA0_AESNI_AVAILABLE controls
+ * detection and usage of the CPU's AES-NI feature. On the other hand, this
+ * requires compiling parts of the t1ha library with certain proper options,
+ * and could be difficult or inconvenient in some cases.
+ *
+ * By default, t1ha engages AES-NI for t1ha0() on the x86 platform, but you
+ * can override this default behavior when building the t1ha library itself:
+ *
+ * // To disable detection and usage of AES-NI instructions for t1ha0().
+ * // This may be useful when you are unable to build the t1ha library
+ * // properly or know that AES-NI will be unavailable at deployment.
+ * #define T1HA0_AESNI_AVAILABLE 0
+ *
+ * // To force detection and usage of AES-NI instructions for t1ha0(),
+ * // but I don't know of any reason why anybody would need this.
+ * #define T1HA0_AESNI_AVAILABLE 1
+ *
+ * 6) T1HA0_DISABLED, T1HA1_DISABLED, T1HA2_DISABLED = Controls availability of
+ * t1ha functions.
+ *
+ * In some cases it could be useful to import/use only a few of the t1ha
+ * functions or just one. So, these definitions allow disabling the
+ * corresponding parts of the t1ha library.
+ *
+ * // To disable t1ha0(), t1ha0_32le(), t1ha0_32be() and all AES-NI.
+ * #define T1HA0_DISABLED
+ *
+ * // To disable t1ha1_le() and t1ha1_be().
+ * #define T1HA1_DISABLED
+ *
+ * // To disable t1ha2_atonce(), t1ha2_atonce128() and so on.
+ * #define T1HA2_DISABLED
+ *
+ *****************************************************************************/
+
+#define T1HA_VERSION_MAJOR 2 /* version triple MAJOR.MINOR.RELEASE = 2.1.0 */
+#define T1HA_VERSION_MINOR 1
+#define T1HA_VERSION_RELEASE 0
+
+#include "Common/Tcdefs.h"
+#include "config.h"
+#include "misc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define T1HA_ALIGN_PREFIX CRYPTOPP_ALIGN_DATA(32) /* placed before the union tag of t1ha_state256; presumably requests 32-byte alignment - confirm against CRYPTOPP_ALIGN_DATA */
+#define T1HA_ALIGN_SUFFIX /* intentionally empty: nothing is appended after the type name */
+
+#ifdef _MSC_VER /* MSVC only: spell the fixed-width names through other integer type names */
+#define uint8_t byte /* NOTE(review): byte/uint16/uint32/uint64 are presumably provided by Common/Tcdefs.h - confirm */
+#define uint16_t uint16
+#define uint32_t uint32
+#define uint64_t uint64
+#endif
+
+typedef union T1HA_ALIGN_PREFIX t1ha_state256 { /* 32 bytes viewable as bytes, 32-bit words, 64-bit words or named lanes */
+ uint8_t bytes[32]; /* byte view; t1ha2_update() stages unprocessed input through it */
+ uint32_t u32[8];
+ uint64_t u64[4]; /* 64-bit word view; exactly one 32-byte input block */
+ struct {
+ uint64_t a, b, c, d; /* the four named 64-bit lanes mixed by the t1ha2 routines */
+ } n;
+} t1ha_state256_t T1HA_ALIGN_SUFFIX;
+
+typedef struct t1ha_context { /* streaming state shared by t1ha2_init/update/final */
+ t1ha_state256_t state; /* running a/b/c/d lanes */
+ t1ha_state256_t buffer; /* input bytes that do not yet form a full 32-byte block */
+ size_t partial; /* number of valid bytes currently held in `buffer` (0..31 between calls) */
+ uint64_t total; /* sum of all `length` values passed to t1ha2_update() */
+} t1ha_context_t;
+
+/******************************************************************************
+ *
+ * t1ha2 = 64 and 128-bit, SLIGHTLY MORE ATTENTION FOR QUALITY AND STRENGTH.
+ *
+ * - The recommended version of "Fast Positive Hash" with good quality
+ * for checksum, hash tables and fingerprinting.
+ * - Portable and extremely efficient on modern 64-bit CPUs.
+ * Designed for 64-bit little-endian platforms,
+ * in other cases it will run slowly.
+ * - Great quality of hashing and still faster than other non-t1ha hashes.
+ * Provides streaming mode and 128-bit result.
+ *
+ * Note: For performance reasons the 64- and 128-bit results are completely
+ * different from each other, i.e. 64-bit result is NOT any part of 128-bit.
+ */
+
+/* The at-once variant with 64-bit result: hashes `length` bytes starting
+ * at `data`, keyed by `seed`, and returns the 64-bit hash. */
+uint64_t t1ha2_atonce(const void *data, size_t length, uint64_t seed);
+
+/* The at-once variant with 128-bit result.
+ * Argument `extra_result` is NOT optional and MUST be valid.
+ * The high 64-bit part of the 128-bit hash is always, unconditionally,
+ * stored to the address given by the `extra_result` argument; the other
+ * 64-bit part is the return value. */
+uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result,
+ const void *__restrict data, size_t length,
+ uint64_t seed);
+
+/* The init/update/final trinity for streaming.
+ * t1ha2_init() seeds the context and resets its counters; t1ha2_update()
+ * may then be called repeatedly to feed data. Whether a 64- or 128-bit
+ * result is produced depends on the `extra_result` argument of
+ * t1ha2_final(). */
+void t1ha2_init(t1ha_context_t *ctx, uint64_t seed_x, uint64_t seed_y);
+void t1ha2_update(t1ha_context_t *__restrict ctx,
+ const void *__restrict data, size_t length);
+
+/* Argument `extra_result` is optional and MAY be NULL.
+ * - If `extra_result` is NOT NULL then the 128-bit hash will be calculated,
+ * and the high 64-bit part of it will be stored to the address given
+ * by the `extra_result` argument.
+ * - Otherwise the 64-bit hash will be calculated
+ * and returned from the function directly.
+ * The context is modified by this call (a length trailer is fed into it).
+ *
+ * Note: For performance reasons the 64- and 128-bit results are completely
+ * different from each other, i.e. 64-bit result is NOT any part of 128-bit. */
+uint64_t t1ha2_final(t1ha_context_t *__restrict ctx,
+ uint64_t *__restrict extra_result /* optional */);
+
+
+int t1ha_selfcheck__t1ha2_atonce(void); /* checks t1ha2_atonce() against built-in reference values */
+int t1ha_selfcheck__t1ha2_atonce128(void); /* same for t1ha2_atonce128() */
+int t1ha_selfcheck__t1ha2_stream(void); /* same for init/update/final, 64- and 128-bit */
+int t1ha_selfcheck__t1ha2(void); /* bitwise OR of the three checks above; NOTE(review): presumably 0 means success - confirm in t1ha_selfcheck.c */
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Crypto/t1ha2.c b/src/Crypto/t1ha2.c
new file mode 100644
index 00000000..1a67f9c4
--- /dev/null
+++ b/src/Crypto/t1ha2.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com,
+ * Fast Positive Hash.
+ *
+ * Portions Copyright (c) 2010-2018 Leonid Yuriev <leo@yuriev.ru>,
+ * The 1Hippeus project (t1h).
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgement in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" }
+ * by [Positive Technologies](https://www.ptsecurity.ru)
+ *
+ * Briefly, it is a 64-bit Hash Function:
+ * 1. Created for 64-bit little-endian platforms, predominantly for x86_64,
+ * but portable: it can run without penalties on any 64-bit CPU.
+ * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash
+ * and all other portable hash functions (which do not use specific
+ * hardware tricks).
+ * 3. Not suitable for cryptography.
+ *
+ * The Future will Positive. Всё будет хорошо.
+ *
+ * ACKNOWLEDGEMENT:
+ * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев)
+ * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta!
+ */
+
+#include "t1ha_bits.h"
+#include "t1ha_selfcheck.h"
+
+/* Seed lanes `a` and `b` of the state: a takes `x`, b takes `y`.
+ * Lanes `c` and `d` are not touched. */
+static __always_inline void init_ab(t1ha_state256_t *s, uint64_t x,
+                                    uint64_t y) {
+  s->n.b = y;
+  s->n.a = x;
+}
+
+/* Seed lanes `c` and `d` of the state from the same two words used for a/b:
+ *   c = rot64(y, 23) + ~x
+ *   d = ~y + rot64(x, 19)
+ * Lanes `a` and `b` are not touched. */
+static __always_inline void init_cd(t1ha_state256_t *s, uint64_t x,
+                                    uint64_t y) {
+  const uint64_t inverted_x = ~x;
+  const uint64_t inverted_y = ~y;
+  s->n.c = rot64(y, 23) + inverted_x;
+  s->n.d = inverted_y + rot64(x, 19);
+}
+
+/* T1HA2_UPDATE: absorb one 32-byte block into the 256-bit state.
+ * Reads four 64-bit words v[0..3] through fetch64_<ENDIANNES>_<ALIGNESS>(),
+ * derives `d02` and `c13` from the words and the current d/c lanes, and only
+ * then updates the lanes in the order d, c, b, a. The order matters: d and c
+ * are updated from the not-yet-modified b and a.
+ * `state` is a t1ha_state256_t pointer, `v` a pointer to 64-bit words.
+ * TODO: C++ template in the next version */
+#define T1HA2_UPDATE(ENDIANNES, ALIGNESS, state, v) \
+ do { \
+ t1ha_state256_t *const s = state; \
+ const uint64_t w0 = fetch64_##ENDIANNES##_##ALIGNESS(v + 0); \
+ const uint64_t w1 = fetch64_##ENDIANNES##_##ALIGNESS(v + 1); \
+ const uint64_t w2 = fetch64_##ENDIANNES##_##ALIGNESS(v + 2); \
+ const uint64_t w3 = fetch64_##ENDIANNES##_##ALIGNESS(v + 3); \
+ \
+ const uint64_t d02 = w0 + rot64(w2 + s->n.d, 56); \
+ const uint64_t c13 = w1 + rot64(w3 + s->n.c, 19); \
+ s->n.d ^= s->n.b + rot64(w1, 38); \
+ s->n.c ^= s->n.a + rot64(w0, 57); \
+ s->n.b ^= prime_6 * (c13 + w2); \
+ s->n.a ^= prime_5 * (d02 + w3); \
+ } while (0)
+
+/* Fold lanes `c` and `d` into lanes `a` and `b`.
+ * `c` and `d` are only read; `a` and `b` are updated in place. */
+static __always_inline void squash(t1ha_state256_t *s) {
+  const uint64_t lane_c = s->n.c;
+  const uint64_t lane_d = s->n.d;
+  s->n.a ^= prime_6 * (lane_c + rot64(lane_d, 23));
+  s->n.b ^= prime_5 * (rot64(lane_c, 19) + lane_d);
+}
+
+/* T1HA2_LOOP: absorb 32-byte blocks of the input into the state.
+ * - `data` must be a modifiable pointer variable: it is advanced by 32 bytes
+ *   per block and is left pointing at the unprocessed tail.
+ * - `len` is only read; callers reduce it themselves (`length &= 31`).
+ * - The body is a do/while, so one block is always consumed: callers must
+ *   guarantee at least 32 readable bytes.
+ * - Looping continues while `data` is below `detent` (start + len - 31),
+ *   i.e. while at least 32 bytes remain.
+ * ENDIANNES and ALIGNESS are both forwarded to T1HA2_UPDATE (previously
+ * the endianness was hard-coded to `le` regardless of the argument).
+ * TODO: C++ template in the next version */
+#define T1HA2_LOOP(ENDIANNES, ALIGNESS, state, data, len) \
+  do { \
+    const void *detent = (const uint8_t *)(data) + (len) - 31; \
+    do { \
+      const uint64_t *v = (const uint64_t *)(data); \
+      data = (const uint64_t *)(data) + 4; \
+      prefetch(data); \
+      T1HA2_UPDATE(ENDIANNES, ALIGNESS, state, v); \
+    } while (likely(data < detent)); \
+  } while (0)
+
+/* T1HA2_TAIL_AB: mix the final input bytes into lanes a/b and finish the
+ * 64-bit hash.
+ * The switch on `len` enters at the longest applicable case and falls
+ * through: each preceding full 8-byte word is read with
+ * fetch64_<ENDIANNES>_<ALIGNESS>(), the last 1..8 bytes with
+ * tail64_<ENDIANNES>_<ALIGNESS>(v, len). `default` handles every `len` not
+ * listed explicitly (25..32 for the callers in this file).
+ * NOTE: every path ends in `return final64(...)`, so this macro returns
+ * from the ENCLOSING function; nothing placed after it is executed.
+ * TODO: C++ template in the next version */
+#define T1HA2_TAIL_AB(ENDIANNES, ALIGNESS, state, data, len) \
+ do { \
+ t1ha_state256_t *const s = state; \
+ const uint64_t *v = (const uint64_t *)data; \
+ switch (len) { \
+ default: \
+ mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
+ prime_4); \
+ /* fall through */ \
+ case 24: \
+ case 23: \
+ case 22: \
+ case 21: \
+ case 20: \
+ case 19: \
+ case 18: \
+ case 17: \
+ mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
+ prime_3); \
+ /* fall through */ \
+ case 16: \
+ case 15: \
+ case 14: \
+ case 13: \
+ case 12: \
+ case 11: \
+ case 10: \
+ case 9: \
+ mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
+ prime_2); \
+ /* fall through */ \
+ case 8: \
+ case 7: \
+ case 6: \
+ case 5: \
+ case 4: \
+ case 3: \
+ case 2: \
+ case 1: \
+ mixup64(&s->n.b, &s->n.a, tail64_##ENDIANNES##_##ALIGNESS(v, len), \
+ prime_1); \
+ /* fall through */ \
+ case 0: \
+ return final64(s->n.a, s->n.b); \
+ } \
+ } while (0)
+
+/* T1HA2_TAIL_ABCD: mix the final input bytes into all four lanes and finish
+ * the 128-bit hash.
+ * Same fall-through switch on `len` as the two-lane tail macro, but each
+ * step mixes a different lane pair (a/d, b/a, c/b, d/c).
+ * NOTE: every path ends in `return final128(...)`, so this macro returns
+ * from the ENCLOSING function. It also uses a variable named `extra_result`
+ * that is NOT a macro parameter: the enclosing function must have a valid
+ * `uint64_t *extra_result` in scope.
+ * TODO: C++ template in the next version */
+#define T1HA2_TAIL_ABCD(ENDIANNES, ALIGNESS, state, data, len) \
+ do { \
+ t1ha_state256_t *const s = state; \
+ const uint64_t *v = (const uint64_t *)data; \
+ switch (len) { \
+ default: \
+ mixup64(&s->n.a, &s->n.d, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
+ prime_4); \
+ /* fall through */ \
+ case 24: \
+ case 23: \
+ case 22: \
+ case 21: \
+ case 20: \
+ case 19: \
+ case 18: \
+ case 17: \
+ mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
+ prime_3); \
+ /* fall through */ \
+ case 16: \
+ case 15: \
+ case 14: \
+ case 13: \
+ case 12: \
+ case 11: \
+ case 10: \
+ case 9: \
+ mixup64(&s->n.c, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
+ prime_2); \
+ /* fall through */ \
+ case 8: \
+ case 7: \
+ case 6: \
+ case 5: \
+ case 4: \
+ case 3: \
+ case 2: \
+ case 1: \
+ mixup64(&s->n.d, &s->n.c, tail64_##ENDIANNES##_##ALIGNESS(v, len), \
+ prime_1); \
+ /* fall through */ \
+ case 0: \
+ return final128(s->n.a, s->n.b, s->n.c, s->n.d, extra_result); \
+ } \
+ } while (0)
+
+/* Final mixing for the 128-bit result.
+ * Runs four chained mixup64() rounds over the lane copies a/b/c/d, then
+ * stores `c + d` through `h` and returns `a ^ b`.
+ * `h` must be a valid pointer; it is written unconditionally. */
+static __always_inline uint64_t final128(uint64_t a, uint64_t b, uint64_t c,
+                                         uint64_t d, uint64_t *h) {
+  uint64_t returned_half;
+
+  mixup64(&a, &b, rot64(c, 41) ^ d, prime_0);
+  mixup64(&b, &c, rot64(d, 23) ^ a, prime_6);
+  mixup64(&c, &d, rot64(a, 19) ^ b, prime_5);
+  mixup64(&d, &a, rot64(b, 31) ^ c, prime_4);
+
+  returned_half = a ^ b;
+  *h = c + d;
+  return returned_half;
+}
+
+//------------------------------------------------------------------------------
+
+/* One-shot 64-bit t1ha2 hash of `length` bytes at `data`, keyed by `seed`.
+ *
+ * Lanes a/b are seeded with (seed, length). Only inputs longer than 32 bytes
+ * seed lanes c/d, run the 32-byte block loop and squash c/d back into a/b;
+ * shorter inputs go straight to the tail, which uses a/b alone.
+ * When unaligned access is not flagged as efficient, the fetch flavour
+ * (aligned or unaligned) is chosen from the address of `data`.
+ *
+ * The value is returned from inside T1HA2_TAIL_AB, so control never reaches
+ * the closing brace. */
+uint64_t t1ha2_atonce(const void *data, size_t length, uint64_t seed) {
+  t1ha_state256_t state;
+  init_ab(&state, seed, length);
+
+#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT
+  if (unlikely(length > 32)) {
+    init_cd(&state, seed, length);
+    T1HA2_LOOP(le, unaligned, &state, data, length);
+    squash(&state);
+    length &= 31;
+  }
+  T1HA2_TAIL_AB(le, unaligned, &state, data, length);
+#else
+  if ((((uintptr_t)data) & (ALIGNMENT_64 - 1)) == 0) {
+    /* `data` is suitably aligned for 64-bit loads. */
+    if (unlikely(length > 32)) {
+      init_cd(&state, seed, length);
+      T1HA2_LOOP(le, aligned, &state, data, length);
+      squash(&state);
+      length &= 31;
+    }
+    T1HA2_TAIL_AB(le, aligned, &state, data, length);
+  } else {
+    /* Misaligned input: use the unaligned fetch helpers. */
+    if (unlikely(length > 32)) {
+      init_cd(&state, seed, length);
+      T1HA2_LOOP(le, unaligned, &state, data, length);
+      squash(&state);
+      length &= 31;
+    }
+    T1HA2_TAIL_AB(le, unaligned, &state, data, length);
+  }
+#endif
+}
+
+/* One-shot 128-bit t1ha2 hash of `length` bytes at `data`, keyed by `seed`.
+ *
+ * All four lanes are always seeded from (seed, length). Inputs longer than
+ * 32 bytes first run the 32-byte block loop; the remaining bytes are then
+ * mixed by the four-lane tail.
+ * One 64-bit half is returned and the other is stored through
+ * `extra_result`, which must be a valid pointer.
+ *
+ * The value is returned from inside T1HA2_TAIL_ABCD, so control never
+ * reaches the closing brace. */
+uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result,
+                         const void *__restrict data, size_t length,
+                         uint64_t seed) {
+  t1ha_state256_t state;
+  init_ab(&state, seed, length);
+  init_cd(&state, seed, length);
+
+#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT
+  if (unlikely(length > 32)) {
+    T1HA2_LOOP(le, unaligned, &state, data, length);
+    length &= 31;
+  }
+  T1HA2_TAIL_ABCD(le, unaligned, &state, data, length);
+#else
+  if ((((uintptr_t)data) & (ALIGNMENT_64 - 1)) == 0) {
+    /* `data` is suitably aligned for 64-bit loads. */
+    if (unlikely(length > 32)) {
+      T1HA2_LOOP(le, aligned, &state, data, length);
+      length &= 31;
+    }
+    T1HA2_TAIL_ABCD(le, aligned, &state, data, length);
+  } else {
+    /* Misaligned input: use the unaligned fetch helpers. */
+    if (unlikely(length > 32)) {
+      T1HA2_LOOP(le, unaligned, &state, data, length);
+      length &= 31;
+    }
+    T1HA2_TAIL_ABCD(le, unaligned, &state, data, length);
+  }
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+/* Prepare `ctx` for a new streaming hash.
+ * Clears the buffered-byte and total-length counters and seeds all four
+ * state lanes from (seed_x, seed_y). The staging buffer is not cleared. */
+void t1ha2_init(t1ha_context_t *ctx, uint64_t seed_x, uint64_t seed_y) {
+  ctx->total = 0;
+  ctx->partial = 0;
+  init_ab(&ctx->state, seed_x, seed_y);
+  init_cd(&ctx->state, seed_x, seed_y);
+}
+
+/* Feed `length` more bytes at `data` into a streaming hash.
+ *
+ * Steps:
+ *  1. If earlier calls left bytes in ctx->buffer, top the buffer up first.
+ *     Once it holds 32 bytes it is absorbed as one block.
+ *  2. Absorb 32-byte blocks directly from `data`.
+ *  3. Copy the remaining 0..31 bytes into ctx->buffer for the next call.
+ *
+ * ctx->total accumulates every `length` passed in; ctx->partial is the
+ * number of bytes currently staged in ctx->buffer.
+ *
+ * A zero-length call is a no-op and returns before `data` is touched, so
+ * `data` may be NULL in that case. */
+void t1ha2_update(t1ha_context_t *__restrict ctx, const void *__restrict data,
+                  size_t length) {
+  /* Nothing to absorb. Returning here also avoids calling memcpy() with a
+   * source pointer that may be NULL, which is undefined even for size 0. */
+  if (length == 0)
+    return;
+
+  ctx->total += length;
+
+  if (ctx->partial) {
+    const size_t left = 32 - ctx->partial;
+    const size_t chunk = (length >= left) ? left : length;
+    memcpy(ctx->buffer.bytes + ctx->partial, data, chunk);
+    ctx->partial += chunk;
+    if (ctx->partial < 32) {
+      /* Buffer still not full: the whole input has been staged. */
+      assert(left >= length);
+      return;
+    }
+    ctx->partial = 0;
+    data = (const uint8_t *)data + chunk;
+    length -= chunk;
+    /* The staging buffer is a t1ha_state256_t, hence the aligned fetch. */
+    T1HA2_UPDATE(le, aligned, &ctx->state, ctx->buffer.u64);
+  }
+
+  if (length >= 32) {
+#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT
+    T1HA2_LOOP(le, unaligned, &ctx->state, data, length);
+#else
+    if ((((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0) {
+      T1HA2_LOOP(le, unaligned, &ctx->state, data, length);
+    } else {
+      T1HA2_LOOP(le, aligned, &ctx->state, data, length);
+    }
+#endif
+    /* T1HA2_LOOP advanced `data`; keep only the tail length. */
+    length &= 31;
+  }
+
+  if (length) {
+    ctx->partial = length;
+    memcpy(ctx->buffer.bytes, data, length);
+  }
+}
+
+/* Finish a streaming hash and return the result.
+ *
+ * An 8-byte trailer (total byte count shifted left by 3, with bit 63
+ * flipped) is fed through t1ha2_update() first, so `ctx` is modified by
+ * this call. On builds where __BYTE_ORDER__ differs from
+ * __ORDER_LITTLE_ENDIAN__ the trailer is byte-swapped before being fed.
+ *
+ * - extra_result == NULL: c/d are squashed into a/b and the 64-bit tail
+ *   produces the return value.
+ * - extra_result != NULL: the four-lane tail produces the 128-bit result;
+ *   one half is returned, the other stored through `extra_result`.
+ *
+ * Both tail macros return from this function themselves. */
+uint64_t t1ha2_final(t1ha_context_t *__restrict ctx,
+                     uint64_t *__restrict extra_result) {
+  uint64_t trailer = ctx->total << 3;
+  trailer ^= UINT64_C(1) << 63;
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+  trailer = bswap64(trailer);
+#endif
+  t1ha2_update(ctx, &trailer, 8);
+
+  if (likely(!extra_result)) {
+    squash(&ctx->state);
+    T1HA2_TAIL_AB(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial);
+  } else {
+    T1HA2_TAIL_ABCD(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial);
+  }
+}
diff --git a/src/Crypto/t1ha2_selfcheck.c b/src/Crypto/t1ha2_selfcheck.c
new file mode 100644
index 00000000..35e21916
--- /dev/null
+++ b/src/Crypto/t1ha2_selfcheck.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com,
+ * Fast Positive Hash.
+ *
+ * Portions Copyright (c) 2010-2018 Leonid Yuriev <leo@yuriev.ru>,
+ * The 1Hippeus project (t1h).
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgement in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" }
+ * by [Positive Technologies](https://www.ptsecurity.ru)
+ *
+ * Briefly, it is a 64-bit Hash Function:
+ * 1. Created for 64-bit little-endian platforms, predominantly for x86_64,
+ * but portable: it can run without penalties on any 64-bit CPU.
+ * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash
+ * and all other portable hash functions (which do not use specific
+ * hardware tricks).
+ * 3. Not suitable for cryptography.
+ *
+ * The Future will Positive. Всё будет хорошо.
+ *
+ * ACKNOWLEDGEMENT:
+ * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев)
+ * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta!
+ */
+
+
+#include "t1ha_bits.h"
+#include "t1ha_selfcheck.h"
+
+/* *INDENT-OFF* */
+/* clang-format off */
+
+const uint64_t t1ha_refval_2atonce[81] = { 0, /* reference values handed to t1ha_selfcheck() together with t1ha2_atonce() */
+ 0x772C7311BE32FF42, 0x444753D23F207E03, 0x71F6DF5DA3B4F532, 0x555859635365F660,
+ 0xE98808F1CD39C626, 0x2EB18FAF2163BB09, 0x7B9DD892C8019C87, 0xE2B1431C4DA4D15A,
+ 0x1984E718A5477F70, 0x08DD17B266484F79, 0x4C83A05D766AD550, 0x92DCEBB131D1907D,
+ 0xD67BC6FC881B8549, 0xF6A9886555FBF66B, 0x6E31616D7F33E25E, 0x36E31B7426E3049D,
+ 0x4F8E4FAF46A13F5F, 0x03EB0CB3253F819F, 0x636A7769905770D2, 0x3ADF3781D16D1148,
+ 0x92D19CB1818BC9C2, 0x283E68F4D459C533, 0xFA83A8A88DECAA04, 0x8C6F00368EAC538C,
+ 0x7B66B0CF3797B322, 0x5131E122FDABA3FF, 0x6E59FF515C08C7A9, 0xBA2C5269B2C377B0,
+ 0xA9D24FD368FE8A2B, 0x22DB13D32E33E891, 0x7B97DFC804B876E5, 0xC598BDFCD0E834F9,
+ 0xB256163D3687F5A7, 0x66D7A73C6AEF50B3, 0x25A7201C85D9E2A3, 0x911573EDA15299AA,
+ 0x5C0062B669E18E4C, 0x17734ADE08D54E28, 0xFFF036E33883F43B, 0xFE0756E7777DF11E,
+ 0x37972472D023F129, 0x6CFCE201B55C7F57, 0xE019D1D89F02B3E1, 0xAE5CC580FA1BB7E6,
+ 0x295695FB7E59FC3A, 0x76B6C820A40DD35E, 0xB1680A1768462B17, 0x2FB6AF279137DADA,
+ 0x28FB6B4366C78535, 0xEC278E53924541B1, 0x164F8AAB8A2A28B5, 0xB6C330AEAC4578AD,
+ 0x7F6F371070085084, 0x94DEAD60C0F448D3, 0x99737AC232C559EF, 0x6F54A6F9CA8EDD57,
+ 0x979B01E926BFCE0C, 0xF7D20BC85439C5B4, 0x64EDB27CD8087C12, 0x11488DE5F79C0BE2,
+ 0x25541DDD1680B5A4, 0x8B633D33BE9D1973, 0x404A3113ACF7F6C6, 0xC59DBDEF8550CD56,
+ 0x039D23C68F4F992C, 0x5BBB48E4BDD6FD86, 0x41E312248780DF5A, 0xD34791CE75D4E94F,
+ 0xED523E5D04DCDCFF, 0x7A6BCE0B6182D879, 0x21FB37483CAC28D8, 0x19A1B66E8DA878AD,
+ 0x6F804C5295B09ABE, 0x2A4BE5014115BA81, 0xA678ECC5FC924BE0, 0x50F7A54A99A36F59,
+ 0x0FD7E63A39A66452, 0x5AB1B213DD29C4E4, 0xF3ED80D9DF6534C5, 0xC736B12EF90615FD
+};
+
+const uint64_t t1ha_refval_2atonce128[81] = { 0x4EC7F6A48E33B00A, /* reference values for the return value of t1ha2_atonce128(), checked via thunk_atonce128() */
+ 0xB7B7FAA5BD7D8C1E, 0x3269533F66534A76, 0x6C3EC6B687923BFC, 0xC096F5E7EFA471A9,
+ 0x79D8AFB550CEA471, 0xCEE0507A20FD5119, 0xFB04CFFC14A9F4BF, 0xBD4406E923807AF2,
+ 0x375C02FF11010491, 0xA6EA4C2A59E173FF, 0xE0A606F0002CADDF, 0xE13BEAE6EBC07897,
+ 0xF069C2463E48EA10, 0x75BEE1A97089B5FA, 0x378F22F8DE0B8085, 0x9C726FC4D53D0D8B,
+ 0x71F6130A2D08F788, 0x7A9B20433FF6CF69, 0xFF49B7CD59BF6D61, 0xCCAAEE0D1CA9C6B3,
+ 0xC77889D86039D2AD, 0x7B378B5BEA9B0475, 0x6520BFA79D59AD66, 0x2441490CB8A37267,
+ 0xA715A66B7D5CF473, 0x9AE892C88334FD67, 0xD2FFE9AEC1D2169A, 0x790B993F18B18CBB,
+ 0xA0D02FBCF6A7B1AD, 0xA90833E6F151D0C1, 0x1AC7AFA37BD79BE0, 0xD5383628B2881A24,
+ 0xE5526F9D63F9F8F1, 0xC1F165A01A6D1F4D, 0x6CCEF8FF3FCFA3F2, 0x2030F18325E6DF48,
+ 0x289207230E3FB17A, 0x077B66F713A3C4B9, 0x9F39843CAF871754, 0x512FDA0F808ACCF3,
+ 0xF4D9801CD0CD1F14, 0x28A0C749ED323638, 0x94844CAFA671F01C, 0xD0E261876B8ACA51,
+ 0x8FC2A648A4792EA2, 0x8EF87282136AF5FE, 0x5FE6A54A9FBA6B40, 0xA3CC5B8FE6223D54,
+ 0xA8C3C0DD651BB01C, 0x625E9FDD534716F3, 0x1AB2604083C33AC5, 0xDE098853F8692F12,
+ 0x4B0813891BD87624, 0x4AB89C4553D182AD, 0x92C15AA2A3C27ADA, 0xFF2918D68191F5D9,
+ 0x06363174F641C325, 0x667112ADA74A2059, 0x4BD605D6B5E53D7D, 0xF2512C53663A14C8,
+ 0x21857BCB1852667C, 0xAFBEBD0369AEE228, 0x7049340E48FBFD6B, 0x50710E1924F46954,
+ 0x869A75E04A976A3F, 0x5A41ABBDD6373889, 0xA781778389B4B188, 0x21A3AFCED6C925B6,
+ 0x107226192EC10B42, 0x62A862E84EC2F9B1, 0x2B15E91659606DD7, 0x613934D1F9EC5A42,
+ 0x4DC3A96DC5361BAF, 0xC80BBA4CB5F12903, 0x3E3EDAE99A7D6987, 0x8F97B2D55941DCB0,
+ 0x4C9787364C3E4EC1, 0xEF0A2D07BEA90CA7, 0x5FABF32C70AEEAFB, 0x3356A5CFA8F23BF4
+};
+
+const uint64_t t1ha_refval_2stream[81] = { 0x3C8426E33CB41606, /* reference values for the 64-bit streaming result, checked via thunk_stream() */
+ 0xFD74BE70EE73E617, 0xF43DE3CDD8A20486, 0x882FBCB37E8EA3BB, 0x1AA2CDD34CAA3D4B,
+ 0xEE755B2BFAE07ED5, 0xD4E225250D92E213, 0xA09B49083205965B, 0xD47B21724EF9EC9E,
+ 0xAC888FC3858CEE11, 0x94F820D85736F244, 0x1707951CCA920932, 0x8E0E45603F7877F0,
+ 0x9FD2592C0E3A7212, 0x9A66370F3AE3D427, 0xD33382D2161DE2B7, 0x9A35BE079DA7115F,
+ 0x73457C7FF58B4EC3, 0xBE8610BD53D7CE98, 0x65506DFE5CCD5371, 0x286A321AF9D5D9FA,
+ 0xB81EF9A7EF3C536D, 0x2CFDB5E6825C6E86, 0xB2A58CBFDFDD303A, 0xD26094A42B950635,
+ 0xA34D666A5F02AD9A, 0x0151E013EBCC72E5, 0x9254A6EA7FCB6BB5, 0x10C9361B3869DC2B,
+ 0xD7EC55A060606276, 0xA2FF7F8BF8976FFD, 0xB5181BB6852DCC88, 0x0EE394BB6178BAFF,
+ 0x3A8B4B400D21B89C, 0xEC270461970960FD, 0x615967FAB053877E, 0xFA51BF1CFEB4714C,
+ 0x29FDA8383070F375, 0xC3B663061BC52EDA, 0x192BBAF1F1A57923, 0x6D193B52F93C53AF,
+ 0x7F6F5639FE87CA1E, 0x69F7F9140B32EDC8, 0xD0F2416FB24325B6, 0x62C0E37FEDD49FF3,
+ 0x57866A4B809D373D, 0x9848D24BD935E137, 0xDFC905B66734D50A, 0x9A938DD194A68529,
+ 0x8276C44DF0625228, 0xA4B35D00AD67C0AB, 0x3D9CB359842DB452, 0x4241BFA8C23B267F,
+ 0x650FA517BEF15952, 0x782DE2ABD8C7B1E1, 0x4EAE456166CA3E15, 0x40CDF3A02614E337,
+ 0xAD84092C46102172, 0x0C68479B03F9A167, 0x7E1BA046749E181C, 0x3F3AB41A697382C1,
+ 0xC5E5DD6586EBFDC4, 0xFF926CD4EB02555C, 0x035CFE67F89E709B, 0x89F06AB6464A1B9D,
+ 0x8EFF58F3F7DEA758, 0x8B54AC657902089F, 0xC6C4F1F9F8DA4D64, 0xBDB729048AAAC93A,
+ 0xEA76BA628F5E5CD6, 0x742159B728B8A979, 0x6D151CD3C720E53D, 0xE97FFF9368FCDC42,
+ 0xCA5B38314914FBDA, 0xDD92C91D8B858EAE, 0x66E5F07CF647CBF2, 0xD4CF9B42F4985AFB,
+ 0x72AE17AC7D92F6B7, 0xB8206B22AB0472E1, 0x385876B5CFD42479, 0x03294A249EBE6B26
+};
+
+const uint64_t t1ha_refval_2stream128[81] = { 0xCD2801D3B92237D6, /* reference values for the return value of the 128-bit streaming result, checked via thunk_stream128() */
+ 0x10E4D47BD821546D, 0x9100704B9D65CD06, 0xD6951CB4016313EF, 0x24DB636F96F474DA,
+ 0x3F4AF7DF3C49E422, 0xBFF25B8AF143459B, 0xA157EC13538BE549, 0xD3F5F52C47DBD419,
+ 0x0EF3D7D735AF1575, 0x46B7B892823F7B1B, 0xEE22EA4655213289, 0x56AD76F02FE929BC,
+ 0x9CF6CD1AC886546E, 0xAF45CE47AEA0B933, 0x535F9DC09F3996B7, 0x1F0C3C01694AE128,
+ 0x18495069BE0766F7, 0x37E5FFB3D72A4CB1, 0x6D6C2E9299F30709, 0x4F39E693F50B41E3,
+ 0xB11FC4EF0658E116, 0x48BFAACB78E5079B, 0xE1B4C89C781B3AD0, 0x81D2F34888D333A1,
+ 0xF6D02270D2EA449C, 0xC884C3C2C3CE1503, 0x711AE16BA157A9B9, 0x1E6140C642558C9D,
+ 0x35AB3D238F5DC55B, 0x33F07B6AEF051177, 0xE57336776EEFA71C, 0x6D445F8318BA3752,
+ 0xD4F5F6631934C988, 0xD5E260085727C4A2, 0x5B54B41EC180B4FA, 0x7F5D75769C15A898,
+ 0xAE5A6DB850CA33C6, 0x038CCB8044663403, 0xDA16310133DC92B8, 0x6A2FFB7AB2B7CE2B,
+ 0xDC1832D9229BAE20, 0x8C62C479F5ABC9E4, 0x5EB7B617857C9CCB, 0xB79CF7D749A1E80D,
+ 0xDE7FAC3798324FD3, 0x8178911813685D06, 0x6A726CBD394D4410, 0x6CBE6B3280DA1113,
+ 0x6829BA4410CF1148, 0xFA7E417EB26C5BC6, 0x22ED87884D6E3A49, 0x15F1472D5115669D,
+ 0x2EA0B4C8BF69D318, 0xDFE87070AA545503, 0x6B4C14B5F7144AB9, 0xC1ED49C06126551A,
+ 0x351919FC425C3899, 0x7B569C0FA6F1BD3E, 0x713AC2350844CFFD, 0xE9367F9A638C2FF3,
+ 0x97F17D325AEA0786, 0xBCB907CC6CF75F91, 0x0CB7517DAF247719, 0xBE16093CC45BE8A9,
+ 0x786EEE97359AD6AB, 0xB7AFA4F326B97E78, 0x2694B67FE23E502E, 0x4CB492826E98E0B4,
+ 0x838D119F74A416C7, 0x70D6A91E4E5677FD, 0xF3E4027AD30000E6, 0x9BDF692795807F77,
+ 0x6A371F966E034A54, 0x8789CF41AE4D67EF, 0x02688755484D60AE, 0xD5834B3A4BF5CE42,
+ 0x9405FC61440DE25D, 0x35EB280A157979B6, 0x48D40D6A525297AC, 0x6A87DC185054BADA
+};
+
+/* *INDENT-ON* */
+/* clang-format on */
+
+/* Self-check of t1ha2_atonce() against the t1ha_refval_2atonce table.
+ * Returns whatever t1ha_selfcheck() reports for that pair. */
+__cold int t1ha_selfcheck__t1ha2_atonce(void) {
+  const int verdict = t1ha_selfcheck(t1ha2_atonce, t1ha_refval_2atonce);
+  return verdict;
+}
+
+/* Adapter giving t1ha2_atonce128() the (data, len, seed) signature expected
+ * by t1ha_selfcheck(): only the returned half is checked, the half written
+ * through the extra-result pointer is dropped. */
+__cold static uint64_t thunk_atonce128(const void *data, size_t len,
+                                       uint64_t seed) {
+  uint64_t discarded_half;
+  const uint64_t checked_half =
+      t1ha2_atonce128(&discarded_half, data, len, seed);
+  return checked_half;
+}
+
+/* Self-check of t1ha2_atonce128() (through thunk_atonce128) against the
+ * t1ha_refval_2atonce128 table. Returns whatever t1ha_selfcheck() reports. */
+__cold int t1ha_selfcheck__t1ha2_atonce128(void) {
+  const int verdict = t1ha_selfcheck(thunk_atonce128, t1ha_refval_2atonce128);
+  return verdict;
+}
+
+/* Adapter running the streaming API as a single init/update/final pass and
+ * returning the 64-bit result. The same `seed` is used for both seed words
+ * of t1ha2_init(). */
+__cold static uint64_t thunk_stream(const void *data, size_t len,
+                                    uint64_t seed) {
+  t1ha_context_t stream;
+  uint64_t digest;
+
+  t1ha2_init(&stream, seed, seed);
+  t1ha2_update(&stream, data, len);
+  digest = t1ha2_final(&stream, NULL);
+  return digest;
+}
+
+/* Adapter running the streaming API as a single init/update/final pass in
+ * 128-bit mode. Only the returned half is handed back; the half stored
+ * through the extra-result pointer is dropped. The same `seed` is used for
+ * both seed words of t1ha2_init(). */
+__cold static uint64_t thunk_stream128(const void *data, size_t len,
+                                       uint64_t seed) {
+  t1ha_context_t stream;
+  uint64_t discarded_half;
+  uint64_t checked_half;
+
+  t1ha2_init(&stream, seed, seed);
+  t1ha2_update(&stream, data, len);
+  checked_half = t1ha2_final(&stream, &discarded_half);
+  return checked_half;
+}
+
+/* Self-check of the streaming API in both 64-bit and 128-bit mode.
+ * Both checks always run; their results are combined with bitwise OR. */
+__cold int t1ha_selfcheck__t1ha2_stream(void) {
+  const int verdict64 = t1ha_selfcheck(thunk_stream, t1ha_refval_2stream);
+  const int verdict128 =
+      t1ha_selfcheck(thunk_stream128, t1ha_refval_2stream128);
+  return verdict64 | verdict128;
+}
+
+/* Run every t1ha2 self-check (at-once 64-bit, at-once 128-bit, streaming).
+ * All three always run; their results are combined with bitwise OR. */
+__cold int t1ha_selfcheck__t1ha2(void) {
+  const int atonce = t1ha_selfcheck__t1ha2_atonce();
+  const int atonce128 = t1ha_selfcheck__t1ha2_atonce128();
+  const int stream = t1ha_selfcheck__t1ha2_stream();
+  return atonce | atonce128 | stream;
+}
+
diff --git a/src/Crypto/t1ha_bits.h b/src/Crypto/t1ha_bits.h
new file mode 100644
index 00000000..b78c4129
--- /dev/null
+++ b/src/Crypto/t1ha_bits.h
@@ -0,0 +1,904 @@
+/*
+ * Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com,
+ * Fast Positive Hash.
+ *
+ * Portions Copyright (c) 2010-2018 Leonid Yuriev <leo@yuriev.ru>,
+ * The 1Hippeus project (t1h).
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgement in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" }
+ * by [Positive Technologies](https://www.ptsecurity.ru)
+ *
+ * Briefly, it is a 64-bit Hash Function:
+ * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64,
+ * but portable and without penalties it can run on any 64-bit CPU.
+ * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash
+ * and all others portable hash-functions (which do not use specific
+ * hardware tricks).
+ * 3. Not suitable for cryptography.
+ *
+ * The Future will Positive. Всё будет хорошо.
+ *
+ * ACKNOWLEDGEMENT:
+ * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев)
+ * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta!
+ */
+
+#pragma once
+
+#if defined(_MSC_VER)
+#pragma warning(disable : 4201) /* nameless struct/union */
+#if _MSC_VER > 1800
+#pragma warning(disable : 4464) /* relative include path contains '..' */
+#endif /* 1800 */
+#endif /* MSVC */
+#include "t1ha.h"
+
+#ifndef T1HA_USE_FAST_ONESHOT_READ
+/* Define it to 1 for slightly faster code.
+ * Unfortunately this may trigger false-positive alarms from Valgrind,
+ * AddressSanitizer and other similar tools.
+ * So, define it to 0 for peace of mind if in doubt. */
+#define T1HA_USE_FAST_ONESHOT_READ 1
+#endif /* T1HA_USE_FAST_ONESHOT_READ */
+
+/*****************************************************************************/
+
+#include <assert.h> /* for assert() */
+#include <string.h> /* for memcpy() */
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ && \
+ __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
+#error Unsupported byte order.
+#endif
+
+/* Possible values of T1HA_SYS_UNALIGNED_ACCESS. */
+#define T1HA_UNALIGNED_ACCESS__UNABLE 0
+#define T1HA_UNALIGNED_ACCESS__SLOW 1
+#define T1HA_UNALIGNED_ACCESS__EFFICIENT 2
+
+/* Unless the build already defined T1HA_SYS_UNALIGNED_ACCESS, pick a value
+ * from the target macros below. Anything not recognised falls back to
+ * UNABLE, which makes the fetchNN_*_unaligned helpers assemble values from
+ * smaller pieces instead of using read_unaligned(). */
+#ifndef T1HA_SYS_UNALIGNED_ACCESS
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT
+#elif defined(__ia32__)
+#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT
+#elif defined(__e2k__)
+#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__SLOW
+#elif defined(__ARM_FEATURE_UNALIGNED)
+#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT
+#else
+#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__UNABLE
+#endif
+#endif /* T1HA_SYS_UNALIGNED_ACCESS */
+
+#define ALIGNMENT_16 2
+#define ALIGNMENT_32 4
+#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul
+#define ALIGNMENT_64 8
+#else
+#define ALIGNMENT_64 4
+#endif
+
+#ifndef PAGESIZE
+#define PAGESIZE 4096
+#endif /* PAGESIZE */
+
+/***************************************************************************/
+
+#ifndef __has_builtin
+#define __has_builtin(x) (0)
+#endif
+
+#ifndef __has_warning
+#define __has_warning(x) (0)
+#endif
+
+#ifndef __has_feature
+#define __has_feature(x) (0)
+#endif
+
+#ifndef __has_extension
+#define __has_extension(x) (0)
+#endif
+
+#ifndef __has_attribute
+#define __has_attribute(x) (0)
+#endif
+
+#if __has_feature(address_sanitizer)
+#define __SANITIZE_ADDRESS__ 1
+#endif
+
+#ifndef __optimize
+#if defined(__clang__) && !__has_attribute(optimize)
+#define __optimize(ops)
+#elif defined(__GNUC__) || __has_attribute(optimize)
+#define __optimize(ops) __attribute__((optimize(ops)))
+#else
+#define __optimize(ops)
+#endif
+#endif /* __optimize */
+
+#ifndef __cold
+#if defined(__OPTIMIZE__)
+#if defined(__e2k__)
+#define __cold __optimize(1) __attribute__((cold))
+#elif defined(__clang__) && !__has_attribute(cold)
+/* just put infrequently used functions in separate section */
+#define __cold __attribute__((section("text.unlikely"))) __optimize("Os")
+#elif defined(__GNUC__) || __has_attribute(cold)
+#define __cold __attribute__((cold)) __optimize("Os")
+#else
+#define __cold __optimize("Os")
+#endif
+#else
+#define __cold
+#endif
+#endif /* __cold */
+
+
+/* MSVC-specific definitions of the helper macros used throughout this header.
+ * Other compilers get the generic fallbacks defined after this section. */
+#if defined(_MSC_VER)
+
+#pragma warning(push, 1)
+
+#include <stdlib.h>
+#define likely(cond) (cond)
+#define unlikely(cond) (cond)
+#define unreachable() __assume(0)
+/* NOTE(review): byteswap_NN / rotrNN are not defined in this header;
+ * presumably they come from a project header pulled in via t1ha.h - confirm. */
+#define bswap64(v) byteswap_64(v)
+#define bswap32(v) byteswap_32(v)
+#define bswap16(v) byteswap_16(v)
+#define rot64(v, s) rotr64(v, s)
+#define rot32(v, s) rotr32(v, s)
+#define __always_inline __forceinline
+
+/* In driver builds assert() is redirected to the ASSERT macro. */
+#ifdef TC_WINDOWS_DRIVER
+#undef assert
+#define assert ASSERT
+#endif
+
+/* Defining these as macros suppresses the portable implementations further
+ * down, which are guarded by #ifndef mul_64x64_128 / #ifndef mul_64x64_high. */
+#if defined(_M_X64) || defined(_M_IA64)
+#pragma intrinsic(_umul128)
+#define mul_64x64_128(a, b, ph) _umul128(a, b, ph)
+#endif
+
+#if defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64)
+#pragma intrinsic(__umulh)
+#define mul_64x64_high(a, b) __umulh(a, b)
+#endif
+
+#pragma warning(pop)
+#pragma warning(disable : 4514) /* 'xyz': unreferenced inline function \
+ has been removed */
+#pragma warning(disable : 4710) /* 'xyz': function not inlined */
+#pragma warning(disable : 4711) /* function 'xyz' selected for \
+ automatic inline expansion */
+#pragma warning(disable : 4127) /* conditional expression is constant */
+#pragma warning(disable : 4702) /* unreachable code */
+
+/* Makes every __GNUC_PREREQ(...) test in this header evaluate to false. */
+#define __GNUC_PREREQ(a,b) 0
+#define UINT64_C(value) value ## ULL
+
+#endif /* Compiler */
+
+#ifndef likely
+#define likely(cond) (cond)
+#endif
+#ifndef unlikely
+#define unlikely(cond) (cond)
+#endif
+#ifndef __maybe_unused
+#define __maybe_unused
+#endif
+#ifndef __always_inline
+#define __always_inline __inline
+#endif
+#ifndef unreachable
+#define unreachable() \
+ do { \
+ } while (1)
+#endif
+
+
+
+/* read_unaligned(ptr, bits): load a uintNN_t (bits = 8/16/32/64) from an
+ * address that need not be naturally aligned. Three implementations are
+ * selected by compiler: a packed proxy struct, MSVC's __unaligned qualifier,
+ * or a #pragma pack(1) proxy struct. */
+#ifndef read_unaligned
+#if defined(__GNUC__) || __has_attribute(packed)
+/* Packed struct with one member per supported width. */
+typedef struct {
+ uint8_t unaligned_8;
+ uint16_t unaligned_16;
+ uint32_t unaligned_32;
+ uint64_t unaligned_64;
+} __attribute__((packed)) t1ha_unaligned_proxy;
+/* Step back from ptr by the offset of the chosen member, so that the member
+ * of the resulting proxy pointer sits exactly at ptr, then read that member. */
+#define read_unaligned(ptr, bits) \
+ (((const t1ha_unaligned_proxy *)((const uint8_t *)(ptr)-offsetof( \
+ t1ha_unaligned_proxy, unaligned_##bits))) \
+ ->unaligned_##bits)
+#elif defined(_MSC_VER)
+#pragma warning( \
+ disable : 4235) /* nonstandard extension used: '__unaligned' \
+ * keyword not supported on this architecture */
+#define read_unaligned(ptr, bits) (*(const __unaligned uint##bits##_t *)(ptr))
+#else
+/* Same proxy-struct trick as the first branch, with the packing requested
+ * through #pragma pack instead of an attribute. */
+#pragma pack(push, 1)
+typedef struct {
+ uint8_t unaligned_8;
+ uint16_t unaligned_16;
+ uint32_t unaligned_32;
+ uint64_t unaligned_64;
+} t1ha_unaligned_proxy;
+#pragma pack(pop)
+#define read_unaligned(ptr, bits) \
+ (((const t1ha_unaligned_proxy *)((const uint8_t *)(ptr)-offsetof( \
+ t1ha_unaligned_proxy, unaligned_##bits))) \
+ ->unaligned_##bits)
+#endif
+#endif /* read_unaligned */
+
+/* read_aligned(ptr, bits): load a uintNN_t (bits = 16/32/64) from an address
+ * the caller guarantees is aligned to ALIGNMENT_NN. Each branch expresses that
+ * guarantee with whatever the compiler offers; the last branch is a plain
+ * dereference. */
+#ifndef read_aligned
+#if __GNUC_PREREQ(4, 8) || __has_builtin(__builtin_assume_aligned)
+#define read_aligned(ptr, bits) \
+ (*(const uint##bits##_t *)__builtin_assume_aligned(ptr, ALIGNMENT_##bits))
+#elif (__GNUC_PREREQ(3, 3) || __has_attribute(aligned)) && !defined(__clang__)
+#define read_aligned(ptr, bits) \
+ (*(const uint##bits##_t __attribute__((aligned(ALIGNMENT_##bits))) *)(ptr))
+#elif __has_attribute(assume_aligned)
+
+/* Casting helpers whose return value carries the assume_aligned attribute;
+ * read_aligned() in this branch dereferences their result. */
+static __always_inline const
+ uint16_t *__attribute__((assume_aligned(ALIGNMENT_16)))
+ cast_aligned_16(const void *ptr) {
+ return (const uint16_t *)ptr;
+}
+static __always_inline const
+ uint32_t *__attribute__((assume_aligned(ALIGNMENT_32)))
+ cast_aligned_32(const void *ptr) {
+ return (const uint32_t *)ptr;
+}
+static __always_inline const
+ uint64_t *__attribute__((assume_aligned(ALIGNMENT_64)))
+ cast_aligned_64(const void *ptr) {
+ return (const uint64_t *)ptr;
+}
+
+#define read_aligned(ptr, bits) (*cast_aligned_##bits(ptr))
+
+#elif defined(_MSC_VER)
+#define read_aligned(ptr, bits) \
+ (*(const __declspec(align(ALIGNMENT_##bits)) uint##bits##_t *)(ptr))
+#else
+#define read_aligned(ptr, bits) (*(const uint##bits##_t *)(ptr))
+#endif
+#endif /* read_aligned */
+
+#ifndef prefetch
+#if (__GNUC_PREREQ(4, 0) || __has_builtin(__builtin_prefetch)) && \
+ !defined(__ia32__)
+#define prefetch(ptr) __builtin_prefetch(ptr)
+#elif defined(_M_ARM64) || defined(_M_ARM)
+#define prefetch(ptr) __prefetch(ptr)
+#else
+#define prefetch(ptr) \
+ do { \
+ (void)(ptr); \
+ } while (0)
+#endif
+#endif /* prefetch */
+
+#if __has_warning("-Wconstant-logical-operand")
+#if defined(__clang__)
+#pragma clang diagnostic ignored "-Wconstant-logical-operand"
+#elif defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wconstant-logical-operand"
+#else
+#pragma warning disable "constant-logical-operand"
+#endif
+#endif /* -Wconstant-logical-operand */
+
+#if __has_warning("-Wtautological-pointer-compare")
+#if defined(__clang__)
+#pragma clang diagnostic ignored "-Wtautological-pointer-compare"
+#elif defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wtautological-pointer-compare"
+#else
+#pragma warning disable "tautological-pointer-compare"
+#endif
+#endif /* -Wtautological-pointer-compare */
+
+/***************************************************************************/
+
+#if __GNUC_PREREQ(4, 0)
+#pragma GCC visibility push(hidden)
+#endif /* __GNUC_PREREQ(4,0) */
+
+/*---------------------------------------------------------- Little Endian */
+
+#ifndef fetch16_le_aligned
+/* Loads a 16-bit little-endian value from `v`, which must be a multiple of
+ * ALIGNMENT_16 (asserted). */
+static __always_inline uint16_t fetch16_le_aligned(const void *v) {
+  assert(((uintptr_t)v) % ALIGNMENT_16 == 0);
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+  /* Host order is not little-endian: swap after loading. */
+  return bswap16(read_aligned(v, 16));
+#else
+  return read_aligned(v, 16);
+#endif
+}
+#endif /* fetch16_le_aligned */
+
+#ifndef fetch16_le_unaligned
+/* Loads a 16-bit little-endian value from `v` without any alignment
+ * requirement. */
+static __always_inline uint16_t fetch16_le_unaligned(const void *v) {
+#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE
+  /* No unaligned loads on this target: combine the two bytes by hand. */
+  const uint8_t *const bytes = (const uint8_t *)v;
+  const uint16_t high = (uint16_t)bytes[1];
+  return bytes[0] | high << 8;
+#elif __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+  return bswap16(read_unaligned(v, 16));
+#else
+  return read_unaligned(v, 16);
+#endif
+}
+#endif /* fetch16_le_unaligned */
+
+#ifndef fetch32_le_aligned
+/* Loads a 32-bit little-endian value from `v`, which must be a multiple of
+ * ALIGNMENT_32 (asserted). */
+static __always_inline uint32_t fetch32_le_aligned(const void *v) {
+  assert(((uintptr_t)v) % ALIGNMENT_32 == 0);
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+  /* Host order is not little-endian: swap after loading. */
+  return bswap32(read_aligned(v, 32));
+#else
+  return read_aligned(v, 32);
+#endif
+}
+#endif /* fetch32_le_aligned */
+
+#ifndef fetch32_le_unaligned
+/* Loads a 32-bit little-endian value from `v` without any alignment
+ * requirement. */
+static __always_inline uint32_t fetch32_le_unaligned(const void *v) {
+#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE
+  /* Build the value from two 16-bit halves; the half at offset 2 is the
+   * more significant one. */
+  const uint32_t low = fetch16_le_unaligned(v);
+  const uint32_t high = fetch16_le_unaligned((const uint8_t *)v + 2);
+  return low | high << 16;
+#elif __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+  return bswap32(read_unaligned(v, 32));
+#else
+  return read_unaligned(v, 32);
+#endif
+}
+#endif /* fetch32_le_unaligned */
+
+#ifndef fetch64_le_aligned
+/* Loads a 64-bit little-endian value from `v`, which must be a multiple of
+ * ALIGNMENT_64 (asserted). */
+static __always_inline uint64_t fetch64_le_aligned(const void *v) {
+  assert(((uintptr_t)v) % ALIGNMENT_64 == 0);
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+  /* Host order is not little-endian: swap after loading. */
+  return bswap64(read_aligned(v, 64));
+#else
+  return read_aligned(v, 64);
+#endif
+}
+#endif /* fetch64_le_aligned */
+
+#ifndef fetch64_le_unaligned
+/* Loads a 64-bit little-endian value from `v` without any alignment
+ * requirement. */
+static __always_inline uint64_t fetch64_le_unaligned(const void *v) {
+#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE
+  /* Build the value from two 32-bit halves; the half at offset 4 is the
+   * more significant one. */
+  const uint64_t low = fetch32_le_unaligned(v);
+  const uint64_t high = fetch32_le_unaligned((const uint8_t *)v + 4);
+  return low | high << 32;
+#elif __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+  return bswap64(read_unaligned(v, 64));
+#else
+  return read_unaligned(v, 64);
+#endif
+}
+#endif /* fetch64_le_unaligned */
+
+/* Loads the first (tail & 7) bytes at `v` as a little-endian value, where a
+ * remainder of 0 means all 8 bytes; p[0] ends up least significant and the
+ * unused upper bytes of the result are zero. `v` is read through the
+ * *_aligned fetch helpers, which assert their alignment. */
+static __always_inline uint64_t tail64_le_aligned(const void *v, size_t tail) {
+ const uint8_t *const p = (const uint8_t *)v;
+#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__)
+ /* We can perform a 'oneshot' read, which is little bit faster. */
+ /* This always loads 8 bytes at p, even when fewer were requested, and then
+ * masks away the top (8 - tail) bytes. */
+ const unsigned shift = ((8 - tail) & 7) << 3;
+ return fetch64_le_aligned(p) & ((~UINT64_C(0)) >> shift);
+#else
+ uint64_t r = 0;
+ /* The cases below cover every value of (tail & 7); each one adds its byte
+ * and falls through to the next lower count. */
+ switch (tail & 7) {
+ default:
+ unreachable();
+/* fall through */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ /* For most CPUs this code is better when not needed byte reordering. */
+ case 0:
+ return fetch64_le_aligned(p);
+ case 7:
+ r = (uint64_t)p[6] << 8;
+ /* fall through */
+ case 6:
+ r += p[5];
+ r <<= 8;
+ /* fall through */
+ case 5:
+ r += p[4];
+ r <<= 32;
+ /* fall through */
+ case 4:
+ return r + fetch32_le_aligned(p);
+ case 3:
+ r = (uint64_t)p[2] << 16;
+ /* fall through */
+ case 2:
+ return r + fetch16_le_aligned(p);
+ case 1:
+ return p[0];
+#else
+ /* Byte-by-byte accumulation, most significant byte first. */
+ case 0:
+ r = p[7] << 8;
+ /* fall through */
+ case 7:
+ r += p[6];
+ r <<= 8;
+ /* fall through */
+ case 6:
+ r += p[5];
+ r <<= 8;
+ /* fall through */
+ case 5:
+ r += p[4];
+ r <<= 8;
+ /* fall through */
+ case 4:
+ r += p[3];
+ r <<= 8;
+ /* fall through */
+ case 3:
+ r += p[2];
+ r <<= 8;
+ /* fall through */
+ case 2:
+ r += p[1];
+ r <<= 8;
+ /* fall through */
+ case 1:
+ return r + p[0];
+#endif
+ }
+#endif /* T1HA_USE_FAST_ONESHOT_READ */
+}
+
+/* can_read_underside(ptr, size): non-zero when any address bit selected by
+ * (PAGESIZE - size) is set in ptr. With the default PAGESIZE of 4096 and
+ * size 8 this tests address bits 3..11, i.e. whether ptr lies at least 8
+ * bytes past a 4096-byte boundary. The tail64_*_unaligned helpers use it to
+ * decide whether they may start their 8-byte load below ptr.
+ * NOTE(review): presumably PAGESIZE must match the real page size for this
+ * to be a safe guard - confirm for the target platform. */
+#if T1HA_USE_FAST_ONESHOT_READ && \
+ T1HA_SYS_UNALIGNED_ACCESS != T1HA_UNALIGNED_ACCESS__UNABLE && \
+ defined(PAGESIZE) && PAGESIZE > 42 && !defined(__SANITIZE_ADDRESS__)
+#define can_read_underside(ptr, size) \
+ (((PAGESIZE - (size)) & (uintptr_t)(ptr)) != 0)
+#endif /* T1HA_USE_FAST_ONESHOT_READ */
+
+/* Loads the first (tail & 7) bytes at `v` as a little-endian value, where a
+ * remainder of 0 means all 8 bytes; p[0] ends up least significant. No
+ * alignment of `v` is required. */
+static __always_inline uint64_t tail64_le_unaligned(const void *v,
+ size_t tail) {
+ const uint8_t *p = (const uint8_t *)v;
+#if defined(can_read_underside) && \
+ (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul)
+ /* On some systems (e.g. x86_64) we can perform a 'oneshot' read, which
+ * is little bit faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com>
+ * for the reminder. */
+ const unsigned offset = (8 - tail) & 7;
+ const unsigned shift = offset << 3;
+ if (likely(can_read_underside(p, 8))) {
+ /* Start the 8-byte load `offset` bytes before p so that it ends with the
+ * last requested byte, then shift the preceding bytes out. */
+ p -= offset;
+ return fetch64_le_unaligned(p) >> shift;
+ }
+ /* Otherwise load 8 bytes starting at p and mask off the surplus ones. */
+ return fetch64_le_unaligned(p) & ((~UINT64_C(0)) >> shift);
+#else
+ uint64_t r = 0;
+ /* The cases below cover every value of (tail & 7). */
+ switch (tail & 7) {
+ default:
+ unreachable();
+/* fall through */
+#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT && \
+ __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ /* For most CPUs this code is better when not needed
+ * copying for alignment or byte reordering. */
+ case 0:
+ return fetch64_le_unaligned(p);
+ case 7:
+ r = (uint64_t)p[6] << 8;
+ /* fall through */
+ case 6:
+ r += p[5];
+ r <<= 8;
+ /* fall through */
+ case 5:
+ r += p[4];
+ r <<= 32;
+ /* fall through */
+ case 4:
+ return r + fetch32_le_unaligned(p);
+ case 3:
+ r = (uint64_t)p[2] << 16;
+ /* fall through */
+ case 2:
+ return r + fetch16_le_unaligned(p);
+ case 1:
+ return p[0];
+#else
+ /* For most CPUs this code is better than a
+ * copying for alignment and/or byte reordering. */
+ case 0:
+ r = p[7] << 8;
+ /* fall through */
+ case 7:
+ r += p[6];
+ r <<= 8;
+ /* fall through */
+ case 6:
+ r += p[5];
+ r <<= 8;
+ /* fall through */
+ case 5:
+ r += p[4];
+ r <<= 8;
+ /* fall through */
+ case 4:
+ r += p[3];
+ r <<= 8;
+ /* fall through */
+ case 3:
+ r += p[2];
+ r <<= 8;
+ /* fall through */
+ case 2:
+ r += p[1];
+ r <<= 8;
+ /* fall through */
+ case 1:
+ return r + p[0];
+#endif
+ }
+#endif /* can_read_underside */
+}
+
+/*------------------------------------------------------------- Big Endian */
+
+#ifndef fetch16_be_aligned
+/* Loads a 16-bit big-endian value from `v`, which must be a multiple of
+ * ALIGNMENT_16 (asserted). */
+static __maybe_unused __always_inline uint16_t
+fetch16_be_aligned(const void *v) {
+  assert(((uintptr_t)v) % ALIGNMENT_16 == 0);
+#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
+  /* Host order is not big-endian: swap after loading. */
+  return bswap16(read_aligned(v, 16));
+#else
+  return read_aligned(v, 16);
+#endif
+}
+#endif /* fetch16_be_aligned */
+
+#ifndef fetch16_be_unaligned
+/* Loads a 16-bit big-endian value from `v` without any alignment
+ * requirement. */
+static __maybe_unused __always_inline uint16_t
+fetch16_be_unaligned(const void *v) {
+#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE
+  /* No unaligned loads on this target: the first byte is the high one. */
+  const uint8_t *const bytes = (const uint8_t *)v;
+  const uint16_t high = (uint16_t)bytes[0];
+  return high << 8 | bytes[1];
+#elif __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
+  return bswap16(read_unaligned(v, 16));
+#else
+  return read_unaligned(v, 16);
+#endif
+}
+#endif /* fetch16_be_unaligned */
+
+#ifndef fetch32_be_aligned
+/* Loads a 32-bit big-endian value from `v`, which must be a multiple of
+ * ALIGNMENT_32 (asserted). */
+static __maybe_unused __always_inline uint32_t
+fetch32_be_aligned(const void *v) {
+  assert(((uintptr_t)v) % ALIGNMENT_32 == 0);
+#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
+  /* Host order is not big-endian: swap after loading. */
+  return bswap32(read_aligned(v, 32));
+#else
+  return read_aligned(v, 32);
+#endif
+}
+#endif /* fetch32_be_aligned */
+
+#ifndef fetch32_be_unaligned
+/* Loads a 32-bit big-endian value from `v` without any alignment
+ * requirement. */
+static __maybe_unused __always_inline uint32_t
+fetch32_be_unaligned(const void *v) {
+#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE
+  /* Build the value from two 16-bit halves; the half at offset 0 is the
+   * more significant one. */
+  const uint32_t high = (uint32_t)fetch16_be_unaligned(v);
+  const uint16_t low = fetch16_be_unaligned((const uint8_t *)v + 2);
+  return high << 16 | low;
+#elif __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
+  return bswap32(read_unaligned(v, 32));
+#else
+  return read_unaligned(v, 32);
+#endif
+}
+#endif /* fetch32_be_unaligned */
+
+#ifndef fetch64_be_aligned
+/* Loads a 64-bit big-endian value from `v`, which must be a multiple of
+ * ALIGNMENT_64 (asserted). */
+static __maybe_unused __always_inline uint64_t
+fetch64_be_aligned(const void *v) {
+  assert(((uintptr_t)v) % ALIGNMENT_64 == 0);
+#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
+  /* Host order is not big-endian: swap after loading. */
+  return bswap64(read_aligned(v, 64));
+#else
+  return read_aligned(v, 64);
+#endif
+}
+#endif /* fetch64_be_aligned */
+
+#ifndef fetch64_be_unaligned
+/* Loads a 64-bit big-endian value from `v` without any alignment
+ * requirement. */
+static __maybe_unused __always_inline uint64_t
+fetch64_be_unaligned(const void *v) {
+#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE
+  /* Build the value from two 32-bit halves; the half at offset 0 is the
+   * more significant one. */
+  const uint64_t high = (uint64_t)fetch32_be_unaligned(v);
+  const uint32_t low = fetch32_be_unaligned((const uint8_t *)v + 4);
+  return high << 32 | low;
+#elif __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
+  return bswap64(read_unaligned(v, 64));
+#else
+  return read_unaligned(v, 64);
+#endif
+}
+#endif /* fetch64_be_unaligned */
+
+/* Loads the first (tail & 7) bytes at `v` as a big-endian value, where a
+ * remainder of 0 means all 8 bytes; the last requested byte ends up least
+ * significant. `v` is read through the *_aligned fetch helpers, which assert
+ * their alignment. */
+static __maybe_unused __always_inline uint64_t tail64_be_aligned(const void *v,
+ size_t tail) {
+ const uint8_t *const p = (const uint8_t *)v;
+#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__)
+ /* We can perform a 'oneshot' read, which is little bit faster. */
+ /* This always loads 8 bytes at p, even when fewer were requested; the
+ * surplus bytes land in the low end and are shifted out. */
+ const unsigned shift = ((8 - tail) & 7) << 3;
+ return fetch64_be_aligned(p) >> shift;
+#else
+ /* The cases below cover every value of (tail & 7). */
+ switch (tail & 7) {
+ default:
+ unreachable();
+/* fall through */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ /* For most CPUs this code is better when not byte reordering. */
+ case 1:
+ return p[0];
+ case 2:
+ return fetch16_be_aligned(p);
+ case 3:
+ return (uint32_t)fetch16_be_aligned(p) << 8 | p[2];
+ case 4:
+ return fetch32_be_aligned(p);
+ case 5:
+ return (uint64_t)fetch32_be_aligned(p) << 8 | p[4];
+ case 6:
+ return (uint64_t)fetch32_be_aligned(p) << 16 | fetch16_be_aligned(p + 4);
+ case 7:
+ return (uint64_t)fetch32_be_aligned(p) << 24 |
+ (uint32_t)fetch16_be_aligned(p + 4) << 8 | p[6];
+ case 0:
+ return fetch64_be_aligned(p);
+#else
+ /* Byte-by-byte composition; p[0] is always the most significant byte. */
+ case 1:
+ return p[0];
+ case 2:
+ return p[1] | (uint32_t)p[0] << 8;
+ case 3:
+ return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16;
+ case 4:
+ return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 |
+ (uint32_t)p[0] << 24;
+ case 5:
+ return p[4] | (uint32_t)p[3] << 8 | (uint32_t)p[2] << 16 |
+ (uint32_t)p[1] << 24 | (uint64_t)p[0] << 32;
+ case 6:
+ return p[5] | (uint32_t)p[4] << 8 | (uint32_t)p[3] << 16 |
+ (uint32_t)p[2] << 24 | (uint64_t)p[1] << 32 | (uint64_t)p[0] << 40;
+ case 7:
+ return p[6] | (uint32_t)p[5] << 8 | (uint32_t)p[4] << 16 |
+ (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 | (uint64_t)p[1] << 40 |
+ (uint64_t)p[0] << 48;
+ case 0:
+ return p[7] | (uint32_t)p[6] << 8 | (uint32_t)p[5] << 16 |
+ (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 | (uint64_t)p[2] << 40 |
+ (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56;
+#endif
+ }
+#endif /* T1HA_USE_FAST_ONESHOT_READ */
+}
+
+/* Loads the first (tail & 7) bytes at `v` as a big-endian value, where a
+ * remainder of 0 means all 8 bytes; the last requested byte ends up least
+ * significant. No alignment of `v` is required.
+ *
+ * Three implementations are selected at compile time:
+ *  - a single 8-byte load when can_read_underside is available on a 64-bit
+ *    target,
+ *  - composition from unaligned 16/32/64-bit big-endian loads on big-endian
+ *    hosts with efficient unaligned access,
+ *  - byte-by-byte composition otherwise. */
+static __maybe_unused __always_inline uint64_t
+tail64_be_unaligned(const void *v, size_t tail) {
+  const uint8_t *p = (const uint8_t *)v;
+#if defined(can_read_underside) &&                                             \
+    (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul)
+  /* On some systems (e.g. x86_64) we can perform a 'oneshot' read, which
+   * is little bit faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com>
+   * for the reminder. */
+  const unsigned offset = (8 - tail) & 7;
+  const unsigned shift = offset << 3;
+  if (likely(can_read_underside(p, 8))) {
+    /* Start the load `offset` bytes before p so that it ends with the last
+     * requested byte; the preceding bytes occupy the top and are masked. */
+    p -= offset;
+    return fetch64_be_unaligned(p) & ((~UINT64_C(0)) >> shift);
+  }
+  /* Otherwise load 8 bytes starting at p and shift the surplus ones out. */
+  return fetch64_be_unaligned(p) >> shift;
+#else
+  /* The cases below cover every value of (tail & 7). */
+  switch (tail & 7) {
+  default:
+    unreachable();
+/* fall through */
+#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT &&           \
+    __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  /* For most CPUs this code is better when not needed
+   * copying for alignment or byte reordering. */
+  case 1:
+    return p[0];
+  case 2:
+    return fetch16_be_unaligned(p);
+  case 3:
+    return (uint32_t)fetch16_be_unaligned(p) << 8 | p[2];
+  case 4:
+    /* Was fetch32_be(p), a name this header never defines; use the
+     * unaligned helper like every other case of this branch. */
+    return fetch32_be_unaligned(p);
+  case 5:
+    return (uint64_t)fetch32_be_unaligned(p) << 8 | p[4];
+  case 6:
+    return (uint64_t)fetch32_be_unaligned(p) << 16 |
+           fetch16_be_unaligned(p + 4);
+  case 7:
+    return (uint64_t)fetch32_be_unaligned(p) << 24 |
+           (uint32_t)fetch16_be_unaligned(p + 4) << 8 | p[6];
+  case 0:
+    return fetch64_be_unaligned(p);
+#else
+  /* For most CPUs this code is better than a
+   * copying for alignment and/or byte reordering. */
+  case 1:
+    return p[0];
+  case 2:
+    return p[1] | (uint32_t)p[0] << 8;
+  case 3:
+    return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16;
+  case 4:
+    return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 |
+           (uint32_t)p[0] << 24;
+  case 5:
+    return p[4] | (uint32_t)p[3] << 8 | (uint32_t)p[2] << 16 |
+           (uint32_t)p[1] << 24 | (uint64_t)p[0] << 32;
+  case 6:
+    return p[5] | (uint32_t)p[4] << 8 | (uint32_t)p[3] << 16 |
+           (uint32_t)p[2] << 24 | (uint64_t)p[1] << 32 | (uint64_t)p[0] << 40;
+  case 7:
+    return p[6] | (uint32_t)p[5] << 8 | (uint32_t)p[4] << 16 |
+           (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 | (uint64_t)p[1] << 40 |
+           (uint64_t)p[0] << 48;
+  case 0:
+    return p[7] | (uint32_t)p[6] << 8 | (uint32_t)p[5] << 16 |
+           (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 | (uint64_t)p[2] << 40 |
+           (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56;
+#endif
+  }
+#endif /* can_read_underside */
+}
+
+/***************************************************************************/
+
+#ifndef rot64
+/* Rotates `v` right by `s` bits. The count is reduced modulo 64, so s == 0
+ * (and any multiple of 64) returns `v` unchanged instead of shifting by the
+ * full width of the type, which the C standard leaves undefined. Results for
+ * 1 <= s <= 63 are the same as before. */
+static __always_inline uint64_t rot64(uint64_t v, unsigned s) {
+  s &= 63;
+  return (v >> s) | (v << ((64 - s) & 63));
+}
+#endif /* rot64 */
+
+#ifndef mul_32x32_64
+/* Full 64-bit product of two 32-bit operands; one operand is widened first
+ * so the multiplication is carried out in 64 bits. */
+static __always_inline uint64_t mul_32x32_64(uint32_t a, uint32_t b) {
+  const uint64_t wide_b = b;
+  return a * wide_b;
+}
+#endif /* mul_32x32_64 */
+
+#ifndef add64carry_first
+/* First step of a multi-word addition: stores base + addend (mod 2^64) in
+ * *sum and returns the carry out of that addition. */
+static __maybe_unused __always_inline unsigned
+add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) {
+#if __has_builtin(__builtin_addcll)
+  unsigned long long carry_out;
+  *sum = __builtin_addcll(base, addend, 0, &carry_out);
+  return (unsigned)carry_out;
+#else
+  /* The sum wrapped around exactly when it is smaller than an operand. */
+  const uint64_t total = base + addend;
+  *sum = total;
+  return total < addend;
+#endif /* __has_builtin(__builtin_addcll) */
+}
+#endif /* add64carry_first */
+
+#ifndef add64carry_next
+/* Middle step of a multi-word addition: stores base + addend + carry
+ * (mod 2^64) in *sum and returns the carry out. */
+static __maybe_unused __always_inline unsigned
+add64carry_next(unsigned carry, uint64_t base, uint64_t addend, uint64_t *sum) {
+#if __has_builtin(__builtin_addcll)
+  unsigned long long carry_out;
+  *sum = __builtin_addcll(base, addend, carry, &carry_out);
+  return (unsigned)carry_out;
+#else
+  const uint64_t total = base + addend + carry;
+  *sum = total;
+  if (total < addend)
+    return 1;
+  /* With an incoming carry, a result equal to addend also means overflow. */
+  return carry && total == addend;
+#endif /* __has_builtin(__builtin_addcll) */
+}
+#endif /* add64carry_next */
+
+#ifndef add64carry_last
+/* Final step of a multi-word addition: stores base + addend + carry
+ * (mod 2^64) in *sum; any carry out is dropped. */
+static __maybe_unused __always_inline void
+add64carry_last(unsigned carry, uint64_t base, uint64_t addend, uint64_t *sum) {
+#if __has_builtin(__builtin_addcll)
+  unsigned long long ignored_carry;
+  *sum = __builtin_addcll(base, addend, carry, &ignored_carry);
+  (void)ignored_carry;
+#else
+  const uint64_t total = base + addend + carry;
+  *sum = total;
+#endif /* __has_builtin(__builtin_addcll) */
+}
+#endif /* add64carry_last */
+
+#ifndef mul_64x64_128
+/* 64x64 -> 128-bit unsigned multiplication: returns the low 64 bits of a * b
+ * and stores the high 64 bits in *h. Uses a native 128-bit type when the
+ * compiler advertises one, else a mul_64x64_high macro if one is defined,
+ * else four 32x32 partial products. */
+static __maybe_unused __always_inline uint64_t mul_64x64_128(uint64_t a,
+                                                             uint64_t b,
+                                                             uint64_t *h) {
+#if defined(__SIZEOF_INT128__) ||                                              \
+    (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
+  /* modern GCC could nicely optimize this */
+  const __uint128_t product = (__uint128_t)a * (__uint128_t)b;
+  *h = (uint64_t)(product >> 64);
+  return (uint64_t)product;
+#elif defined(mul_64x64_high)
+  *h = mul_64x64_high(a, b);
+  return a * b;
+#else
+  /* Schoolbook multiplication on 32-bit halves. Names give the half of `a`
+   * first, then the half of `b`. */
+  const uint64_t lo_lo = mul_32x32_64((uint32_t)a, (uint32_t)b);
+  const uint64_t hi_lo = mul_32x32_64(a >> 32, (uint32_t)b);
+  const uint64_t lo_hi = mul_32x32_64((uint32_t)a, b >> 32);
+  const uint64_t hi_hi = mul_32x32_64(a >> 32, b >> 32);
+
+  /* A few simplifications are possible here for 32-bit architectures, but
+   * they would break compatibility with the original 64-bit version. That
+   * would be a bad trade: 32-bit t1ha would still be (relatively) slow and
+   * would no longer produce the same results. */
+  uint64_t low;
+  unsigned carry;
+
+  /* Fold in the first cross product: its low half goes into bits 32..63 of
+   * the low word, its high half (plus carry) into the high word. */
+  carry = add64carry_first(lo_lo, hi_lo << 32, &low);
+  add64carry_last(carry, hi_hi, hi_lo >> 32, h);
+
+  /* Same for the second cross product. */
+  carry = add64carry_first(low, lo_hi << 32, &low);
+  add64carry_last(carry, *h, lo_hi >> 32, h);
+  return low;
+#endif
+}
+#endif /* mul_64x64_128() */
+
+#ifndef mul_64x64_high
+/* High 64 bits of the 128-bit product a * b; the low half computed by
+ * mul_64x64_128() is discarded. */
+static __maybe_unused __always_inline uint64_t mul_64x64_high(uint64_t a,
+                                                              uint64_t b) {
+  uint64_t high;
+  (void)mul_64x64_128(a, b, &high);
+  return high;
+}
+#endif /* mul_64x64_high */
+
+/***************************************************************************/
+
+/* 'magic' primes */
+/* 64-bit multiplier constants for the mixing helpers; within this header,
+ * prime_0, prime_5 and prime_6 are consumed by final64().
+ * NOTE(review): primality is taken from the comment above and is not checked
+ * anywhere in this header. */
+static const uint64_t prime_0 = UINT64_C(0xEC99BF0D8372CAAB);
+static const uint64_t prime_1 = UINT64_C(0x82434FE90EDCEF39);
+static const uint64_t prime_2 = UINT64_C(0xD4F06DB99D67BE4B);
+static const uint64_t prime_3 = UINT64_C(0xBD9CACC22C6E9571);
+static const uint64_t prime_4 = UINT64_C(0x9C06FAF4D023E3AB);
+static const uint64_t prime_5 = UINT64_C(0xC060724A8424F345);
+static const uint64_t prime_6 = UINT64_C(0xCB5AF53AE3AAAC31);
+
+/* Multiplies `v` by `prime` as a full 128-bit product and folds it to 64 bits
+ * by xor-ing the high half into the low half. */
+static __maybe_unused __always_inline uint64_t mux64(uint64_t v,
+                                                     uint64_t prime) {
+  uint64_t high;
+  const uint64_t low = mul_64x64_128(v, prime, &high);
+  return low ^ high;
+}
+
+/* Collapses the two 64-bit state words into one value: each word is combined
+ * with a rotated copy of the other and multiplied by its own constant, then
+ * the xor of both products goes through mux64() with prime_5. */
+static __always_inline uint64_t final64(uint64_t a, uint64_t b) {
+  const uint64_t mixed_a = (a + rot64(b, 41)) * prime_0;
+  const uint64_t mixed_b = (rot64(a, 23) + b) * prime_6;
+  return mux64(mixed_a ^ mixed_b, prime_5);
+}
+
+/* Mixes `v` into the state pair (*a, *b): forms the 128-bit product of
+ * (*b + v) and `prime`, xors its low half into *a and adds its high half to
+ * *b. The two pointers are declared __restrict, i.e. must not alias. */
+static __always_inline void mixup64(uint64_t *__restrict a,
+                                    uint64_t *__restrict b, uint64_t v,
+                                    uint64_t prime) {
+  uint64_t high;
+  const uint64_t low = mul_64x64_128(*b + v, prime, &high);
+  *a ^= low;
+  *b += high;
+}
diff --git a/src/Crypto/t1ha_selfcheck.c b/src/Crypto/t1ha_selfcheck.c
new file mode 100644
index 00000000..51d02912
--- /dev/null
+++ b/src/Crypto/t1ha_selfcheck.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com,
+ * Fast Positive Hash.
+ *
+ * Portions Copyright (c) 2010-2018 Leonid Yuriev <leo@yuriev.ru>,
+ * The 1Hippeus project (t1h).
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgement in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" }
+ * by [Positive Technologies](https://www.ptsecurity.ru)
+ *
+ * Briefly, it is a 64-bit Hash Function:
+ * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64,
+ * but portable and without penalties it can run on any 64-bit CPU.
+ * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash
+ * and all others portable hash-functions (which do not use specific
+ * hardware tricks).
+ * 3. Not suitable for cryptography.
+ *
+ * The Future will Positive. Всё будет хорошо.
+ *
+ * ACKNOWLEDGEMENT:
+ * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев)
+ * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta!
+ */
+
+#include "t1ha_selfcheck.h"
+#include "t1ha_bits.h"
+
+/* Fixed 64-byte input used by t1ha_selfcheck(): it hashes the whole array,
+ * every prefix of 1..63 bytes, and the suffixes starting at offsets 1..7. */
+const uint8_t t1ha_test_pattern[64] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0x7F, 0x3F,
+ 0x1F, 0xF, 8, 16, 32, 64, 0x80, 0xFE, 0xFC, 0xF8, 0xF0,
+ 0xE0, 0xC0, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x55, 0xAA, 11,
+ 17, 19, 23, 29, 37, 42, 43, 'a', 'b', 'c', 'd',
+ 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x'};
+
+/* Hashes `len` bytes at `data` with `seed` and compares the result with
+ * `reference`. Returns 0 on a match and 1 on a mismatch; builds with
+ * assertions enabled additionally trip the assert on a mismatch. */
+static VC_INLINE int probe(uint64_t (*hash)(const void *, size_t, uint64_t),
+                           const uint64_t reference, const void *data,
+                           unsigned len, uint64_t seed) {
+  const uint64_t actual = hash(data, len, seed);
+  assert(actual == reference);
+  if (actual == reference)
+    return 0;
+  return 1;
+}
+
+/* Runs `hash` over a fixed series of inputs and compares each result with the
+ * next entry of `reference_values`, which must therefore hold one value per
+ * probe in exactly this order (3 + 63 + 7 + 8 entries). Every probe is
+ * executed even after a mismatch. Returns 0 when all results match, -1
+ * otherwise. */
+__cold int t1ha_selfcheck(uint64_t (*hash)(const void *, size_t, uint64_t),
+                          const uint64_t *reference_values) {
+  const uint64_t zero = 0;
+  const uint64_t *expected = reference_values;
+  uint8_t pattern_long[512];
+  uint64_t seed;
+  int mismatches = 0;
+  int i;
+
+  /* Empty input with an all-zero and an all-ones seed, then the complete
+   * 64-byte pattern with a zero seed. */
+  mismatches |= probe(hash, *expected++, NULL, 0, zero);
+  mismatches |= probe(hash, *expected++, NULL, 0, ~zero);
+  mismatches |= probe(hash, *expected++, t1ha_test_pattern, 64, zero);
+
+  /* Prefixes of 1..63 bytes; the seed is a single bit moving up one position
+   * per probe. */
+  seed = 1;
+  for (i = 1; i < 64; i++) {
+    mismatches |= probe(hash, *expected++, t1ha_test_pattern, i, seed);
+    seed <<= 1;
+  }
+
+  /* Suffixes starting at offsets 1..7; the seed is all-ones shifted left by
+   * the offset. */
+  seed = ~zero;
+  for (i = 1; i <= 7; i++) {
+    seed <<= 1;
+    mismatches |=
+        probe(hash, *expected++, t1ha_test_pattern + i, 64 - i, seed);
+  }
+
+  /* Longer inputs taken from a 512-byte buffer filled with (uint8_t)index:
+   * offsets 0..7 with lengths 128 + 17 * offset. The seed keeps the value
+   * left by the previous loop. */
+  for (i = 0; i < sizeof(pattern_long); ++i)
+    pattern_long[i] = (uint8_t)i;
+  for (i = 0; i <= 7; i++) {
+    mismatches |=
+        probe(hash, *expected++, pattern_long + i, 128 + i * 17, seed);
+  }
+
+  return mismatches ? -1 : 0;
+}
diff --git a/src/Crypto/t1ha_selfcheck.h b/src/Crypto/t1ha_selfcheck.h
new file mode 100644
index 00000000..943bf2d2
--- /dev/null
+++ b/src/Crypto/t1ha_selfcheck.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com,
+ * Fast Positive Hash.
+ *
+ * Portions Copyright (c) 2010-2018 Leonid Yuriev <leo@yuriev.ru>,
+ * The 1Hippeus project (t1h).
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgement in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" }
+ * by [Positive Technologies](https://www.ptsecurity.ru)
+ *
+ * Briefly, it is a 64-bit Hash Function:
+ * 1. Created for 64-bit little-endian platforms, predominantly for x86_64,
+ * but portable and without penalties it can run on any 64-bit CPU.
+ * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash
+ * and all other portable hash functions (which do not use specific
+ * hardware tricks).
+ * 3. Not suitable for cryptography.
+ *
+ * The Future will (be) Positive. Всё будет хорошо.
+ *
+ * ACKNOWLEDGEMENT:
+ * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев)
+ * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta!
+ */
+
+#pragma once
+#if defined(_MSC_VER) && _MSC_VER > 1800
+#pragma warning(disable : 4464) /* relative include path contains '..' */
+#endif /* MSVC */
+#include "t1ha.h"
+
+/***************************************************************************/
+/* Self-checking
+ *
+ * t1ha_selfcheck() runs `hash` over fixed inputs and compares each result
+ * with the next entry of `reference_values`. It reads 81 entries in total
+ * (3 + 63 + 7 + 8 probes) and returns 0 when every result matches, or -1
+ * when at least one differs. In builds where assert() is active, a mismatch
+ * trips an assertion before the function returns.
+ *
+ * Each t1ha_refval_* table declared below holds 81 values, which matches
+ * that count; presumably there is one table per hash variant, intended to
+ * be passed as `reference_values` -- TODO confirm against the
+ * variant-specific self-check sources.
+ */
+
+extern const uint8_t t1ha_test_pattern[64]; /* 64-byte input for the probes */
+int t1ha_selfcheck(uint64_t (*hash)(const void *, size_t, uint64_t),
+ const uint64_t *reference_values);
+
+#ifndef T1HA2_DISABLED
+extern const uint64_t t1ha_refval_2atonce[81];
+extern const uint64_t t1ha_refval_2atonce128[81];
+extern const uint64_t t1ha_refval_2stream[81];
+extern const uint64_t t1ha_refval_2stream128[81];
+#endif /* T1HA2_DISABLED */
+
+#ifndef T1HA1_DISABLED
+extern const uint64_t t1ha_refval_64le[81];
+extern const uint64_t t1ha_refval_64be[81];
+#endif /* T1HA1_DISABLED */
+
+#ifndef T1HA0_DISABLED
+extern const uint64_t t1ha_refval_32le[81];
+extern const uint64_t t1ha_refval_32be[81];
+#if T1HA0_AESNI_AVAILABLE
+extern const uint64_t t1ha_refval_ia32aes_a[81];
+extern const uint64_t t1ha_refval_ia32aes_b[81];
+#endif /* T1HA0_AESNI_AVAILABLE */
+#endif /* T1HA0_DISABLED */