From fc37cc4a02ed13d1a73b941a9f80975600fd1b99 Mon Sep 17 00:00:00 2001 From: David Foerster Date: Tue, 10 May 2016 20:20:14 +0200 Subject: Normalize all line terminators --- src/Crypto/Aes.h | 430 +++---- src/Crypto/AesSmall.c | 1906 ++++++++++++++-------------- src/Crypto/AesSmall.h | 338 ++--- src/Crypto/AesSmall_x86.asm | 2888 +++++++++++++++++++++---------------------- src/Crypto/Aes_hw_cpu.asm | 660 +++++----- src/Crypto/Aes_hw_cpu.h | 48 +- src/Crypto/Aes_x64.asm | 1814 +++++++++++++-------------- src/Crypto/Aes_x86.asm | 1292 +++++++++---------- src/Crypto/Aescrypt.c | 622 +++++----- src/Crypto/Aeskey.c | 1146 ++++++++--------- src/Crypto/Aesopt.h | 1468 +++++++++++----------- src/Crypto/Aestab.c | 856 ++++++------- src/Crypto/Aestab.h | 348 +++--- src/Crypto/Crypto.vcproj | 1034 ++++++++-------- src/Crypto/Makefile | 2 +- src/Crypto/Makefile.inc | 30 +- src/Crypto/Rmd160.c | 996 +++++++-------- src/Crypto/Rmd160.h | 66 +- src/Crypto/Serpent.c | 1876 ++++++++++++++-------------- src/Crypto/Serpent.h | 40 +- src/Crypto/Sha2.c | 1506 +++++++++++----------- src/Crypto/Sha2.h | 310 ++--- src/Crypto/Sha2Small.c | 468 +++---- src/Crypto/Sha2Small.h | 26 +- src/Crypto/Sources | 40 +- src/Crypto/Twofish.c | 1098 ++++++++-------- src/Crypto/Twofish.h | 112 +- src/Crypto/Whirlpool.h | 54 +- src/Crypto/cpu.c | 462 +++---- src/Crypto/cpu.h | 616 ++++----- 30 files changed, 11276 insertions(+), 11276 deletions(-) (limited to 'src/Crypto') diff --git a/src/Crypto/Aes.h b/src/Crypto/Aes.h index 7a1eff47..e12c6fc8 100644 --- a/src/Crypto/Aes.h +++ b/src/Crypto/Aes.h @@ -1,215 +1,215 @@ -/* - --------------------------------------------------------------------------- - Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. - - LICENSE TERMS - - The free distribution and use of this software is allowed (with or without - changes) provided that: - - 1. source code distributions include the above copyright notice, this - list of conditions and the following disclaimer; - - 2. binary distributions include the above copyright notice, this list - of conditions and the following disclaimer in their documentation; - - 3. the name of the copyright holder is not used to endorse products - built using this software without specific written permission. - - DISCLAIMER - - This software is provided 'as is' with no explicit or implied warranties - in respect of its properties, including, but not limited to, correctness - and/or fitness for purpose. - --------------------------------------------------------------------------- - Issue Date: 20/12/2007 - - This file contains the definitions required to use AES in C. See aesopt.h - for optimisation details. -*/ - -/* Adapted for TrueCrypt */ - -#ifndef _AES_H -#define _AES_H - -#include "Common/Tcdefs.h" - -#ifndef EXIT_SUCCESS -#define EXIT_SUCCESS 0 -#define EXIT_FAILURE 1 -#endif -#define INT_RETURN int - -#if defined(__cplusplus) -extern "C" -{ -#endif - -// #define AES_128 /* define if AES with 128 bit keys is needed */ -// #define AES_192 /* define if AES with 192 bit keys is needed */ -#define AES_256 /* define if AES with 256 bit keys is needed */ -// #define AES_VAR /* define if a variable key size is needed */ -// #define AES_MODES /* define if support is needed for modes */ - -/* The following must also be set in assembler files if being used */ - -#define AES_ENCRYPT /* if support for encryption is needed */ -#define AES_DECRYPT /* if support for decryption is needed */ -#define AES_ERR_CHK /* for parameter checks & error return codes */ -#define AES_REV_DKS /* define to reverse decryption key schedule */ - -#define AES_BLOCK_SIZE 16 /* the AES block size in bytes */ -#define N_COLS 4 /* the number of columns in the state */ - -/* The key schedule length is 11, 13 or 15 16-byte blocks for 128, */ -/* 192 or 256-bit keys respectively. That is 176, 208 or 240 bytes */ -/* or 44, 52 or 60 32-bit words. */ - -#if defined( AES_VAR ) || defined( AES_256 ) -#define KS_LENGTH 60 -#elif defined( AES_192 ) -#define KS_LENGTH 52 -#else -#define KS_LENGTH 44 -#endif - -#if defined( AES_ERR_CHK ) -#define AES_RETURN INT_RETURN -#else -#define AES_RETURN VOID_RETURN -#endif - -/* the character array 'inf' in the following structures is used */ -/* to hold AES context information. This AES code uses cx->inf.b[0] */ -/* to hold the number of rounds multiplied by 16. The other three */ -/* elements can be used by code that implements additional modes */ - -typedef union -{ uint_32t l; - uint_8t b[4]; -} aes_inf; - -typedef struct -{ uint_32t ks[KS_LENGTH]; - aes_inf inf; -} aes_encrypt_ctx; - -typedef struct -{ uint_32t ks[KS_LENGTH]; - aes_inf inf; -} aes_decrypt_ctx; - -/* This routine must be called before first use if non-static */ -/* tables are being used */ - -AES_RETURN aes_init(void); - -/* Key lengths in the range 16 <= key_len <= 32 are given in bytes, */ -/* those in the range 128 <= key_len <= 256 are given in bits */ - -#if defined( AES_ENCRYPT ) - -#if defined(AES_128) || defined(AES_VAR) -AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]); -#endif - -#if defined(AES_192) || defined(AES_VAR) -AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]); -#endif - -#if defined(AES_256) || defined(AES_VAR) -AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]); -#endif - -#if defined(AES_VAR) -AES_RETURN aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1]); -#endif - -AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1]); - -#endif - -#if defined( AES_DECRYPT ) - -#if defined(AES_128) || defined(AES_VAR) -AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]); -#endif - -#if defined(AES_192) || defined(AES_VAR) -AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]); -#endif - -#if defined(AES_256) || defined(AES_VAR) -AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]); -#endif - -#if defined(AES_VAR) -AES_RETURN aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1]); -#endif - -AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]); - -#endif - -#if defined(AES_MODES) - -/* Multiple calls to the following subroutines for multiple block */ -/* ECB, CBC, CFB, OFB and CTR mode encryption can be used to handle */ -/* long messages incremantally provided that the context AND the iv */ -/* are preserved between all such calls. For the ECB and CBC modes */ -/* each individual call within a series of incremental calls must */ -/* process only full blocks (i.e. len must be a multiple of 16) but */ -/* the CFB, OFB and CTR mode calls can handle multiple incremental */ -/* calls of any length. Each mode is reset when a new AES key is */ -/* set but ECB and CBC operations can be reset without setting a */ -/* new key by setting a new IV value. To reset CFB, OFB and CTR */ -/* without setting the key, aes_mode_reset() must be called and the */ -/* IV must be set. NOTE: All these calls update the IV on exit so */ -/* this has to be reset if a new operation with the same IV as the */ -/* previous one is required (or decryption follows encryption with */ -/* the same IV array). */ - -AES_RETURN aes_test_alignment_detection(unsigned int n); - -AES_RETURN aes_ecb_encrypt(const unsigned char *ibuf, unsigned char *obuf, - int len, const aes_encrypt_ctx cx[1]); - -AES_RETURN aes_ecb_decrypt(const unsigned char *ibuf, unsigned char *obuf, - int len, const aes_decrypt_ctx cx[1]); - -AES_RETURN aes_cbc_encrypt(const unsigned char *ibuf, unsigned char *obuf, - int len, unsigned char *iv, const aes_encrypt_ctx cx[1]); - -AES_RETURN aes_cbc_decrypt(const unsigned char *ibuf, unsigned char *obuf, - int len, unsigned char *iv, const aes_decrypt_ctx cx[1]); - -AES_RETURN aes_mode_reset(aes_encrypt_ctx cx[1]); - -AES_RETURN aes_cfb_encrypt(const unsigned char *ibuf, unsigned char *obuf, - int len, unsigned char *iv, aes_encrypt_ctx cx[1]); - -AES_RETURN aes_cfb_decrypt(const unsigned char *ibuf, unsigned char *obuf, - int len, unsigned char *iv, aes_encrypt_ctx cx[1]); - -#define aes_ofb_encrypt aes_ofb_crypt -#define aes_ofb_decrypt aes_ofb_crypt - -AES_RETURN aes_ofb_crypt(const unsigned char *ibuf, unsigned char *obuf, - int len, unsigned char *iv, aes_encrypt_ctx cx[1]); - -typedef void cbuf_inc(unsigned char *cbuf); - -#define aes_ctr_encrypt aes_ctr_crypt -#define aes_ctr_decrypt aes_ctr_crypt - -AES_RETURN aes_ctr_crypt(const unsigned char *ibuf, unsigned char *obuf, - int len, unsigned char *cbuf, cbuf_inc ctr_inc, aes_encrypt_ctx cx[1]); - -#endif - -#if defined(__cplusplus) -} -#endif - -#endif +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + + This file contains the definitions required to use AES in C. See aesopt.h + for optimisation details. +*/ + +/* Adapted for TrueCrypt */ + +#ifndef _AES_H +#define _AES_H + +#include "Common/Tcdefs.h" + +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#define EXIT_FAILURE 1 +#endif +#define INT_RETURN int + +#if defined(__cplusplus) +extern "C" +{ +#endif + +// #define AES_128 /* define if AES with 128 bit keys is needed */ +// #define AES_192 /* define if AES with 192 bit keys is needed */ +#define AES_256 /* define if AES with 256 bit keys is needed */ +// #define AES_VAR /* define if a variable key size is needed */ +// #define AES_MODES /* define if support is needed for modes */ + +/* The following must also be set in assembler files if being used */ + +#define AES_ENCRYPT /* if support for encryption is needed */ +#define AES_DECRYPT /* if support for decryption is needed */ +#define AES_ERR_CHK /* for parameter checks & error return codes */ +#define AES_REV_DKS /* define to reverse decryption key schedule */ + +#define AES_BLOCK_SIZE 16 /* the AES block size in bytes */ +#define N_COLS 4 /* the number of columns in the state */ + +/* The key schedule length is 11, 13 or 15 16-byte blocks for 128, */ +/* 192 or 256-bit keys respectively. That is 176, 208 or 240 bytes */ +/* or 44, 52 or 60 32-bit words. */ + +#if defined( AES_VAR ) || defined( AES_256 ) +#define KS_LENGTH 60 +#elif defined( AES_192 ) +#define KS_LENGTH 52 +#else +#define KS_LENGTH 44 +#endif + +#if defined( AES_ERR_CHK ) +#define AES_RETURN INT_RETURN +#else +#define AES_RETURN VOID_RETURN +#endif + +/* the character array 'inf' in the following structures is used */ +/* to hold AES context information. This AES code uses cx->inf.b[0] */ +/* to hold the number of rounds multiplied by 16. The other three */ +/* elements can be used by code that implements additional modes */ + +typedef union +{ uint_32t l; + uint_8t b[4]; +} aes_inf; + +typedef struct +{ uint_32t ks[KS_LENGTH]; + aes_inf inf; +} aes_encrypt_ctx; + +typedef struct +{ uint_32t ks[KS_LENGTH]; + aes_inf inf; +} aes_decrypt_ctx; + +/* This routine must be called before first use if non-static */ +/* tables are being used */ + +AES_RETURN aes_init(void); + +/* Key lengths in the range 16 <= key_len <= 32 are given in bytes, */ +/* those in the range 128 <= key_len <= 256 are given in bits */ + +#if defined( AES_ENCRYPT ) + +#if defined(AES_128) || defined(AES_VAR) +AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]); +#endif + +#if defined(AES_192) || defined(AES_VAR) +AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]); +#endif + +#if defined(AES_256) || defined(AES_VAR) +AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]); +#endif + +#if defined(AES_VAR) +AES_RETURN aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1]); +#endif + +AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1]); + +#endif + +#if defined( AES_DECRYPT ) + +#if defined(AES_128) || defined(AES_VAR) +AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]); +#endif + +#if defined(AES_192) || defined(AES_VAR) +AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]); +#endif + +#if defined(AES_256) || defined(AES_VAR) +AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]); +#endif + +#if defined(AES_VAR) +AES_RETURN aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1]); +#endif + +AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]); + +#endif + +#if defined(AES_MODES) + +/* Multiple calls to the following subroutines for multiple block */ +/* ECB, CBC, CFB, OFB and CTR mode encryption can be used to handle */ +/* long messages incremantally provided that the context AND the iv */ +/* are preserved between all such calls. For the ECB and CBC modes */ +/* each individual call within a series of incremental calls must */ +/* process only full blocks (i.e. len must be a multiple of 16) but */ +/* the CFB, OFB and CTR mode calls can handle multiple incremental */ +/* calls of any length. Each mode is reset when a new AES key is */ +/* set but ECB and CBC operations can be reset without setting a */ +/* new key by setting a new IV value. To reset CFB, OFB and CTR */ +/* without setting the key, aes_mode_reset() must be called and the */ +/* IV must be set. NOTE: All these calls update the IV on exit so */ +/* this has to be reset if a new operation with the same IV as the */ +/* previous one is required (or decryption follows encryption with */ +/* the same IV array). */ + +AES_RETURN aes_test_alignment_detection(unsigned int n); + +AES_RETURN aes_ecb_encrypt(const unsigned char *ibuf, unsigned char *obuf, + int len, const aes_encrypt_ctx cx[1]); + +AES_RETURN aes_ecb_decrypt(const unsigned char *ibuf, unsigned char *obuf, + int len, const aes_decrypt_ctx cx[1]); + +AES_RETURN aes_cbc_encrypt(const unsigned char *ibuf, unsigned char *obuf, + int len, unsigned char *iv, const aes_encrypt_ctx cx[1]); + +AES_RETURN aes_cbc_decrypt(const unsigned char *ibuf, unsigned char *obuf, + int len, unsigned char *iv, const aes_decrypt_ctx cx[1]); + +AES_RETURN aes_mode_reset(aes_encrypt_ctx cx[1]); + +AES_RETURN aes_cfb_encrypt(const unsigned char *ibuf, unsigned char *obuf, + int len, unsigned char *iv, aes_encrypt_ctx cx[1]); + +AES_RETURN aes_cfb_decrypt(const unsigned char *ibuf, unsigned char *obuf, + int len, unsigned char *iv, aes_encrypt_ctx cx[1]); + +#define aes_ofb_encrypt aes_ofb_crypt +#define aes_ofb_decrypt aes_ofb_crypt + +AES_RETURN aes_ofb_crypt(const unsigned char *ibuf, unsigned char *obuf, + int len, unsigned char *iv, aes_encrypt_ctx cx[1]); + +typedef void cbuf_inc(unsigned char *cbuf); + +#define aes_ctr_encrypt aes_ctr_crypt +#define aes_ctr_decrypt aes_ctr_crypt + +AES_RETURN aes_ctr_crypt(const unsigned char *ibuf, unsigned char *obuf, + int len, unsigned char *cbuf, cbuf_inc ctr_inc, aes_encrypt_ctx cx[1]); + +#endif + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/src/Crypto/AesSmall.c b/src/Crypto/AesSmall.c index 91c89873..10e7cf83 100644 --- a/src/Crypto/AesSmall.c +++ b/src/Crypto/AesSmall.c @@ -1,953 +1,953 @@ -/* - --------------------------------------------------------------------------- - Copyright (c) 1998-2006, Brian Gladman, Worcester, UK. All rights reserved. - - LICENSE TERMS - - The free distribution and use of this software is allowed (with or without - changes) provided that: - - 1. source code distributions include the above copyright notice, this - list of conditions and the following disclaimer; - - 2. binary distributions include the above copyright notice, this list - of conditions and the following disclaimer in their documentation; - - 3. the name of the copyright holder is not used to endorse products - built using this software without specific written permission. - - DISCLAIMER - - This software is provided 'as is' with no explicit or implied warranties - in respect of its properties, including, but not limited to, correctness - and/or fitness for purpose. - --------------------------------------------------------------------------- - Issue 09/09/2006 - - This is an AES implementation that uses only 8-bit byte operations on the - cipher state (there are options to use 32-bit types if available). - - The combination of mix columns and byte substitution used here is based on - that developed by Karl Malbrain. His contribution is acknowledged. - */ - -/* Adapted for TrueCrypt: - - Macro-generated tables were replaced with static data to enable compiling - with MSVC++ 1.5 which runs out of resources when expanding large macros. -*/ - -#pragma optimize ("t", on) - -/* define if you have a fast memcpy function on your system */ -#if 1 -# define HAVE_MEMCPY -# include -# if defined( _MSC_VER ) -# ifndef DEBUG -# pragma intrinsic( memcpy ) -# endif -# endif -#endif - -/* define if you have fast 32-bit types on your system */ -#if 1 -# define HAVE_UINT_32T -#endif - -/* alternative versions (test for performance on your system) */ -#if 0 -# define VERSION_1 -#endif - -#include "AesSmall.h" - -#define WPOLY 0x011b -#define DPOLY 0x008d -#define f1(x) (x) -#define f2(x) ((x<<1) ^ (((x>>7) & 1) * WPOLY)) -#define f4(x) ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY)) -#define f8(x) ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) \ - ^ (((x>>5) & 4) * WPOLY)) -#define d2(x) (((x) >> 1) ^ ((x) & 1 ? DPOLY : 0)) - -#define f3(x) (f2(x) ^ x) -#define f9(x) (f8(x) ^ x) -#define fb(x) (f8(x) ^ f2(x) ^ x) -#define fd(x) (f8(x) ^ f4(x) ^ x) -#define fe(x) (f8(x) ^ f4(x) ^ f2(x)) - -static const uint_8t s_box[256] = { - 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5, - 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76, - 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0, - 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0, - 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc, - 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15, - 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a, - 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75, - 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0, - 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84, - 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b, - 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf, - 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85, - 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8, - 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5, - 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2, - 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17, - 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73, - 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88, - 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb, - 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c, - 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79, - 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9, - 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08, - 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6, - 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a, - 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e, - 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e, - 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94, - 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf, - 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68, - 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 -}; - -static const uint_8t inv_s_box[256] = { - 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38, - 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb, - 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87, - 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb, - 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d, - 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e, - 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2, - 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25, - 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16, - 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92, - 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda, - 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84, - 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a, - 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06, - 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02, - 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b, - 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea, - 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73, - 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85, - 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e, - 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89, - 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b, - 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20, - 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4, - 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31, - 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f, - 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d, - 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef, - 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0, - 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61, - 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26, - 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d -}; - -static const uint_8t gfm2_s_box[256] = { - 0xc6,0xf8,0xee,0xf6,0xff,0xd6,0xde,0x91, - 0x60,0x02,0xce,0x56,0xe7,0xb5,0x4d,0xec, - 0x8f,0x1f,0x89,0xfa,0xef,0xb2,0x8e,0xfb, - 0x41,0xb3,0x5f,0x45,0x23,0x53,0xe4,0x9b, - 0x75,0xe1,0x3d,0x4c,0x6c,0x7e,0xf5,0x83, - 0x68,0x51,0xd1,0xf9,0xe2,0xab,0x62,0x2a, - 0x08,0x95,0x46,0x9d,0x30,0x37,0x0a,0x2f, - 0x0e,0x24,0x1b,0xdf,0xcd,0x4e,0x7f,0xea, - 0x12,0x1d,0x58,0x34,0x36,0xdc,0xb4,0x5b, - 0xa4,0x76,0xb7,0x7d,0x52,0xdd,0x5e,0x13, - 0xa6,0xb9,0x00,0xc1,0x40,0xe3,0x79,0xb6, - 0xd4,0x8d,0x67,0x72,0x94,0x98,0xb0,0x85, - 0xbb,0xc5,0x4f,0xed,0x86,0x9a,0x66,0x11, - 0x8a,0xe9,0x04,0xfe,0xa0,0x78,0x25,0x4b, - 0xa2,0x5d,0x80,0x05,0x3f,0x21,0x70,0xf1, - 0x63,0x77,0xaf,0x42,0x20,0xe5,0xfd,0xbf, - 0x81,0x18,0x26,0xc3,0xbe,0x35,0x88,0x2e, - 0x93,0x55,0xfc,0x7a,0xc8,0xba,0x32,0xe6, - 0xc0,0x19,0x9e,0xa3,0x44,0x54,0x3b,0x0b, - 0x8c,0xc7,0x6b,0x28,0xa7,0xbc,0x16,0xad, - 0xdb,0x64,0x74,0x14,0x92,0x0c,0x48,0xb8, - 0x9f,0xbd,0x43,0xc4,0x39,0x31,0xd3,0xf2, - 0xd5,0x8b,0x6e,0xda,0x01,0xb1,0x9c,0x49, - 0xd8,0xac,0xf3,0xcf,0xca,0xf4,0x47,0x10, - 0x6f,0xf0,0x4a,0x5c,0x38,0x57,0x73,0x97, - 0xcb,0xa1,0xe8,0x3e,0x96,0x61,0x0d,0x0f, - 0xe0,0x7c,0x71,0xcc,0x90,0x06,0xf7,0x1c, - 0xc2,0x6a,0xae,0x69,0x17,0x99,0x3a,0x27, - 0xd9,0xeb,0x2b,0x22,0xd2,0xa9,0x07,0x33, - 0x2d,0x3c,0x15,0xc9,0x87,0xaa,0x50,0xa5, - 0x03,0x59,0x09,0x1a,0x65,0xd7,0x84,0xd0, - 0x82,0x29,0x5a,0x1e,0x7b,0xa8,0x6d,0x2c -}; - -static const uint_8t gfm3_s_box[256] = { - 0xa5,0x84,0x99,0x8d,0x0d,0xbd,0xb1,0x54, - 0x50,0x03,0xa9,0x7d,0x19,0x62,0xe6,0x9a, - 0x45,0x9d,0x40,0x87,0x15,0xeb,0xc9,0x0b, - 0xec,0x67,0xfd,0xea,0xbf,0xf7,0x96,0x5b, - 0xc2,0x1c,0xae,0x6a,0x5a,0x41,0x02,0x4f, - 0x5c,0xf4,0x34,0x08,0x93,0x73,0x53,0x3f, - 0x0c,0x52,0x65,0x5e,0x28,0xa1,0x0f,0xb5, - 0x09,0x36,0x9b,0x3d,0x26,0x69,0xcd,0x9f, - 0x1b,0x9e,0x74,0x2e,0x2d,0xb2,0xee,0xfb, - 0xf6,0x4d,0x61,0xce,0x7b,0x3e,0x71,0x97, - 0xf5,0x68,0x00,0x2c,0x60,0x1f,0xc8,0xed, - 0xbe,0x46,0xd9,0x4b,0xde,0xd4,0xe8,0x4a, - 0x6b,0x2a,0xe5,0x16,0xc5,0xd7,0x55,0x94, - 0xcf,0x10,0x06,0x81,0xf0,0x44,0xba,0xe3, - 0xf3,0xfe,0xc0,0x8a,0xad,0xbc,0x48,0x04, - 0xdf,0xc1,0x75,0x63,0x30,0x1a,0x0e,0x6d, - 0x4c,0x14,0x35,0x2f,0xe1,0xa2,0xcc,0x39, - 0x57,0xf2,0x82,0x47,0xac,0xe7,0x2b,0x95, - 0xa0,0x98,0xd1,0x7f,0x66,0x7e,0xab,0x83, - 0xca,0x29,0xd3,0x3c,0x79,0xe2,0x1d,0x76, - 0x3b,0x56,0x4e,0x1e,0xdb,0x0a,0x6c,0xe4, - 0x5d,0x6e,0xef,0xa6,0xa8,0xa4,0x37,0x8b, - 0x32,0x43,0x59,0xb7,0x8c,0x64,0xd2,0xe0, - 0xb4,0xfa,0x07,0x25,0xaf,0x8e,0xe9,0x18, - 0xd5,0x88,0x6f,0x72,0x24,0xf1,0xc7,0x51, - 0x23,0x7c,0x9c,0x21,0xdd,0xdc,0x86,0x85, - 0x90,0x42,0xc4,0xaa,0xd8,0x05,0x01,0x12, - 0xa3,0x5f,0xf9,0xd0,0x91,0x58,0x27,0xb9, - 0x38,0x13,0xb3,0x33,0xbb,0x70,0x89,0xa7, - 0xb6,0x22,0x92,0x20,0x49,0xff,0x78,0x7a, - 0x8f,0xf8,0x80,0x17,0xda,0x31,0xc6,0xb8, - 0xc3,0xb0,0x77,0x11,0xcb,0xfc,0xd6,0x3a -}; - -static const uint_8t gfmul_9[256] = { - 0x00,0x09,0x12,0x1b,0x24,0x2d,0x36,0x3f, - 0x48,0x41,0x5a,0x53,0x6c,0x65,0x7e,0x77, - 0x90,0x99,0x82,0x8b,0xb4,0xbd,0xa6,0xaf, - 0xd8,0xd1,0xca,0xc3,0xfc,0xf5,0xee,0xe7, - 0x3b,0x32,0x29,0x20,0x1f,0x16,0x0d,0x04, - 0x73,0x7a,0x61,0x68,0x57,0x5e,0x45,0x4c, - 0xab,0xa2,0xb9,0xb0,0x8f,0x86,0x9d,0x94, - 0xe3,0xea,0xf1,0xf8,0xc7,0xce,0xd5,0xdc, - 0x76,0x7f,0x64,0x6d,0x52,0x5b,0x40,0x49, - 0x3e,0x37,0x2c,0x25,0x1a,0x13,0x08,0x01, - 0xe6,0xef,0xf4,0xfd,0xc2,0xcb,0xd0,0xd9, - 0xae,0xa7,0xbc,0xb5,0x8a,0x83,0x98,0x91, - 0x4d,0x44,0x5f,0x56,0x69,0x60,0x7b,0x72, - 0x05,0x0c,0x17,0x1e,0x21,0x28,0x33,0x3a, - 0xdd,0xd4,0xcf,0xc6,0xf9,0xf0,0xeb,0xe2, - 0x95,0x9c,0x87,0x8e,0xb1,0xb8,0xa3,0xaa, - 0xec,0xe5,0xfe,0xf7,0xc8,0xc1,0xda,0xd3, - 0xa4,0xad,0xb6,0xbf,0x80,0x89,0x92,0x9b, - 0x7c,0x75,0x6e,0x67,0x58,0x51,0x4a,0x43, - 0x34,0x3d,0x26,0x2f,0x10,0x19,0x02,0x0b, - 0xd7,0xde,0xc5,0xcc,0xf3,0xfa,0xe1,0xe8, - 0x9f,0x96,0x8d,0x84,0xbb,0xb2,0xa9,0xa0, - 0x47,0x4e,0x55,0x5c,0x63,0x6a,0x71,0x78, - 0x0f,0x06,0x1d,0x14,0x2b,0x22,0x39,0x30, - 0x9a,0x93,0x88,0x81,0xbe,0xb7,0xac,0xa5, - 0xd2,0xdb,0xc0,0xc9,0xf6,0xff,0xe4,0xed, - 0x0a,0x03,0x18,0x11,0x2e,0x27,0x3c,0x35, - 0x42,0x4b,0x50,0x59,0x66,0x6f,0x74,0x7d, - 0xa1,0xa8,0xb3,0xba,0x85,0x8c,0x97,0x9e, - 0xe9,0xe0,0xfb,0xf2,0xcd,0xc4,0xdf,0xd6, - 0x31,0x38,0x23,0x2a,0x15,0x1c,0x07,0x0e, - 0x79,0x70,0x6b,0x62,0x5d,0x54,0x4f,0x46 -}; - -static const uint_8t gfmul_b[256] = { - 0x00,0x0b,0x16,0x1d,0x2c,0x27,0x3a,0x31, - 0x58,0x53,0x4e,0x45,0x74,0x7f,0x62,0x69, - 0xb0,0xbb,0xa6,0xad,0x9c,0x97,0x8a,0x81, - 0xe8,0xe3,0xfe,0xf5,0xc4,0xcf,0xd2,0xd9, - 0x7b,0x70,0x6d,0x66,0x57,0x5c,0x41,0x4a, - 0x23,0x28,0x35,0x3e,0x0f,0x04,0x19,0x12, - 0xcb,0xc0,0xdd,0xd6,0xe7,0xec,0xf1,0xfa, - 0x93,0x98,0x85,0x8e,0xbf,0xb4,0xa9,0xa2, - 0xf6,0xfd,0xe0,0xeb,0xda,0xd1,0xcc,0xc7, - 0xae,0xa5,0xb8,0xb3,0x82,0x89,0x94,0x9f, - 0x46,0x4d,0x50,0x5b,0x6a,0x61,0x7c,0x77, - 0x1e,0x15,0x08,0x03,0x32,0x39,0x24,0x2f, - 0x8d,0x86,0x9b,0x90,0xa1,0xaa,0xb7,0xbc, - 0xd5,0xde,0xc3,0xc8,0xf9,0xf2,0xef,0xe4, - 0x3d,0x36,0x2b,0x20,0x11,0x1a,0x07,0x0c, - 0x65,0x6e,0x73,0x78,0x49,0x42,0x5f,0x54, - 0xf7,0xfc,0xe1,0xea,0xdb,0xd0,0xcd,0xc6, - 0xaf,0xa4,0xb9,0xb2,0x83,0x88,0x95,0x9e, - 0x47,0x4c,0x51,0x5a,0x6b,0x60,0x7d,0x76, - 0x1f,0x14,0x09,0x02,0x33,0x38,0x25,0x2e, - 0x8c,0x87,0x9a,0x91,0xa0,0xab,0xb6,0xbd, - 0xd4,0xdf,0xc2,0xc9,0xf8,0xf3,0xee,0xe5, - 0x3c,0x37,0x2a,0x21,0x10,0x1b,0x06,0x0d, - 0x64,0x6f,0x72,0x79,0x48,0x43,0x5e,0x55, - 0x01,0x0a,0x17,0x1c,0x2d,0x26,0x3b,0x30, - 0x59,0x52,0x4f,0x44,0x75,0x7e,0x63,0x68, - 0xb1,0xba,0xa7,0xac,0x9d,0x96,0x8b,0x80, - 0xe9,0xe2,0xff,0xf4,0xc5,0xce,0xd3,0xd8, - 0x7a,0x71,0x6c,0x67,0x56,0x5d,0x40,0x4b, - 0x22,0x29,0x34,0x3f,0x0e,0x05,0x18,0x13, - 0xca,0xc1,0xdc,0xd7,0xe6,0xed,0xf0,0xfb, - 0x92,0x99,0x84,0x8f,0xbe,0xb5,0xa8,0xa3 -}; - -static const uint_8t gfmul_d[256] = { - 0x00,0x0d,0x1a,0x17,0x34,0x39,0x2e,0x23, - 0x68,0x65,0x72,0x7f,0x5c,0x51,0x46,0x4b, - 0xd0,0xdd,0xca,0xc7,0xe4,0xe9,0xfe,0xf3, - 0xb8,0xb5,0xa2,0xaf,0x8c,0x81,0x96,0x9b, - 0xbb,0xb6,0xa1,0xac,0x8f,0x82,0x95,0x98, - 0xd3,0xde,0xc9,0xc4,0xe7,0xea,0xfd,0xf0, - 0x6b,0x66,0x71,0x7c,0x5f,0x52,0x45,0x48, - 0x03,0x0e,0x19,0x14,0x37,0x3a,0x2d,0x20, - 0x6d,0x60,0x77,0x7a,0x59,0x54,0x43,0x4e, - 0x05,0x08,0x1f,0x12,0x31,0x3c,0x2b,0x26, - 0xbd,0xb0,0xa7,0xaa,0x89,0x84,0x93,0x9e, - 0xd5,0xd8,0xcf,0xc2,0xe1,0xec,0xfb,0xf6, - 0xd6,0xdb,0xcc,0xc1,0xe2,0xef,0xf8,0xf5, - 0xbe,0xb3,0xa4,0xa9,0x8a,0x87,0x90,0x9d, - 0x06,0x0b,0x1c,0x11,0x32,0x3f,0x28,0x25, - 0x6e,0x63,0x74,0x79,0x5a,0x57,0x40,0x4d, - 0xda,0xd7,0xc0,0xcd,0xee,0xe3,0xf4,0xf9, - 0xb2,0xbf,0xa8,0xa5,0x86,0x8b,0x9c,0x91, - 0x0a,0x07,0x10,0x1d,0x3e,0x33,0x24,0x29, - 0x62,0x6f,0x78,0x75,0x56,0x5b,0x4c,0x41, - 0x61,0x6c,0x7b,0x76,0x55,0x58,0x4f,0x42, - 0x09,0x04,0x13,0x1e,0x3d,0x30,0x27,0x2a, - 0xb1,0xbc,0xab,0xa6,0x85,0x88,0x9f,0x92, - 0xd9,0xd4,0xc3,0xce,0xed,0xe0,0xf7,0xfa, - 0xb7,0xba,0xad,0xa0,0x83,0x8e,0x99,0x94, - 0xdf,0xd2,0xc5,0xc8,0xeb,0xe6,0xf1,0xfc, - 0x67,0x6a,0x7d,0x70,0x53,0x5e,0x49,0x44, - 0x0f,0x02,0x15,0x18,0x3b,0x36,0x21,0x2c, - 0x0c,0x01,0x16,0x1b,0x38,0x35,0x22,0x2f, - 0x64,0x69,0x7e,0x73,0x50,0x5d,0x4a,0x47, - 0xdc,0xd1,0xc6,0xcb,0xe8,0xe5,0xf2,0xff, - 0xb4,0xb9,0xae,0xa3,0x80,0x8d,0x9a,0x97 -}; - -static const uint_8t gfmul_e[256] = { - 0x00,0x0e,0x1c,0x12,0x38,0x36,0x24,0x2a, - 0x70,0x7e,0x6c,0x62,0x48,0x46,0x54,0x5a, - 0xe0,0xee,0xfc,0xf2,0xd8,0xd6,0xc4,0xca, - 0x90,0x9e,0x8c,0x82,0xa8,0xa6,0xb4,0xba, - 0xdb,0xd5,0xc7,0xc9,0xe3,0xed,0xff,0xf1, - 0xab,0xa5,0xb7,0xb9,0x93,0x9d,0x8f,0x81, - 0x3b,0x35,0x27,0x29,0x03,0x0d,0x1f,0x11, - 0x4b,0x45,0x57,0x59,0x73,0x7d,0x6f,0x61, - 0xad,0xa3,0xb1,0xbf,0x95,0x9b,0x89,0x87, - 0xdd,0xd3,0xc1,0xcf,0xe5,0xeb,0xf9,0xf7, - 0x4d,0x43,0x51,0x5f,0x75,0x7b,0x69,0x67, - 0x3d,0x33,0x21,0x2f,0x05,0x0b,0x19,0x17, - 0x76,0x78,0x6a,0x64,0x4e,0x40,0x52,0x5c, - 0x06,0x08,0x1a,0x14,0x3e,0x30,0x22,0x2c, - 0x96,0x98,0x8a,0x84,0xae,0xa0,0xb2,0xbc, - 0xe6,0xe8,0xfa,0xf4,0xde,0xd0,0xc2,0xcc, - 0x41,0x4f,0x5d,0x53,0x79,0x77,0x65,0x6b, - 0x31,0x3f,0x2d,0x23,0x09,0x07,0x15,0x1b, - 0xa1,0xaf,0xbd,0xb3,0x99,0x97,0x85,0x8b, - 0xd1,0xdf,0xcd,0xc3,0xe9,0xe7,0xf5,0xfb, - 0x9a,0x94,0x86,0x88,0xa2,0xac,0xbe,0xb0, - 0xea,0xe4,0xf6,0xf8,0xd2,0xdc,0xce,0xc0, - 0x7a,0x74,0x66,0x68,0x42,0x4c,0x5e,0x50, - 0x0a,0x04,0x16,0x18,0x32,0x3c,0x2e,0x20, - 0xec,0xe2,0xf0,0xfe,0xd4,0xda,0xc8,0xc6, - 0x9c,0x92,0x80,0x8e,0xa4,0xaa,0xb8,0xb6, - 0x0c,0x02,0x10,0x1e,0x34,0x3a,0x28,0x26, - 0x7c,0x72,0x60,0x6e,0x44,0x4a,0x58,0x56, - 0x37,0x39,0x2b,0x25,0x0f,0x01,0x13,0x1d, - 0x47,0x49,0x5b,0x55,0x7f,0x71,0x63,0x6d, - 0xd7,0xd9,0xcb,0xc5,0xef,0xe1,0xf3,0xfd, - 0xa7,0xa9,0xbb,0xb5,0x9f,0x91,0x83,0x8d -}; - -#if defined( HAVE_UINT_32T ) - typedef unsigned long uint_32t; -#endif - -#if defined( HAVE_MEMCPY ) -# define block_copy(d, s, l) memcpy(d, s, l) -# define block16_copy(d, s) memcpy(d, s, N_BLOCK) -#else -# define block_copy(d, s, l) copy_block(d, s, l) -# define block16_copy(d, s) copy_block16(d, s) -#endif - -/* block size 'nn' must be a multiple of four */ - -static void copy_block16( void *d, const void *s ) -{ -#if defined( HAVE_UINT_32T ) - ((uint_32t*)d)[ 0] = ((uint_32t*)s)[ 0]; - ((uint_32t*)d)[ 1] = ((uint_32t*)s)[ 1]; - ((uint_32t*)d)[ 2] = ((uint_32t*)s)[ 2]; - ((uint_32t*)d)[ 3] = ((uint_32t*)s)[ 3]; -#else - ((uint_8t*)d)[ 0] = ((uint_8t*)s)[ 0]; - ((uint_8t*)d)[ 1] = ((uint_8t*)s)[ 1]; - ((uint_8t*)d)[ 2] = ((uint_8t*)s)[ 2]; - ((uint_8t*)d)[ 3] = ((uint_8t*)s)[ 3]; - ((uint_8t*)d)[ 4] = ((uint_8t*)s)[ 4]; - ((uint_8t*)d)[ 5] = ((uint_8t*)s)[ 5]; - ((uint_8t*)d)[ 6] = ((uint_8t*)s)[ 6]; - ((uint_8t*)d)[ 7] = ((uint_8t*)s)[ 7]; - ((uint_8t*)d)[ 8] = ((uint_8t*)s)[ 8]; - ((uint_8t*)d)[ 9] = ((uint_8t*)s)[ 9]; - ((uint_8t*)d)[10] = ((uint_8t*)s)[10]; - ((uint_8t*)d)[11] = ((uint_8t*)s)[11]; - ((uint_8t*)d)[12] = ((uint_8t*)s)[12]; - ((uint_8t*)d)[13] = ((uint_8t*)s)[13]; - ((uint_8t*)d)[14] = ((uint_8t*)s)[14]; - ((uint_8t*)d)[15] = ((uint_8t*)s)[15]; -#endif -} - -static void copy_block( void * d, void *s, uint_8t nn ) -{ - while( nn-- ) - *((uint_8t*)d)++ = *((uint_8t*)s)++; -} - -static void xor_block( void *d, const void *s ) -{ -#if defined( HAVE_UINT_32T ) - ((uint_32t*)d)[ 0] ^= ((uint_32t*)s)[ 0]; - ((uint_32t*)d)[ 1] ^= ((uint_32t*)s)[ 1]; - ((uint_32t*)d)[ 2] ^= ((uint_32t*)s)[ 2]; - ((uint_32t*)d)[ 3] ^= ((uint_32t*)s)[ 3]; -#else - ((uint_8t*)d)[ 0] ^= ((uint_8t*)s)[ 0]; - ((uint_8t*)d)[ 1] ^= ((uint_8t*)s)[ 1]; - ((uint_8t*)d)[ 2] ^= ((uint_8t*)s)[ 2]; - ((uint_8t*)d)[ 3] ^= ((uint_8t*)s)[ 3]; - ((uint_8t*)d)[ 4] ^= ((uint_8t*)s)[ 4]; - ((uint_8t*)d)[ 5] ^= ((uint_8t*)s)[ 5]; - ((uint_8t*)d)[ 6] ^= ((uint_8t*)s)[ 6]; - ((uint_8t*)d)[ 7] ^= ((uint_8t*)s)[ 7]; - ((uint_8t*)d)[ 8] ^= ((uint_8t*)s)[ 8]; - ((uint_8t*)d)[ 9] ^= ((uint_8t*)s)[ 9]; - ((uint_8t*)d)[10] ^= ((uint_8t*)s)[10]; - ((uint_8t*)d)[11] ^= ((uint_8t*)s)[11]; - ((uint_8t*)d)[12] ^= ((uint_8t*)s)[12]; - ((uint_8t*)d)[13] ^= ((uint_8t*)s)[13]; - ((uint_8t*)d)[14] ^= ((uint_8t*)s)[14]; - ((uint_8t*)d)[15] ^= ((uint_8t*)s)[15]; -#endif -} - -static void copy_and_key( void *d, const void *s, const void *k ) -{ -#if defined( HAVE_UINT_32T ) - ((uint_32t*)d)[ 0] = ((uint_32t*)s)[ 0] ^ ((uint_32t*)k)[ 0]; - ((uint_32t*)d)[ 1] = ((uint_32t*)s)[ 1] ^ ((uint_32t*)k)[ 1]; - ((uint_32t*)d)[ 2] = ((uint_32t*)s)[ 2] ^ ((uint_32t*)k)[ 2]; - ((uint_32t*)d)[ 3] = ((uint_32t*)s)[ 3] ^ ((uint_32t*)k)[ 3]; -#elif 1 - ((uint_8t*)d)[ 0] = ((uint_8t*)s)[ 0] ^ ((uint_8t*)k)[ 0]; - ((uint_8t*)d)[ 1] = ((uint_8t*)s)[ 1] ^ ((uint_8t*)k)[ 1]; - ((uint_8t*)d)[ 2] = ((uint_8t*)s)[ 2] ^ ((uint_8t*)k)[ 2]; - ((uint_8t*)d)[ 3] = ((uint_8t*)s)[ 3] ^ ((uint_8t*)k)[ 3]; - ((uint_8t*)d)[ 4] = ((uint_8t*)s)[ 4] ^ ((uint_8t*)k)[ 4]; - ((uint_8t*)d)[ 5] = ((uint_8t*)s)[ 5] ^ ((uint_8t*)k)[ 5]; - ((uint_8t*)d)[ 6] = ((uint_8t*)s)[ 6] ^ ((uint_8t*)k)[ 6]; - ((uint_8t*)d)[ 7] = ((uint_8t*)s)[ 7] ^ ((uint_8t*)k)[ 7]; - ((uint_8t*)d)[ 8] = ((uint_8t*)s)[ 8] ^ ((uint_8t*)k)[ 8]; - ((uint_8t*)d)[ 9] = ((uint_8t*)s)[ 9] ^ ((uint_8t*)k)[ 9]; - ((uint_8t*)d)[10] = ((uint_8t*)s)[10] ^ ((uint_8t*)k)[10]; - ((uint_8t*)d)[11] = ((uint_8t*)s)[11] ^ ((uint_8t*)k)[11]; - ((uint_8t*)d)[12] = ((uint_8t*)s)[12] ^ ((uint_8t*)k)[12]; - ((uint_8t*)d)[13] = ((uint_8t*)s)[13] ^ ((uint_8t*)k)[13]; - ((uint_8t*)d)[14] = ((uint_8t*)s)[14] ^ ((uint_8t*)k)[14]; - ((uint_8t*)d)[15] = ((uint_8t*)s)[15] ^ ((uint_8t*)k)[15]; -#else - block16_copy(d, s); - xor_block(d, k); -#endif -} - -static void add_round_key( uint_8t d[N_BLOCK], const uint_8t k[N_BLOCK] ) -{ - xor_block(d, k); -} - -static void shift_sub_rows( uint_8t st[N_BLOCK] ) -{ uint_8t tt; - - st[ 0] = s_box[st[ 0]]; st[ 4] = s_box[st[ 4]]; - st[ 8] = s_box[st[ 8]]; st[12] = s_box[st[12]]; - - tt = st[1]; st[ 1] = s_box[st[ 5]]; st[ 5] = s_box[st[ 9]]; - st[ 9] = s_box[st[13]]; st[13] = s_box[ tt ]; - - tt = st[2]; st[ 2] = s_box[st[10]]; st[10] = s_box[ tt ]; - tt = st[6]; st[ 6] = s_box[st[14]]; st[14] = s_box[ tt ]; - - tt = st[15]; st[15] = s_box[st[11]]; st[11] = s_box[st[ 7]]; - st[ 7] = s_box[st[ 3]]; st[ 3] = s_box[ tt ]; -} - -static void inv_shift_sub_rows( uint_8t st[N_BLOCK] ) -{ uint_8t tt; - - st[ 0] = inv_s_box[st[ 0]]; st[ 4] = inv_s_box[st[ 4]]; - st[ 8] = inv_s_box[st[ 8]]; st[12] = inv_s_box[st[12]]; - - tt = st[13]; st[13] = inv_s_box[st[9]]; st[ 9] = inv_s_box[st[5]]; - st[ 5] = inv_s_box[st[1]]; st[ 1] = inv_s_box[ tt ]; - - tt = st[2]; st[ 2] = inv_s_box[st[10]]; st[10] = inv_s_box[ tt ]; - tt = st[6]; st[ 6] = inv_s_box[st[14]]; st[14] = inv_s_box[ tt ]; - - tt = st[3]; st[ 3] = inv_s_box[st[ 7]]; st[ 7] = inv_s_box[st[11]]; - st[11] = inv_s_box[st[15]]; st[15] = inv_s_box[ tt ]; -} - -#if defined( VERSION_1 ) - static void mix_sub_columns( uint_8t dt[N_BLOCK] ) - { uint_8t st[N_BLOCK]; - block16_copy(st, dt); -#else - static void mix_sub_columns( uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK] ) - { -#endif - dt[ 0] = gfm2_s_box[st[0]] ^ gfm3_s_box[st[5]] ^ s_box[st[10]] ^ s_box[st[15]]; - dt[ 1] = s_box[st[0]] ^ gfm2_s_box[st[5]] ^ gfm3_s_box[st[10]] ^ s_box[st[15]]; - dt[ 2] = s_box[st[0]] ^ s_box[st[5]] ^ gfm2_s_box[st[10]] ^ gfm3_s_box[st[15]]; - dt[ 3] = gfm3_s_box[st[0]] ^ s_box[st[5]] ^ s_box[st[10]] ^ gfm2_s_box[st[15]]; - - dt[ 4] = gfm2_s_box[st[4]] ^ gfm3_s_box[st[9]] ^ s_box[st[14]] ^ s_box[st[3]]; - dt[ 5] = s_box[st[4]] ^ gfm2_s_box[st[9]] ^ gfm3_s_box[st[14]] ^ s_box[st[3]]; - dt[ 6] = s_box[st[4]] ^ s_box[st[9]] ^ gfm2_s_box[st[14]] ^ gfm3_s_box[st[3]]; - dt[ 7] = gfm3_s_box[st[4]] ^ s_box[st[9]] ^ s_box[st[14]] ^ gfm2_s_box[st[3]]; - - dt[ 8] = gfm2_s_box[st[8]] ^ gfm3_s_box[st[13]] ^ s_box[st[2]] ^ s_box[st[7]]; - dt[ 9] = s_box[st[8]] ^ gfm2_s_box[st[13]] ^ gfm3_s_box[st[2]] ^ s_box[st[7]]; - dt[10] = s_box[st[8]] ^ s_box[st[13]] ^ gfm2_s_box[st[2]] ^ gfm3_s_box[st[7]]; - dt[11] = gfm3_s_box[st[8]] ^ s_box[st[13]] ^ s_box[st[2]] ^ gfm2_s_box[st[7]]; - - dt[12] = gfm2_s_box[st[12]] ^ gfm3_s_box[st[1]] ^ s_box[st[6]] ^ s_box[st[11]]; - dt[13] = s_box[st[12]] ^ gfm2_s_box[st[1]] ^ gfm3_s_box[st[6]] ^ s_box[st[11]]; - dt[14] = s_box[st[12]] ^ s_box[st[1]] ^ gfm2_s_box[st[6]] ^ gfm3_s_box[st[11]]; - dt[15] = gfm3_s_box[st[12]] ^ s_box[st[1]] ^ s_box[st[6]] ^ gfm2_s_box[st[11]]; - } - -#if defined( VERSION_1 ) - static void inv_mix_sub_columns( uint_8t dt[N_BLOCK] ) - { uint_8t st[N_BLOCK]; - block16_copy(st, dt); -#else - static void inv_mix_sub_columns( uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK] ) - { -#endif - dt[ 0] = inv_s_box[gfmul_e[st[ 0]] ^ gfmul_b[st[ 1]] ^ gfmul_d[st[ 2]] ^ gfmul_9[st[ 3]]]; - dt[ 5] = inv_s_box[gfmul_9[st[ 0]] ^ gfmul_e[st[ 1]] ^ gfmul_b[st[ 2]] ^ gfmul_d[st[ 3]]]; - dt[10] = inv_s_box[gfmul_d[st[ 0]] ^ gfmul_9[st[ 1]] ^ gfmul_e[st[ 2]] ^ gfmul_b[st[ 3]]]; - dt[15] = inv_s_box[gfmul_b[st[ 0]] ^ gfmul_d[st[ 1]] ^ gfmul_9[st[ 2]] ^ gfmul_e[st[ 3]]]; - - dt[ 4] = inv_s_box[gfmul_e[st[ 4]] ^ gfmul_b[st[ 5]] ^ gfmul_d[st[ 6]] ^ gfmul_9[st[ 7]]]; - dt[ 9] = inv_s_box[gfmul_9[st[ 4]] ^ gfmul_e[st[ 5]] ^ gfmul_b[st[ 6]] ^ gfmul_d[st[ 7]]]; - dt[14] = inv_s_box[gfmul_d[st[ 4]] ^ gfmul_9[st[ 5]] ^ gfmul_e[st[ 6]] ^ gfmul_b[st[ 7]]]; - dt[ 3] = inv_s_box[gfmul_b[st[ 4]] ^ gfmul_d[st[ 5]] ^ gfmul_9[st[ 6]] ^ gfmul_e[st[ 7]]]; - - dt[ 8] = inv_s_box[gfmul_e[st[ 8]] ^ gfmul_b[st[ 9]] ^ gfmul_d[st[10]] ^ gfmul_9[st[11]]]; - dt[13] = inv_s_box[gfmul_9[st[ 8]] ^ gfmul_e[st[ 9]] ^ gfmul_b[st[10]] ^ gfmul_d[st[11]]]; - dt[ 2] = inv_s_box[gfmul_d[st[ 8]] ^ gfmul_9[st[ 9]] ^ gfmul_e[st[10]] ^ gfmul_b[st[11]]]; - dt[ 7] = inv_s_box[gfmul_b[st[ 8]] ^ gfmul_d[st[ 9]] ^ gfmul_9[st[10]] ^ gfmul_e[st[11]]]; - - dt[12] = inv_s_box[gfmul_e[st[12]] ^ gfmul_b[st[13]] ^ gfmul_d[st[14]] ^ gfmul_9[st[15]]]; - dt[ 1] = inv_s_box[gfmul_9[st[12]] ^ gfmul_e[st[13]] ^ gfmul_b[st[14]] ^ gfmul_d[st[15]]]; - dt[ 6] = inv_s_box[gfmul_d[st[12]] ^ gfmul_9[st[13]] ^ gfmul_e[st[14]] ^ gfmul_b[st[15]]]; - dt[11] = inv_s_box[gfmul_b[st[12]] ^ gfmul_d[st[13]] ^ gfmul_9[st[14]] ^ gfmul_e[st[15]]]; - } - -#if defined( AES_ENC_PREKEYED ) || defined( AES_DEC_PREKEYED ) - -/* Set the cipher key for the pre-keyed version */ - -return_type aes_set_key( const unsigned char key[], length_type keylen, aes_context ctx[1] ) -{ - uint_8t cc, rc, hi; - - switch( keylen ) - { - case 16: - case 128: - keylen = 16; - break; - case 24: - case 192: - keylen = 24; - break; - case 32: - case 256: - keylen = 32; - break; - default: - ctx->rnd = 0; - return (return_type) -1; - } - block_copy(ctx->ksch, key, keylen); - hi = (keylen + 28) << 2; - ctx->rnd = (hi >> 4) - 1; - for( cc = keylen, rc = 1; cc < hi; cc += 4 ) - { uint_8t tt, t0, t1, t2, t3; - - t0 = ctx->ksch[cc - 4]; - t1 = ctx->ksch[cc - 3]; - t2 = ctx->ksch[cc - 2]; - t3 = ctx->ksch[cc - 1]; - if( cc % keylen == 0 ) - { - tt = t0; - t0 = s_box[t1] ^ rc; - t1 = s_box[t2]; - t2 = s_box[t3]; - t3 = s_box[tt]; - rc = f2(rc); - } - else if( keylen > 24 && cc % keylen == 16 ) - { - t0 = s_box[t0]; - t1 = s_box[t1]; - t2 = s_box[t2]; - t3 = s_box[t3]; - } - tt = cc - keylen; - ctx->ksch[cc + 0] = ctx->ksch[tt + 0] ^ t0; - ctx->ksch[cc + 1] = ctx->ksch[tt + 1] ^ t1; - ctx->ksch[cc + 2] = ctx->ksch[tt + 2] ^ t2; - ctx->ksch[cc + 3] = ctx->ksch[tt + 3] ^ t3; - } - return 0; -} - -#endif - -#if defined( AES_ENC_PREKEYED ) - -/* Encrypt a single block of 16 bytes */ - -return_type aes_encrypt( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], const aes_context ctx[1] ) -{ - if( ctx->rnd ) - { - uint_8t s1[N_BLOCK], r; - copy_and_key( s1, in, ctx->ksch ); - - for( r = 1 ; r < ctx->rnd ; ++r ) -#if defined( VERSION_1 ) - { - mix_sub_columns( s1 ); - add_round_key( s1, ctx->ksch + r * N_BLOCK); - } -#else - { uint_8t s2[N_BLOCK]; - mix_sub_columns( s2, s1 ); - copy_and_key( s1, s2, ctx->ksch + r * N_BLOCK); - } -#endif - shift_sub_rows( s1 ); - copy_and_key( out, s1, ctx->ksch + r * N_BLOCK ); - } - else - return (return_type) -1; - return 0; -} - -#endif - -#if defined( AES_DEC_PREKEYED ) - -/* Decrypt a single block of 16 bytes */ - -return_type aes_decrypt( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], const aes_context ctx[1] ) -{ - if( ctx->rnd ) - { - uint_8t s1[N_BLOCK], r; - copy_and_key( s1, in, ctx->ksch + ctx->rnd * N_BLOCK ); - inv_shift_sub_rows( s1 ); - - for( r = ctx->rnd ; --r ; ) -#if defined( VERSION_1 ) - { - add_round_key( s1, ctx->ksch + r * N_BLOCK ); - inv_mix_sub_columns( s1 ); - } -#else - { uint_8t s2[N_BLOCK]; - copy_and_key( s2, s1, ctx->ksch + r * N_BLOCK ); - inv_mix_sub_columns( s1, s2 ); - } -#endif - copy_and_key( out, s1, ctx->ksch ); - } - else - return (return_type) -1; - return 0; -} - -#endif - -#if defined( AES_ENC_128_OTFK ) - -/* The 'on the fly' encryption key update for for 128 bit keys */ - -static void update_encrypt_key_128( uint_8t k[N_BLOCK], uint_8t *rc ) -{ uint_8t cc; - - k[0] ^= s_box[k[13]] ^ *rc; - k[1] ^= s_box[k[14]]; - k[2] ^= s_box[k[15]]; - k[3] ^= s_box[k[12]]; - *rc = f2( *rc ); - - for(cc = 4; cc < 16; cc += 4 ) - { - k[cc + 0] ^= k[cc - 4]; - k[cc + 1] ^= k[cc - 3]; - k[cc + 2] ^= k[cc - 2]; - k[cc + 3] ^= k[cc - 1]; - } -} - -/* Encrypt a single block of 16 bytes with 'on the fly' 128 bit keying */ - -void aes_encrypt_128( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], - const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK] ) -{ uint_8t s1[N_BLOCK], r, rc = 1; - - if(o_key != key) - block16_copy( o_key, key ); - copy_and_key( s1, in, o_key ); - - for( r = 1 ; r < 10 ; ++r ) -#if defined( VERSION_1 ) - { - mix_sub_columns( s1 ); - update_encrypt_key_128( o_key, &rc ); - add_round_key( s1, o_key ); - } -#else - { uint_8t s2[N_BLOCK]; - mix_sub_columns( s2, s1 ); - update_encrypt_key_128( o_key, &rc ); - copy_and_key( s1, s2, o_key ); - } -#endif - - shift_sub_rows( s1 ); - update_encrypt_key_128( o_key, &rc ); - copy_and_key( out, s1, o_key ); -} - -#endif - -#if defined( AES_DEC_128_OTFK ) - -/* The 'on the fly' decryption key update for for 128 bit keys */ - -static void update_decrypt_key_128( uint_8t k[N_BLOCK], uint_8t *rc ) -{ uint_8t cc; - - for( cc = 12; cc > 0; cc -= 4 ) - { - k[cc + 0] ^= k[cc - 4]; - k[cc + 1] ^= k[cc - 3]; - k[cc + 2] ^= k[cc - 2]; - k[cc + 3] ^= k[cc - 1]; - } - *rc = d2(*rc); - k[0] ^= s_box[k[13]] ^ *rc; - k[1] ^= s_box[k[14]]; - k[2] ^= s_box[k[15]]; - k[3] ^= s_box[k[12]]; -} - -/* Decrypt a single block of 16 bytes with 'on the fly' 128 bit keying */ - -void aes_decrypt_128( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], - const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK] ) -{ - uint_8t s1[N_BLOCK], r, rc = 0x6c; - if(o_key != key) - block16_copy( o_key, key ); - - copy_and_key( s1, in, o_key ); - inv_shift_sub_rows( s1 ); - - for( r = 10 ; --r ; ) -#if defined( VERSION_1 ) - { - update_decrypt_key_128( o_key, &rc ); - add_round_key( s1, o_key ); - inv_mix_sub_columns( s1 ); - } -#else - { uint_8t s2[N_BLOCK]; - update_decrypt_key_128( o_key, &rc ); - copy_and_key( s2, s1, o_key ); - inv_mix_sub_columns( s1, s2 ); - } -#endif - update_decrypt_key_128( o_key, &rc ); - copy_and_key( out, s1, o_key ); -} - -#endif - -#if defined( AES_ENC_256_OTFK ) - -/* The 'on the fly' encryption key update for for 256 bit keys */ - -static void update_encrypt_key_256( uint_8t k[2 * N_BLOCK], uint_8t *rc ) -{ uint_8t cc; - - k[0] ^= s_box[k[29]] ^ *rc; - k[1] ^= s_box[k[30]]; - k[2] ^= s_box[k[31]]; - k[3] ^= s_box[k[28]]; - *rc = f2( *rc ); - - for(cc = 4; cc < 16; cc += 4) - { - k[cc + 0] ^= k[cc - 4]; - k[cc + 1] ^= k[cc - 3]; - k[cc + 2] ^= k[cc - 2]; - k[cc + 3] ^= k[cc - 1]; - } - - k[16] ^= s_box[k[12]]; - k[17] ^= s_box[k[13]]; - k[18] ^= s_box[k[14]]; - k[19] ^= s_box[k[15]]; - - for( cc = 20; cc < 32; cc += 4 ) - { - k[cc + 0] ^= k[cc - 4]; - k[cc + 1] ^= k[cc - 3]; - k[cc + 2] ^= k[cc - 2]; - k[cc + 3] ^= k[cc - 1]; - } -} - -/* Encrypt a single block of 16 bytes with 'on the fly' 256 bit keying */ - -void aes_encrypt_256( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], - const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK] ) -{ - uint_8t s1[N_BLOCK], r, rc = 1; - if(o_key != key) - { - block16_copy( o_key, key ); - block16_copy( o_key + 16, key + 16 ); - } - copy_and_key( s1, in, o_key ); - - for( r = 1 ; r < 14 ; ++r ) -#if defined( VERSION_1 ) - { - mix_sub_columns(s1); - if( r & 1 ) - add_round_key( s1, o_key + 16 ); - else - { - update_encrypt_key_256( o_key, &rc ); - add_round_key( s1, o_key ); - } - } -#else - { uint_8t s2[N_BLOCK]; - mix_sub_columns( s2, s1 ); - if( r & 1 ) - copy_and_key( s1, s2, o_key + 16 ); - else - { - update_encrypt_key_256( o_key, &rc ); - copy_and_key( s1, s2, o_key ); - } - } -#endif - - shift_sub_rows( s1 ); - update_encrypt_key_256( o_key, &rc ); - copy_and_key( out, s1, o_key ); -} - -#endif - -#if defined( AES_DEC_256_OTFK ) - -/* The 'on the fly' encryption key update for for 256 bit keys */ - -static void update_decrypt_key_256( uint_8t k[2 * N_BLOCK], uint_8t *rc ) -{ uint_8t cc; - - for(cc = 28; cc > 16; cc -= 4) - { - k[cc + 0] ^= k[cc - 4]; - k[cc + 1] ^= k[cc - 3]; - k[cc + 2] ^= k[cc - 2]; - k[cc + 3] ^= k[cc - 1]; - } - - k[16] ^= s_box[k[12]]; - k[17] ^= s_box[k[13]]; - k[18] ^= s_box[k[14]]; - k[19] ^= s_box[k[15]]; - - for(cc = 12; cc > 0; cc -= 4) - { - k[cc + 0] ^= k[cc - 4]; - k[cc + 1] ^= k[cc - 3]; - k[cc + 2] ^= k[cc - 2]; - k[cc + 3] ^= k[cc - 1]; - } - - *rc = d2(*rc); - k[0] ^= s_box[k[29]] ^ *rc; - k[1] ^= s_box[k[30]]; - k[2] ^= s_box[k[31]]; - k[3] ^= s_box[k[28]]; -} - -/* Decrypt a single block of 16 bytes with 'on the fly' - 256 bit keying -*/ -void aes_decrypt_256( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], - const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK] ) -{ - uint_8t s1[N_BLOCK], r, rc = 0x80; - - if(o_key != key) - { - block16_copy( o_key, key ); - block16_copy( o_key + 16, key + 16 ); - } - - copy_and_key( s1, in, o_key ); - inv_shift_sub_rows( s1 ); - - for( r = 14 ; --r ; ) -#if defined( VERSION_1 ) - { - if( ( r & 1 ) ) - { - update_decrypt_key_256( o_key, &rc ); - add_round_key( s1, o_key + 16 ); - } - else - add_round_key( s1, o_key ); - inv_mix_sub_columns( s1 ); - } -#else - { uint_8t s2[N_BLOCK]; - if( ( r & 1 ) ) - { - update_decrypt_key_256( o_key, &rc ); - copy_and_key( s2, s1, o_key + 16 ); - } - else - copy_and_key( s2, s1, o_key ); - inv_mix_sub_columns( s1, s2 ); - } -#endif - copy_and_key( out, s1, o_key ); -} - -#endif +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2006, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue 09/09/2006 + + This is an AES implementation that uses only 8-bit byte operations on the + cipher state (there are options to use 32-bit types if available). + + The combination of mix columns and byte substitution used here is based on + that developed by Karl Malbrain. His contribution is acknowledged. + */ + +/* Adapted for TrueCrypt: + - Macro-generated tables were replaced with static data to enable compiling + with MSVC++ 1.5 which runs out of resources when expanding large macros. +*/ + +#pragma optimize ("t", on) + +/* define if you have a fast memcpy function on your system */ +#if 1 +# define HAVE_MEMCPY +# include +# if defined( _MSC_VER ) +# ifndef DEBUG +# pragma intrinsic( memcpy ) +# endif +# endif +#endif + +/* define if you have fast 32-bit types on your system */ +#if 1 +# define HAVE_UINT_32T +#endif + +/* alternative versions (test for performance on your system) */ +#if 0 +# define VERSION_1 +#endif + +#include "AesSmall.h" + +#define WPOLY 0x011b +#define DPOLY 0x008d +#define f1(x) (x) +#define f2(x) ((x<<1) ^ (((x>>7) & 1) * WPOLY)) +#define f4(x) ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY)) +#define f8(x) ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) \ + ^ (((x>>5) & 4) * WPOLY)) +#define d2(x) (((x) >> 1) ^ ((x) & 1 ? DPOLY : 0)) + +#define f3(x) (f2(x) ^ x) +#define f9(x) (f8(x) ^ x) +#define fb(x) (f8(x) ^ f2(x) ^ x) +#define fd(x) (f8(x) ^ f4(x) ^ x) +#define fe(x) (f8(x) ^ f4(x) ^ f2(x)) + +static const uint_8t s_box[256] = { + 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5, + 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76, + 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0, + 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0, + 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc, + 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15, + 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a, + 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75, + 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0, + 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84, + 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b, + 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf, + 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85, + 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8, + 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5, + 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2, + 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17, + 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73, + 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88, + 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb, + 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c, + 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79, + 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9, + 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08, + 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6, + 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a, + 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e, + 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e, + 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94, + 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf, + 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68, + 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +}; + +static const uint_8t inv_s_box[256] = { + 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38, + 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb, + 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87, + 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb, + 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d, + 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e, + 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2, + 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25, + 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16, + 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92, + 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda, + 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84, + 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a, + 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06, + 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02, + 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b, + 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea, + 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73, + 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85, + 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e, + 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89, + 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b, + 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20, + 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4, + 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31, + 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f, + 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d, + 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef, + 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0, + 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61, + 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26, + 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +}; + +static const uint_8t gfm2_s_box[256] = { + 0xc6,0xf8,0xee,0xf6,0xff,0xd6,0xde,0x91, + 0x60,0x02,0xce,0x56,0xe7,0xb5,0x4d,0xec, + 0x8f,0x1f,0x89,0xfa,0xef,0xb2,0x8e,0xfb, + 0x41,0xb3,0x5f,0x45,0x23,0x53,0xe4,0x9b, + 0x75,0xe1,0x3d,0x4c,0x6c,0x7e,0xf5,0x83, + 0x68,0x51,0xd1,0xf9,0xe2,0xab,0x62,0x2a, + 0x08,0x95,0x46,0x9d,0x30,0x37,0x0a,0x2f, + 0x0e,0x24,0x1b,0xdf,0xcd,0x4e,0x7f,0xea, + 0x12,0x1d,0x58,0x34,0x36,0xdc,0xb4,0x5b, + 0xa4,0x76,0xb7,0x7d,0x52,0xdd,0x5e,0x13, + 0xa6,0xb9,0x00,0xc1,0x40,0xe3,0x79,0xb6, + 0xd4,0x8d,0x67,0x72,0x94,0x98,0xb0,0x85, + 0xbb,0xc5,0x4f,0xed,0x86,0x9a,0x66,0x11, + 0x8a,0xe9,0x04,0xfe,0xa0,0x78,0x25,0x4b, + 0xa2,0x5d,0x80,0x05,0x3f,0x21,0x70,0xf1, + 0x63,0x77,0xaf,0x42,0x20,0xe5,0xfd,0xbf, + 0x81,0x18,0x26,0xc3,0xbe,0x35,0x88,0x2e, + 0x93,0x55,0xfc,0x7a,0xc8,0xba,0x32,0xe6, + 0xc0,0x19,0x9e,0xa3,0x44,0x54,0x3b,0x0b, + 0x8c,0xc7,0x6b,0x28,0xa7,0xbc,0x16,0xad, + 0xdb,0x64,0x74,0x14,0x92,0x0c,0x48,0xb8, + 0x9f,0xbd,0x43,0xc4,0x39,0x31,0xd3,0xf2, + 0xd5,0x8b,0x6e,0xda,0x01,0xb1,0x9c,0x49, + 0xd8,0xac,0xf3,0xcf,0xca,0xf4,0x47,0x10, + 0x6f,0xf0,0x4a,0x5c,0x38,0x57,0x73,0x97, + 0xcb,0xa1,0xe8,0x3e,0x96,0x61,0x0d,0x0f, + 0xe0,0x7c,0x71,0xcc,0x90,0x06,0xf7,0x1c, + 0xc2,0x6a,0xae,0x69,0x17,0x99,0x3a,0x27, + 0xd9,0xeb,0x2b,0x22,0xd2,0xa9,0x07,0x33, + 0x2d,0x3c,0x15,0xc9,0x87,0xaa,0x50,0xa5, + 0x03,0x59,0x09,0x1a,0x65,0xd7,0x84,0xd0, + 0x82,0x29,0x5a,0x1e,0x7b,0xa8,0x6d,0x2c +}; + +static const uint_8t gfm3_s_box[256] = { + 0xa5,0x84,0x99,0x8d,0x0d,0xbd,0xb1,0x54, + 0x50,0x03,0xa9,0x7d,0x19,0x62,0xe6,0x9a, + 0x45,0x9d,0x40,0x87,0x15,0xeb,0xc9,0x0b, + 0xec,0x67,0xfd,0xea,0xbf,0xf7,0x96,0x5b, + 0xc2,0x1c,0xae,0x6a,0x5a,0x41,0x02,0x4f, + 0x5c,0xf4,0x34,0x08,0x93,0x73,0x53,0x3f, + 0x0c,0x52,0x65,0x5e,0x28,0xa1,0x0f,0xb5, + 0x09,0x36,0x9b,0x3d,0x26,0x69,0xcd,0x9f, + 0x1b,0x9e,0x74,0x2e,0x2d,0xb2,0xee,0xfb, + 0xf6,0x4d,0x61,0xce,0x7b,0x3e,0x71,0x97, + 0xf5,0x68,0x00,0x2c,0x60,0x1f,0xc8,0xed, + 0xbe,0x46,0xd9,0x4b,0xde,0xd4,0xe8,0x4a, + 0x6b,0x2a,0xe5,0x16,0xc5,0xd7,0x55,0x94, + 0xcf,0x10,0x06,0x81,0xf0,0x44,0xba,0xe3, + 0xf3,0xfe,0xc0,0x8a,0xad,0xbc,0x48,0x04, + 0xdf,0xc1,0x75,0x63,0x30,0x1a,0x0e,0x6d, + 0x4c,0x14,0x35,0x2f,0xe1,0xa2,0xcc,0x39, + 0x57,0xf2,0x82,0x47,0xac,0xe7,0x2b,0x95, + 0xa0,0x98,0xd1,0x7f,0x66,0x7e,0xab,0x83, + 0xca,0x29,0xd3,0x3c,0x79,0xe2,0x1d,0x76, + 0x3b,0x56,0x4e,0x1e,0xdb,0x0a,0x6c,0xe4, + 0x5d,0x6e,0xef,0xa6,0xa8,0xa4,0x37,0x8b, + 0x32,0x43,0x59,0xb7,0x8c,0x64,0xd2,0xe0, + 0xb4,0xfa,0x07,0x25,0xaf,0x8e,0xe9,0x18, + 0xd5,0x88,0x6f,0x72,0x24,0xf1,0xc7,0x51, + 0x23,0x7c,0x9c,0x21,0xdd,0xdc,0x86,0x85, + 0x90,0x42,0xc4,0xaa,0xd8,0x05,0x01,0x12, + 0xa3,0x5f,0xf9,0xd0,0x91,0x58,0x27,0xb9, + 0x38,0x13,0xb3,0x33,0xbb,0x70,0x89,0xa7, + 0xb6,0x22,0x92,0x20,0x49,0xff,0x78,0x7a, + 0x8f,0xf8,0x80,0x17,0xda,0x31,0xc6,0xb8, + 0xc3,0xb0,0x77,0x11,0xcb,0xfc,0xd6,0x3a +}; + +static const uint_8t gfmul_9[256] = { + 0x00,0x09,0x12,0x1b,0x24,0x2d,0x36,0x3f, + 0x48,0x41,0x5a,0x53,0x6c,0x65,0x7e,0x77, + 0x90,0x99,0x82,0x8b,0xb4,0xbd,0xa6,0xaf, + 0xd8,0xd1,0xca,0xc3,0xfc,0xf5,0xee,0xe7, + 0x3b,0x32,0x29,0x20,0x1f,0x16,0x0d,0x04, + 0x73,0x7a,0x61,0x68,0x57,0x5e,0x45,0x4c, + 0xab,0xa2,0xb9,0xb0,0x8f,0x86,0x9d,0x94, + 0xe3,0xea,0xf1,0xf8,0xc7,0xce,0xd5,0xdc, + 0x76,0x7f,0x64,0x6d,0x52,0x5b,0x40,0x49, + 0x3e,0x37,0x2c,0x25,0x1a,0x13,0x08,0x01, + 0xe6,0xef,0xf4,0xfd,0xc2,0xcb,0xd0,0xd9, + 0xae,0xa7,0xbc,0xb5,0x8a,0x83,0x98,0x91, + 0x4d,0x44,0x5f,0x56,0x69,0x60,0x7b,0x72, + 0x05,0x0c,0x17,0x1e,0x21,0x28,0x33,0x3a, + 0xdd,0xd4,0xcf,0xc6,0xf9,0xf0,0xeb,0xe2, + 0x95,0x9c,0x87,0x8e,0xb1,0xb8,0xa3,0xaa, + 0xec,0xe5,0xfe,0xf7,0xc8,0xc1,0xda,0xd3, + 0xa4,0xad,0xb6,0xbf,0x80,0x89,0x92,0x9b, + 0x7c,0x75,0x6e,0x67,0x58,0x51,0x4a,0x43, + 0x34,0x3d,0x26,0x2f,0x10,0x19,0x02,0x0b, + 0xd7,0xde,0xc5,0xcc,0xf3,0xfa,0xe1,0xe8, + 0x9f,0x96,0x8d,0x84,0xbb,0xb2,0xa9,0xa0, + 0x47,0x4e,0x55,0x5c,0x63,0x6a,0x71,0x78, + 0x0f,0x06,0x1d,0x14,0x2b,0x22,0x39,0x30, + 0x9a,0x93,0x88,0x81,0xbe,0xb7,0xac,0xa5, + 0xd2,0xdb,0xc0,0xc9,0xf6,0xff,0xe4,0xed, + 0x0a,0x03,0x18,0x11,0x2e,0x27,0x3c,0x35, + 0x42,0x4b,0x50,0x59,0x66,0x6f,0x74,0x7d, + 0xa1,0xa8,0xb3,0xba,0x85,0x8c,0x97,0x9e, + 0xe9,0xe0,0xfb,0xf2,0xcd,0xc4,0xdf,0xd6, + 0x31,0x38,0x23,0x2a,0x15,0x1c,0x07,0x0e, + 0x79,0x70,0x6b,0x62,0x5d,0x54,0x4f,0x46 +}; + +static const uint_8t gfmul_b[256] = { + 0x00,0x0b,0x16,0x1d,0x2c,0x27,0x3a,0x31, + 0x58,0x53,0x4e,0x45,0x74,0x7f,0x62,0x69, + 0xb0,0xbb,0xa6,0xad,0x9c,0x97,0x8a,0x81, + 0xe8,0xe3,0xfe,0xf5,0xc4,0xcf,0xd2,0xd9, + 0x7b,0x70,0x6d,0x66,0x57,0x5c,0x41,0x4a, + 0x23,0x28,0x35,0x3e,0x0f,0x04,0x19,0x12, + 0xcb,0xc0,0xdd,0xd6,0xe7,0xec,0xf1,0xfa, + 0x93,0x98,0x85,0x8e,0xbf,0xb4,0xa9,0xa2, + 0xf6,0xfd,0xe0,0xeb,0xda,0xd1,0xcc,0xc7, + 0xae,0xa5,0xb8,0xb3,0x82,0x89,0x94,0x9f, + 0x46,0x4d,0x50,0x5b,0x6a,0x61,0x7c,0x77, + 0x1e,0x15,0x08,0x03,0x32,0x39,0x24,0x2f, + 0x8d,0x86,0x9b,0x90,0xa1,0xaa,0xb7,0xbc, + 0xd5,0xde,0xc3,0xc8,0xf9,0xf2,0xef,0xe4, + 0x3d,0x36,0x2b,0x20,0x11,0x1a,0x07,0x0c, + 0x65,0x6e,0x73,0x78,0x49,0x42,0x5f,0x54, + 0xf7,0xfc,0xe1,0xea,0xdb,0xd0,0xcd,0xc6, + 0xaf,0xa4,0xb9,0xb2,0x83,0x88,0x95,0x9e, + 0x47,0x4c,0x51,0x5a,0x6b,0x60,0x7d,0x76, + 0x1f,0x14,0x09,0x02,0x33,0x38,0x25,0x2e, + 0x8c,0x87,0x9a,0x91,0xa0,0xab,0xb6,0xbd, + 0xd4,0xdf,0xc2,0xc9,0xf8,0xf3,0xee,0xe5, + 0x3c,0x37,0x2a,0x21,0x10,0x1b,0x06,0x0d, + 0x64,0x6f,0x72,0x79,0x48,0x43,0x5e,0x55, + 0x01,0x0a,0x17,0x1c,0x2d,0x26,0x3b,0x30, + 0x59,0x52,0x4f,0x44,0x75,0x7e,0x63,0x68, + 0xb1,0xba,0xa7,0xac,0x9d,0x96,0x8b,0x80, + 0xe9,0xe2,0xff,0xf4,0xc5,0xce,0xd3,0xd8, + 0x7a,0x71,0x6c,0x67,0x56,0x5d,0x40,0x4b, + 0x22,0x29,0x34,0x3f,0x0e,0x05,0x18,0x13, + 0xca,0xc1,0xdc,0xd7,0xe6,0xed,0xf0,0xfb, + 0x92,0x99,0x84,0x8f,0xbe,0xb5,0xa8,0xa3 +}; + +static const uint_8t gfmul_d[256] = { + 0x00,0x0d,0x1a,0x17,0x34,0x39,0x2e,0x23, + 0x68,0x65,0x72,0x7f,0x5c,0x51,0x46,0x4b, + 0xd0,0xdd,0xca,0xc7,0xe4,0xe9,0xfe,0xf3, + 0xb8,0xb5,0xa2,0xaf,0x8c,0x81,0x96,0x9b, + 0xbb,0xb6,0xa1,0xac,0x8f,0x82,0x95,0x98, + 0xd3,0xde,0xc9,0xc4,0xe7,0xea,0xfd,0xf0, + 0x6b,0x66,0x71,0x7c,0x5f,0x52,0x45,0x48, + 0x03,0x0e,0x19,0x14,0x37,0x3a,0x2d,0x20, + 0x6d,0x60,0x77,0x7a,0x59,0x54,0x43,0x4e, + 0x05,0x08,0x1f,0x12,0x31,0x3c,0x2b,0x26, + 0xbd,0xb0,0xa7,0xaa,0x89,0x84,0x93,0x9e, + 0xd5,0xd8,0xcf,0xc2,0xe1,0xec,0xfb,0xf6, + 0xd6,0xdb,0xcc,0xc1,0xe2,0xef,0xf8,0xf5, + 0xbe,0xb3,0xa4,0xa9,0x8a,0x87,0x90,0x9d, + 0x06,0x0b,0x1c,0x11,0x32,0x3f,0x28,0x25, + 0x6e,0x63,0x74,0x79,0x5a,0x57,0x40,0x4d, + 0xda,0xd7,0xc0,0xcd,0xee,0xe3,0xf4,0xf9, + 0xb2,0xbf,0xa8,0xa5,0x86,0x8b,0x9c,0x91, + 0x0a,0x07,0x10,0x1d,0x3e,0x33,0x24,0x29, + 0x62,0x6f,0x78,0x75,0x56,0x5b,0x4c,0x41, + 0x61,0x6c,0x7b,0x76,0x55,0x58,0x4f,0x42, + 0x09,0x04,0x13,0x1e,0x3d,0x30,0x27,0x2a, + 0xb1,0xbc,0xab,0xa6,0x85,0x88,0x9f,0x92, + 0xd9,0xd4,0xc3,0xce,0xed,0xe0,0xf7,0xfa, + 0xb7,0xba,0xad,0xa0,0x83,0x8e,0x99,0x94, + 0xdf,0xd2,0xc5,0xc8,0xeb,0xe6,0xf1,0xfc, + 0x67,0x6a,0x7d,0x70,0x53,0x5e,0x49,0x44, + 0x0f,0x02,0x15,0x18,0x3b,0x36,0x21,0x2c, + 0x0c,0x01,0x16,0x1b,0x38,0x35,0x22,0x2f, + 0x64,0x69,0x7e,0x73,0x50,0x5d,0x4a,0x47, + 0xdc,0xd1,0xc6,0xcb,0xe8,0xe5,0xf2,0xff, + 0xb4,0xb9,0xae,0xa3,0x80,0x8d,0x9a,0x97 +}; + +static const uint_8t gfmul_e[256] = { + 0x00,0x0e,0x1c,0x12,0x38,0x36,0x24,0x2a, + 0x70,0x7e,0x6c,0x62,0x48,0x46,0x54,0x5a, + 0xe0,0xee,0xfc,0xf2,0xd8,0xd6,0xc4,0xca, + 0x90,0x9e,0x8c,0x82,0xa8,0xa6,0xb4,0xba, + 0xdb,0xd5,0xc7,0xc9,0xe3,0xed,0xff,0xf1, + 0xab,0xa5,0xb7,0xb9,0x93,0x9d,0x8f,0x81, + 0x3b,0x35,0x27,0x29,0x03,0x0d,0x1f,0x11, + 0x4b,0x45,0x57,0x59,0x73,0x7d,0x6f,0x61, + 0xad,0xa3,0xb1,0xbf,0x95,0x9b,0x89,0x87, + 0xdd,0xd3,0xc1,0xcf,0xe5,0xeb,0xf9,0xf7, + 0x4d,0x43,0x51,0x5f,0x75,0x7b,0x69,0x67, + 0x3d,0x33,0x21,0x2f,0x05,0x0b,0x19,0x17, + 0x76,0x78,0x6a,0x64,0x4e,0x40,0x52,0x5c, + 0x06,0x08,0x1a,0x14,0x3e,0x30,0x22,0x2c, + 0x96,0x98,0x8a,0x84,0xae,0xa0,0xb2,0xbc, + 0xe6,0xe8,0xfa,0xf4,0xde,0xd0,0xc2,0xcc, + 0x41,0x4f,0x5d,0x53,0x79,0x77,0x65,0x6b, + 0x31,0x3f,0x2d,0x23,0x09,0x07,0x15,0x1b, + 0xa1,0xaf,0xbd,0xb3,0x99,0x97,0x85,0x8b, + 0xd1,0xdf,0xcd,0xc3,0xe9,0xe7,0xf5,0xfb, + 0x9a,0x94,0x86,0x88,0xa2,0xac,0xbe,0xb0, + 0xea,0xe4,0xf6,0xf8,0xd2,0xdc,0xce,0xc0, + 0x7a,0x74,0x66,0x68,0x42,0x4c,0x5e,0x50, + 0x0a,0x04,0x16,0x18,0x32,0x3c,0x2e,0x20, + 0xec,0xe2,0xf0,0xfe,0xd4,0xda,0xc8,0xc6, + 0x9c,0x92,0x80,0x8e,0xa4,0xaa,0xb8,0xb6, + 0x0c,0x02,0x10,0x1e,0x34,0x3a,0x28,0x26, + 0x7c,0x72,0x60,0x6e,0x44,0x4a,0x58,0x56, + 0x37,0x39,0x2b,0x25,0x0f,0x01,0x13,0x1d, + 0x47,0x49,0x5b,0x55,0x7f,0x71,0x63,0x6d, + 0xd7,0xd9,0xcb,0xc5,0xef,0xe1,0xf3,0xfd, + 0xa7,0xa9,0xbb,0xb5,0x9f,0x91,0x83,0x8d +}; + +#if defined( HAVE_UINT_32T ) + typedef unsigned long uint_32t; +#endif + +#if defined( HAVE_MEMCPY ) +# define block_copy(d, s, l) memcpy(d, s, l) +# define block16_copy(d, s) memcpy(d, s, N_BLOCK) +#else +# define block_copy(d, s, l) copy_block(d, s, l) +# define block16_copy(d, s) copy_block16(d, s) +#endif + +/* block size 'nn' must be a multiple of four */ + +static void copy_block16( void *d, const void *s ) +{ +#if defined( HAVE_UINT_32T ) + ((uint_32t*)d)[ 0] = ((uint_32t*)s)[ 0]; + ((uint_32t*)d)[ 1] = ((uint_32t*)s)[ 1]; + ((uint_32t*)d)[ 2] = ((uint_32t*)s)[ 2]; + ((uint_32t*)d)[ 3] = ((uint_32t*)s)[ 3]; +#else + ((uint_8t*)d)[ 0] = ((uint_8t*)s)[ 0]; + ((uint_8t*)d)[ 1] = ((uint_8t*)s)[ 1]; + ((uint_8t*)d)[ 2] = ((uint_8t*)s)[ 2]; + ((uint_8t*)d)[ 3] = ((uint_8t*)s)[ 3]; + ((uint_8t*)d)[ 4] = ((uint_8t*)s)[ 4]; + ((uint_8t*)d)[ 5] = ((uint_8t*)s)[ 5]; + ((uint_8t*)d)[ 6] = ((uint_8t*)s)[ 6]; + ((uint_8t*)d)[ 7] = ((uint_8t*)s)[ 7]; + ((uint_8t*)d)[ 8] = ((uint_8t*)s)[ 8]; + ((uint_8t*)d)[ 9] = ((uint_8t*)s)[ 9]; + ((uint_8t*)d)[10] = ((uint_8t*)s)[10]; + ((uint_8t*)d)[11] = ((uint_8t*)s)[11]; + ((uint_8t*)d)[12] = ((uint_8t*)s)[12]; + ((uint_8t*)d)[13] = ((uint_8t*)s)[13]; + ((uint_8t*)d)[14] = ((uint_8t*)s)[14]; + ((uint_8t*)d)[15] = ((uint_8t*)s)[15]; +#endif +} + +static void copy_block( void * d, void *s, uint_8t nn ) +{ + while( nn-- ) + *((uint_8t*)d)++ = *((uint_8t*)s)++; +} + +static void xor_block( void *d, const void *s ) +{ +#if defined( HAVE_UINT_32T ) + ((uint_32t*)d)[ 0] ^= ((uint_32t*)s)[ 0]; + ((uint_32t*)d)[ 1] ^= ((uint_32t*)s)[ 1]; + ((uint_32t*)d)[ 2] ^= ((uint_32t*)s)[ 2]; + ((uint_32t*)d)[ 3] ^= ((uint_32t*)s)[ 3]; +#else + ((uint_8t*)d)[ 0] ^= ((uint_8t*)s)[ 0]; + ((uint_8t*)d)[ 1] ^= ((uint_8t*)s)[ 1]; + ((uint_8t*)d)[ 2] ^= ((uint_8t*)s)[ 2]; + ((uint_8t*)d)[ 3] ^= ((uint_8t*)s)[ 3]; + ((uint_8t*)d)[ 4] ^= ((uint_8t*)s)[ 4]; + ((uint_8t*)d)[ 5] ^= ((uint_8t*)s)[ 5]; + ((uint_8t*)d)[ 6] ^= ((uint_8t*)s)[ 6]; + ((uint_8t*)d)[ 7] ^= ((uint_8t*)s)[ 7]; + ((uint_8t*)d)[ 8] ^= ((uint_8t*)s)[ 8]; + ((uint_8t*)d)[ 9] ^= ((uint_8t*)s)[ 9]; + ((uint_8t*)d)[10] ^= ((uint_8t*)s)[10]; + ((uint_8t*)d)[11] ^= ((uint_8t*)s)[11]; + ((uint_8t*)d)[12] ^= ((uint_8t*)s)[12]; + ((uint_8t*)d)[13] ^= ((uint_8t*)s)[13]; + ((uint_8t*)d)[14] ^= ((uint_8t*)s)[14]; + ((uint_8t*)d)[15] ^= ((uint_8t*)s)[15]; +#endif +} + +static void copy_and_key( void *d, const void *s, const void *k ) +{ +#if defined( HAVE_UINT_32T ) + ((uint_32t*)d)[ 0] = ((uint_32t*)s)[ 0] ^ ((uint_32t*)k)[ 0]; + ((uint_32t*)d)[ 1] = ((uint_32t*)s)[ 1] ^ ((uint_32t*)k)[ 1]; + ((uint_32t*)d)[ 2] = ((uint_32t*)s)[ 2] ^ ((uint_32t*)k)[ 2]; + ((uint_32t*)d)[ 3] = ((uint_32t*)s)[ 3] ^ ((uint_32t*)k)[ 3]; +#elif 1 + ((uint_8t*)d)[ 0] = ((uint_8t*)s)[ 0] ^ ((uint_8t*)k)[ 0]; + ((uint_8t*)d)[ 1] = ((uint_8t*)s)[ 1] ^ ((uint_8t*)k)[ 1]; + ((uint_8t*)d)[ 2] = ((uint_8t*)s)[ 2] ^ ((uint_8t*)k)[ 2]; + ((uint_8t*)d)[ 3] = ((uint_8t*)s)[ 3] ^ ((uint_8t*)k)[ 3]; + ((uint_8t*)d)[ 4] = ((uint_8t*)s)[ 4] ^ ((uint_8t*)k)[ 4]; + ((uint_8t*)d)[ 5] = ((uint_8t*)s)[ 5] ^ ((uint_8t*)k)[ 5]; + ((uint_8t*)d)[ 6] = ((uint_8t*)s)[ 6] ^ ((uint_8t*)k)[ 6]; + ((uint_8t*)d)[ 7] = ((uint_8t*)s)[ 7] ^ ((uint_8t*)k)[ 7]; + ((uint_8t*)d)[ 8] = ((uint_8t*)s)[ 8] ^ ((uint_8t*)k)[ 8]; + ((uint_8t*)d)[ 9] = ((uint_8t*)s)[ 9] ^ ((uint_8t*)k)[ 9]; + ((uint_8t*)d)[10] = ((uint_8t*)s)[10] ^ ((uint_8t*)k)[10]; + ((uint_8t*)d)[11] = ((uint_8t*)s)[11] ^ ((uint_8t*)k)[11]; + ((uint_8t*)d)[12] = ((uint_8t*)s)[12] ^ ((uint_8t*)k)[12]; + ((uint_8t*)d)[13] = ((uint_8t*)s)[13] ^ ((uint_8t*)k)[13]; + ((uint_8t*)d)[14] = ((uint_8t*)s)[14] ^ ((uint_8t*)k)[14]; + ((uint_8t*)d)[15] = ((uint_8t*)s)[15] ^ ((uint_8t*)k)[15]; +#else + block16_copy(d, s); + xor_block(d, k); +#endif +} + +static void add_round_key( uint_8t d[N_BLOCK], const uint_8t k[N_BLOCK] ) +{ + xor_block(d, k); +} + +static void shift_sub_rows( uint_8t st[N_BLOCK] ) +{ uint_8t tt; + + st[ 0] = s_box[st[ 0]]; st[ 4] = s_box[st[ 4]]; + st[ 8] = s_box[st[ 8]]; st[12] = s_box[st[12]]; + + tt = st[1]; st[ 1] = s_box[st[ 5]]; st[ 5] = s_box[st[ 9]]; + st[ 9] = s_box[st[13]]; st[13] = s_box[ tt ]; + + tt = st[2]; st[ 2] = s_box[st[10]]; st[10] = s_box[ tt ]; + tt = st[6]; st[ 6] = s_box[st[14]]; st[14] = s_box[ tt ]; + + tt = st[15]; st[15] = s_box[st[11]]; st[11] = s_box[st[ 7]]; + st[ 7] = s_box[st[ 3]]; st[ 3] = s_box[ tt ]; +} + +static void inv_shift_sub_rows( uint_8t st[N_BLOCK] ) +{ uint_8t tt; + + st[ 0] = inv_s_box[st[ 0]]; st[ 4] = inv_s_box[st[ 4]]; + st[ 8] = inv_s_box[st[ 8]]; st[12] = inv_s_box[st[12]]; + + tt = st[13]; st[13] = inv_s_box[st[9]]; st[ 9] = inv_s_box[st[5]]; + st[ 5] = inv_s_box[st[1]]; st[ 1] = inv_s_box[ tt ]; + + tt = st[2]; st[ 2] = inv_s_box[st[10]]; st[10] = inv_s_box[ tt ]; + tt = st[6]; st[ 6] = inv_s_box[st[14]]; st[14] = inv_s_box[ tt ]; + + tt = st[3]; st[ 3] = inv_s_box[st[ 7]]; st[ 7] = inv_s_box[st[11]]; + st[11] = inv_s_box[st[15]]; st[15] = inv_s_box[ tt ]; +} + +#if defined( VERSION_1 ) + static void mix_sub_columns( uint_8t dt[N_BLOCK] ) + { uint_8t st[N_BLOCK]; + block16_copy(st, dt); +#else + static void mix_sub_columns( uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK] ) + { +#endif + dt[ 0] = gfm2_s_box[st[0]] ^ gfm3_s_box[st[5]] ^ s_box[st[10]] ^ s_box[st[15]]; + dt[ 1] = s_box[st[0]] ^ gfm2_s_box[st[5]] ^ gfm3_s_box[st[10]] ^ s_box[st[15]]; + dt[ 2] = s_box[st[0]] ^ s_box[st[5]] ^ gfm2_s_box[st[10]] ^ gfm3_s_box[st[15]]; + dt[ 3] = gfm3_s_box[st[0]] ^ s_box[st[5]] ^ s_box[st[10]] ^ gfm2_s_box[st[15]]; + + dt[ 4] = gfm2_s_box[st[4]] ^ gfm3_s_box[st[9]] ^ s_box[st[14]] ^ s_box[st[3]]; + dt[ 5] = s_box[st[4]] ^ gfm2_s_box[st[9]] ^ gfm3_s_box[st[14]] ^ s_box[st[3]]; + dt[ 6] = s_box[st[4]] ^ s_box[st[9]] ^ gfm2_s_box[st[14]] ^ gfm3_s_box[st[3]]; + dt[ 7] = gfm3_s_box[st[4]] ^ s_box[st[9]] ^ s_box[st[14]] ^ gfm2_s_box[st[3]]; + + dt[ 8] = gfm2_s_box[st[8]] ^ gfm3_s_box[st[13]] ^ s_box[st[2]] ^ s_box[st[7]]; + dt[ 9] = s_box[st[8]] ^ gfm2_s_box[st[13]] ^ gfm3_s_box[st[2]] ^ s_box[st[7]]; + dt[10] = s_box[st[8]] ^ s_box[st[13]] ^ gfm2_s_box[st[2]] ^ gfm3_s_box[st[7]]; + dt[11] = gfm3_s_box[st[8]] ^ s_box[st[13]] ^ s_box[st[2]] ^ gfm2_s_box[st[7]]; + + dt[12] = gfm2_s_box[st[12]] ^ gfm3_s_box[st[1]] ^ s_box[st[6]] ^ s_box[st[11]]; + dt[13] = s_box[st[12]] ^ gfm2_s_box[st[1]] ^ gfm3_s_box[st[6]] ^ s_box[st[11]]; + dt[14] = s_box[st[12]] ^ s_box[st[1]] ^ gfm2_s_box[st[6]] ^ gfm3_s_box[st[11]]; + dt[15] = gfm3_s_box[st[12]] ^ s_box[st[1]] ^ s_box[st[6]] ^ gfm2_s_box[st[11]]; + } + +#if defined( VERSION_1 ) + static void inv_mix_sub_columns( uint_8t dt[N_BLOCK] ) + { uint_8t st[N_BLOCK]; + block16_copy(st, dt); +#else + static void inv_mix_sub_columns( uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK] ) + { +#endif + dt[ 0] = inv_s_box[gfmul_e[st[ 0]] ^ gfmul_b[st[ 1]] ^ gfmul_d[st[ 2]] ^ gfmul_9[st[ 3]]]; + dt[ 5] = inv_s_box[gfmul_9[st[ 0]] ^ gfmul_e[st[ 1]] ^ gfmul_b[st[ 2]] ^ gfmul_d[st[ 3]]]; + dt[10] = inv_s_box[gfmul_d[st[ 0]] ^ gfmul_9[st[ 1]] ^ gfmul_e[st[ 2]] ^ gfmul_b[st[ 3]]]; + dt[15] = inv_s_box[gfmul_b[st[ 0]] ^ gfmul_d[st[ 1]] ^ gfmul_9[st[ 2]] ^ gfmul_e[st[ 3]]]; + + dt[ 4] = inv_s_box[gfmul_e[st[ 4]] ^ gfmul_b[st[ 5]] ^ gfmul_d[st[ 6]] ^ gfmul_9[st[ 7]]]; + dt[ 9] = inv_s_box[gfmul_9[st[ 4]] ^ gfmul_e[st[ 5]] ^ gfmul_b[st[ 6]] ^ gfmul_d[st[ 7]]]; + dt[14] = inv_s_box[gfmul_d[st[ 4]] ^ gfmul_9[st[ 5]] ^ gfmul_e[st[ 6]] ^ gfmul_b[st[ 7]]]; + dt[ 3] = inv_s_box[gfmul_b[st[ 4]] ^ gfmul_d[st[ 5]] ^ gfmul_9[st[ 6]] ^ gfmul_e[st[ 7]]]; + + dt[ 8] = inv_s_box[gfmul_e[st[ 8]] ^ gfmul_b[st[ 9]] ^ gfmul_d[st[10]] ^ gfmul_9[st[11]]]; + dt[13] = inv_s_box[gfmul_9[st[ 8]] ^ gfmul_e[st[ 9]] ^ gfmul_b[st[10]] ^ gfmul_d[st[11]]]; + dt[ 2] = inv_s_box[gfmul_d[st[ 8]] ^ gfmul_9[st[ 9]] ^ gfmul_e[st[10]] ^ gfmul_b[st[11]]]; + dt[ 7] = inv_s_box[gfmul_b[st[ 8]] ^ gfmul_d[st[ 9]] ^ gfmul_9[st[10]] ^ gfmul_e[st[11]]]; + + dt[12] = inv_s_box[gfmul_e[st[12]] ^ gfmul_b[st[13]] ^ gfmul_d[st[14]] ^ gfmul_9[st[15]]]; + dt[ 1] = inv_s_box[gfmul_9[st[12]] ^ gfmul_e[st[13]] ^ gfmul_b[st[14]] ^ gfmul_d[st[15]]]; + dt[ 6] = inv_s_box[gfmul_d[st[12]] ^ gfmul_9[st[13]] ^ gfmul_e[st[14]] ^ gfmul_b[st[15]]]; + dt[11] = inv_s_box[gfmul_b[st[12]] ^ gfmul_d[st[13]] ^ gfmul_9[st[14]] ^ gfmul_e[st[15]]]; + } + +#if defined( AES_ENC_PREKEYED ) || defined( AES_DEC_PREKEYED ) + +/* Set the cipher key for the pre-keyed version */ + +return_type aes_set_key( const unsigned char key[], length_type keylen, aes_context ctx[1] ) +{ + uint_8t cc, rc, hi; + + switch( keylen ) + { + case 16: + case 128: + keylen = 16; + break; + case 24: + case 192: + keylen = 24; + break; + case 32: + case 256: + keylen = 32; + break; + default: + ctx->rnd = 0; + return (return_type) -1; + } + block_copy(ctx->ksch, key, keylen); + hi = (keylen + 28) << 2; + ctx->rnd = (hi >> 4) - 1; + for( cc = keylen, rc = 1; cc < hi; cc += 4 ) + { uint_8t tt, t0, t1, t2, t3; + + t0 = ctx->ksch[cc - 4]; + t1 = ctx->ksch[cc - 3]; + t2 = ctx->ksch[cc - 2]; + t3 = ctx->ksch[cc - 1]; + if( cc % keylen == 0 ) + { + tt = t0; + t0 = s_box[t1] ^ rc; + t1 = s_box[t2]; + t2 = s_box[t3]; + t3 = s_box[tt]; + rc = f2(rc); + } + else if( keylen > 24 && cc % keylen == 16 ) + { + t0 = s_box[t0]; + t1 = s_box[t1]; + t2 = s_box[t2]; + t3 = s_box[t3]; + } + tt = cc - keylen; + ctx->ksch[cc + 0] = ctx->ksch[tt + 0] ^ t0; + ctx->ksch[cc + 1] = ctx->ksch[tt + 1] ^ t1; + ctx->ksch[cc + 2] = ctx->ksch[tt + 2] ^ t2; + ctx->ksch[cc + 3] = ctx->ksch[tt + 3] ^ t3; + } + return 0; +} + +#endif + +#if defined( AES_ENC_PREKEYED ) + +/* Encrypt a single block of 16 bytes */ + +return_type aes_encrypt( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], const aes_context ctx[1] ) +{ + if( ctx->rnd ) + { + uint_8t s1[N_BLOCK], r; + copy_and_key( s1, in, ctx->ksch ); + + for( r = 1 ; r < ctx->rnd ; ++r ) +#if defined( VERSION_1 ) + { + mix_sub_columns( s1 ); + add_round_key( s1, ctx->ksch + r * N_BLOCK); + } +#else + { uint_8t s2[N_BLOCK]; + mix_sub_columns( s2, s1 ); + copy_and_key( s1, s2, ctx->ksch + r * N_BLOCK); + } +#endif + shift_sub_rows( s1 ); + copy_and_key( out, s1, ctx->ksch + r * N_BLOCK ); + } + else + return (return_type) -1; + return 0; +} + +#endif + +#if defined( AES_DEC_PREKEYED ) + +/* Decrypt a single block of 16 bytes */ + +return_type aes_decrypt( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], const aes_context ctx[1] ) +{ + if( ctx->rnd ) + { + uint_8t s1[N_BLOCK], r; + copy_and_key( s1, in, ctx->ksch + ctx->rnd * N_BLOCK ); + inv_shift_sub_rows( s1 ); + + for( r = ctx->rnd ; --r ; ) +#if defined( VERSION_1 ) + { + add_round_key( s1, ctx->ksch + r * N_BLOCK ); + inv_mix_sub_columns( s1 ); + } +#else + { uint_8t s2[N_BLOCK]; + copy_and_key( s2, s1, ctx->ksch + r * N_BLOCK ); + inv_mix_sub_columns( s1, s2 ); + } +#endif + copy_and_key( out, s1, ctx->ksch ); + } + else + return (return_type) -1; + return 0; +} + +#endif + +#if defined( AES_ENC_128_OTFK ) + +/* The 'on the fly' encryption key update for for 128 bit keys */ + +static void update_encrypt_key_128( uint_8t k[N_BLOCK], uint_8t *rc ) +{ uint_8t cc; + + k[0] ^= s_box[k[13]] ^ *rc; + k[1] ^= s_box[k[14]]; + k[2] ^= s_box[k[15]]; + k[3] ^= s_box[k[12]]; + *rc = f2( *rc ); + + for(cc = 4; cc < 16; cc += 4 ) + { + k[cc + 0] ^= k[cc - 4]; + k[cc + 1] ^= k[cc - 3]; + k[cc + 2] ^= k[cc - 2]; + k[cc + 3] ^= k[cc - 1]; + } +} + +/* Encrypt a single block of 16 bytes with 'on the fly' 128 bit keying */ + +void aes_encrypt_128( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], + const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK] ) +{ uint_8t s1[N_BLOCK], r, rc = 1; + + if(o_key != key) + block16_copy( o_key, key ); + copy_and_key( s1, in, o_key ); + + for( r = 1 ; r < 10 ; ++r ) +#if defined( VERSION_1 ) + { + mix_sub_columns( s1 ); + update_encrypt_key_128( o_key, &rc ); + add_round_key( s1, o_key ); + } +#else + { uint_8t s2[N_BLOCK]; + mix_sub_columns( s2, s1 ); + update_encrypt_key_128( o_key, &rc ); + copy_and_key( s1, s2, o_key ); + } +#endif + + shift_sub_rows( s1 ); + update_encrypt_key_128( o_key, &rc ); + copy_and_key( out, s1, o_key ); +} + +#endif + +#if defined( AES_DEC_128_OTFK ) + +/* The 'on the fly' decryption key update for for 128 bit keys */ + +static void update_decrypt_key_128( uint_8t k[N_BLOCK], uint_8t *rc ) +{ uint_8t cc; + + for( cc = 12; cc > 0; cc -= 4 ) + { + k[cc + 0] ^= k[cc - 4]; + k[cc + 1] ^= k[cc - 3]; + k[cc + 2] ^= k[cc - 2]; + k[cc + 3] ^= k[cc - 1]; + } + *rc = d2(*rc); + k[0] ^= s_box[k[13]] ^ *rc; + k[1] ^= s_box[k[14]]; + k[2] ^= s_box[k[15]]; + k[3] ^= s_box[k[12]]; +} + +/* Decrypt a single block of 16 bytes with 'on the fly' 128 bit keying */ + +void aes_decrypt_128( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], + const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK] ) +{ + uint_8t s1[N_BLOCK], r, rc = 0x6c; + if(o_key != key) + block16_copy( o_key, key ); + + copy_and_key( s1, in, o_key ); + inv_shift_sub_rows( s1 ); + + for( r = 10 ; --r ; ) +#if defined( VERSION_1 ) + { + update_decrypt_key_128( o_key, &rc ); + add_round_key( s1, o_key ); + inv_mix_sub_columns( s1 ); + } +#else + { uint_8t s2[N_BLOCK]; + update_decrypt_key_128( o_key, &rc ); + copy_and_key( s2, s1, o_key ); + inv_mix_sub_columns( s1, s2 ); + } +#endif + update_decrypt_key_128( o_key, &rc ); + copy_and_key( out, s1, o_key ); +} + +#endif + +#if defined( AES_ENC_256_OTFK ) + +/* The 'on the fly' encryption key update for for 256 bit keys */ + +static void update_encrypt_key_256( uint_8t k[2 * N_BLOCK], uint_8t *rc ) +{ uint_8t cc; + + k[0] ^= s_box[k[29]] ^ *rc; + k[1] ^= s_box[k[30]]; + k[2] ^= s_box[k[31]]; + k[3] ^= s_box[k[28]]; + *rc = f2( *rc ); + + for(cc = 4; cc < 16; cc += 4) + { + k[cc + 0] ^= k[cc - 4]; + k[cc + 1] ^= k[cc - 3]; + k[cc + 2] ^= k[cc - 2]; + k[cc + 3] ^= k[cc - 1]; + } + + k[16] ^= s_box[k[12]]; + k[17] ^= s_box[k[13]]; + k[18] ^= s_box[k[14]]; + k[19] ^= s_box[k[15]]; + + for( cc = 20; cc < 32; cc += 4 ) + { + k[cc + 0] ^= k[cc - 4]; + k[cc + 1] ^= k[cc - 3]; + k[cc + 2] ^= k[cc - 2]; + k[cc + 3] ^= k[cc - 1]; + } +} + +/* Encrypt a single block of 16 bytes with 'on the fly' 256 bit keying */ + +void aes_encrypt_256( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], + const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK] ) +{ + uint_8t s1[N_BLOCK], r, rc = 1; + if(o_key != key) + { + block16_copy( o_key, key ); + block16_copy( o_key + 16, key + 16 ); + } + copy_and_key( s1, in, o_key ); + + for( r = 1 ; r < 14 ; ++r ) +#if defined( VERSION_1 ) + { + mix_sub_columns(s1); + if( r & 1 ) + add_round_key( s1, o_key + 16 ); + else + { + update_encrypt_key_256( o_key, &rc ); + add_round_key( s1, o_key ); + } + } +#else + { uint_8t s2[N_BLOCK]; + mix_sub_columns( s2, s1 ); + if( r & 1 ) + copy_and_key( s1, s2, o_key + 16 ); + else + { + update_encrypt_key_256( o_key, &rc ); + copy_and_key( s1, s2, o_key ); + } + } +#endif + + shift_sub_rows( s1 ); + update_encrypt_key_256( o_key, &rc ); + copy_and_key( out, s1, o_key ); +} + +#endif + +#if defined( AES_DEC_256_OTFK ) + +/* The 'on the fly' encryption key update for for 256 bit keys */ + +static void update_decrypt_key_256( uint_8t k[2 * N_BLOCK], uint_8t *rc ) +{ uint_8t cc; + + for(cc = 28; cc > 16; cc -= 4) + { + k[cc + 0] ^= k[cc - 4]; + k[cc + 1] ^= k[cc - 3]; + k[cc + 2] ^= k[cc - 2]; + k[cc + 3] ^= k[cc - 1]; + } + + k[16] ^= s_box[k[12]]; + k[17] ^= s_box[k[13]]; + k[18] ^= s_box[k[14]]; + k[19] ^= s_box[k[15]]; + + for(cc = 12; cc > 0; cc -= 4) + { + k[cc + 0] ^= k[cc - 4]; + k[cc + 1] ^= k[cc - 3]; + k[cc + 2] ^= k[cc - 2]; + k[cc + 3] ^= k[cc - 1]; + } + + *rc = d2(*rc); + k[0] ^= s_box[k[29]] ^ *rc; + k[1] ^= s_box[k[30]]; + k[2] ^= s_box[k[31]]; + k[3] ^= s_box[k[28]]; +} + +/* Decrypt a single block of 16 bytes with 'on the fly' + 256 bit keying +*/ +void aes_decrypt_256( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], + const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK] ) +{ + uint_8t s1[N_BLOCK], r, rc = 0x80; + + if(o_key != key) + { + block16_copy( o_key, key ); + block16_copy( o_key + 16, key + 16 ); + } + + copy_and_key( s1, in, o_key ); + inv_shift_sub_rows( s1 ); + + for( r = 14 ; --r ; ) +#if defined( VERSION_1 ) + { + if( ( r & 1 ) ) + { + update_decrypt_key_256( o_key, &rc ); + add_round_key( s1, o_key + 16 ); + } + else + add_round_key( s1, o_key ); + inv_mix_sub_columns( s1 ); + } +#else + { uint_8t s2[N_BLOCK]; + if( ( r & 1 ) ) + { + update_decrypt_key_256( o_key, &rc ); + copy_and_key( s2, s1, o_key + 16 ); + } + else + copy_and_key( s2, s1, o_key ); + inv_mix_sub_columns( s1, s2 ); + } +#endif + copy_and_key( out, s1, o_key ); +} + +#endif diff --git a/src/Crypto/AesSmall.h b/src/Crypto/AesSmall.h index 516c6964..ebeb24ef 100644 --- a/src/Crypto/AesSmall.h +++ b/src/Crypto/AesSmall.h @@ -1,169 +1,169 @@ -/* - --------------------------------------------------------------------------- - Copyright (c) 1998-2006, Brian Gladman, Worcester, UK. All rights reserved. - - LICENSE TERMS - - The free distribution and use of this software in both source and binary - form is allowed (with or without changes) provided that: - - 1. distributions of this source code include the above copyright - notice, this list of conditions and the following disclaimer; - - 2. distributions in binary form include the above copyright - notice, this list of conditions and the following disclaimer - in the documentation and/or other associated materials; - - 3. the copyright holder's name is not used to endorse products - built using this software without specific written permission. - - ALTERNATIVELY, provided that this notice is retained in full, this product - may be distributed under the terms of the GNU General Public License (GPL), - in which case the provisions of the GPL apply INSTEAD OF those given above. - - DISCLAIMER - - This software is provided 'as is' with no explicit or implied warranties - in respect of its properties, including, but not limited to, correctness - and/or fitness for purpose. - --------------------------------------------------------------------------- - Issue 09/09/2006 - - This is an AES implementation that uses only 8-bit byte operations on the - cipher state. - */ - -#ifndef AES_H -#define AES_H - -#if defined(__cplusplus) -extern "C" -{ -#endif - -/* This provides speed optimisation opportunities if 32-bit word - operations are available -*/ -#if 1 -# define HAVE_UINT_32T -#endif - -#if 1 -# define AES_ENC_PREKEYED /* AES encryption with a precomputed key schedule */ -#endif -#if 1 -# define AES_DEC_PREKEYED /* AES decryption with a precomputed key schedule */ -#endif -#if 0 -# define AES_ENC_128_OTFK /* AES encryption with 'on the fly' 128 bit keying */ -#endif -#if 0 -# define AES_DEC_128_OTFK /* AES decryption with 'on the fly' 128 bit keying */ -#endif -#if 0 -# define AES_ENC_256_OTFK /* AES encryption with 'on the fly' 256 bit keying */ -#endif -#if 0 -# define AES_DEC_256_OTFK /* AES decryption with 'on the fly' 256 bit keying */ -#endif - -#define N_ROW 4 -#define N_COL 4 -#define N_BLOCK (N_ROW * N_COL) -#define N_MAX_ROUNDS 14 - -typedef unsigned char uint_8t; - -typedef uint_8t return_type; -typedef uint_8t length_type; -typedef uint_8t uint_type; - -typedef unsigned char uint_8t; - -typedef struct -{ uint_8t ksch[(N_MAX_ROUNDS + 1) * N_BLOCK]; - uint_8t rnd; -} aes_context; - -/* The following calls are for a precomputed key schedule - - NOTE: If the length_type used for the key length is an - unsigned 8-bit character, a key length of 256 bits must - be entered as a length in bytes (valid inputs are hence - 128, 192, 16, 24 and 32). -*/ - -#if defined( AES_ENC_PREKEYED ) || defined( AES_DEC_PREKEYED ) - -return_type aes_set_key( const unsigned char key[], - length_type keylen, - aes_context ctx[1] ); -#endif - -#if defined( AES_ENC_PREKEYED ) - -return_type aes_encrypt( const unsigned char in[N_BLOCK], - unsigned char out[N_BLOCK], - const aes_context ctx[1] ); -#endif - -#if defined( AES_DEC_PREKEYED ) - -return_type aes_decrypt( const unsigned char in[N_BLOCK], - unsigned char out[N_BLOCK], - const aes_context ctx[1] ); -#endif - -/* The following calls are for 'on the fly' keying. In this case the - encryption and decryption keys are different. - - The encryption subroutines take a key in an array of bytes in - key[L] where L is 16, 24 or 32 bytes for key lengths of 128, - 192, and 256 bits respectively. They then encrypts the input - data, in[] with this key and put the reult in the output array - out[]. In addition, the second key array, o_key[L], is used - to output the key that is needed by the decryption subroutine - to reverse the encryption operation. The two key arrays can - be the same array but in this case the original key will be - overwritten. - - In the same way, the decryption subroutines output keys that - can be used to reverse their effect when used for encryption. - - Only 128 and 256 bit keys are supported in these 'on the fly' - modes. -*/ - -#if defined( AES_ENC_128_OTFK ) -void aes_encrypt_128( const unsigned char in[N_BLOCK], - unsigned char out[N_BLOCK], - const unsigned char key[N_BLOCK], - uint_8t o_key[N_BLOCK] ); -#endif - -#if defined( AES_DEC_128_OTFK ) -void aes_decrypt_128( const unsigned char in[N_BLOCK], - unsigned char out[N_BLOCK], - const unsigned char key[N_BLOCK], - unsigned char o_key[N_BLOCK] ); -#endif - -#if defined( AES_ENC_256_OTFK ) -void aes_encrypt_256( const unsigned char in[N_BLOCK], - unsigned char out[N_BLOCK], - const unsigned char key[2 * N_BLOCK], - unsigned char o_key[2 * N_BLOCK] ); -#endif - -#if defined( AES_DEC_256_OTFK ) -void aes_decrypt_256( const unsigned char in[N_BLOCK], - unsigned char out[N_BLOCK], - const unsigned char key[2 * N_BLOCK], - unsigned char o_key[2 * N_BLOCK] ); -#endif - -#if defined(__cplusplus) -} -#endif - -#endif +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2006, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue 09/09/2006 + + This is an AES implementation that uses only 8-bit byte operations on the + cipher state. + */ + +#ifndef AES_H +#define AES_H + +#if defined(__cplusplus) +extern "C" +{ +#endif + +/* This provides speed optimisation opportunities if 32-bit word + operations are available +*/ +#if 1 +# define HAVE_UINT_32T +#endif + +#if 1 +# define AES_ENC_PREKEYED /* AES encryption with a precomputed key schedule */ +#endif +#if 1 +# define AES_DEC_PREKEYED /* AES decryption with a precomputed key schedule */ +#endif +#if 0 +# define AES_ENC_128_OTFK /* AES encryption with 'on the fly' 128 bit keying */ +#endif +#if 0 +# define AES_DEC_128_OTFK /* AES decryption with 'on the fly' 128 bit keying */ +#endif +#if 0 +# define AES_ENC_256_OTFK /* AES encryption with 'on the fly' 256 bit keying */ +#endif +#if 0 +# define AES_DEC_256_OTFK /* AES decryption with 'on the fly' 256 bit keying */ +#endif + +#define N_ROW 4 +#define N_COL 4 +#define N_BLOCK (N_ROW * N_COL) +#define N_MAX_ROUNDS 14 + +typedef unsigned char uint_8t; + +typedef uint_8t return_type; +typedef uint_8t length_type; +typedef uint_8t uint_type; + +typedef unsigned char uint_8t; + +typedef struct +{ uint_8t ksch[(N_MAX_ROUNDS + 1) * N_BLOCK]; + uint_8t rnd; +} aes_context; + +/* The following calls are for a precomputed key schedule + + NOTE: If the length_type used for the key length is an + unsigned 8-bit character, a key length of 256 bits must + be entered as a length in bytes (valid inputs are hence + 128, 192, 16, 24 and 32). +*/ + +#if defined( AES_ENC_PREKEYED ) || defined( AES_DEC_PREKEYED ) + +return_type aes_set_key( const unsigned char key[], + length_type keylen, + aes_context ctx[1] ); +#endif + +#if defined( AES_ENC_PREKEYED ) + +return_type aes_encrypt( const unsigned char in[N_BLOCK], + unsigned char out[N_BLOCK], + const aes_context ctx[1] ); +#endif + +#if defined( AES_DEC_PREKEYED ) + +return_type aes_decrypt( const unsigned char in[N_BLOCK], + unsigned char out[N_BLOCK], + const aes_context ctx[1] ); +#endif + +/* The following calls are for 'on the fly' keying. In this case the + encryption and decryption keys are different. + + The encryption subroutines take a key in an array of bytes in + key[L] where L is 16, 24 or 32 bytes for key lengths of 128, + 192, and 256 bits respectively. They then encrypts the input + data, in[] with this key and put the reult in the output array + out[]. In addition, the second key array, o_key[L], is used + to output the key that is needed by the decryption subroutine + to reverse the encryption operation. The two key arrays can + be the same array but in this case the original key will be + overwritten. + + In the same way, the decryption subroutines output keys that + can be used to reverse their effect when used for encryption. + + Only 128 and 256 bit keys are supported in these 'on the fly' + modes. +*/ + +#if defined( AES_ENC_128_OTFK ) +void aes_encrypt_128( const unsigned char in[N_BLOCK], + unsigned char out[N_BLOCK], + const unsigned char key[N_BLOCK], + uint_8t o_key[N_BLOCK] ); +#endif + +#if defined( AES_DEC_128_OTFK ) +void aes_decrypt_128( const unsigned char in[N_BLOCK], + unsigned char out[N_BLOCK], + const unsigned char key[N_BLOCK], + unsigned char o_key[N_BLOCK] ); +#endif + +#if defined( AES_ENC_256_OTFK ) +void aes_encrypt_256( const unsigned char in[N_BLOCK], + unsigned char out[N_BLOCK], + const unsigned char key[2 * N_BLOCK], + unsigned char o_key[2 * N_BLOCK] ); +#endif + +#if defined( AES_DEC_256_OTFK ) +void aes_decrypt_256( const unsigned char in[N_BLOCK], + unsigned char out[N_BLOCK], + const unsigned char key[2 * N_BLOCK], + unsigned char o_key[2 * N_BLOCK] ); +#endif + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/src/Crypto/AesSmall_x86.asm b/src/Crypto/AesSmall_x86.asm index fe7dc47b..de32fc66 100644 --- a/src/Crypto/AesSmall_x86.asm +++ b/src/Crypto/AesSmall_x86.asm @@ -1,1444 +1,1444 @@ - -; --------------------------------------------------------------------------- -; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. -; -; LICENSE TERMS -; -; The free distribution and use of this software is allowed (with or without -; changes) provided that: -; -; 1. source code distributions include the above copyright notice, this -; list of conditions and the following disclaimer; -; -; 2. binary distributions include the above copyright notice, this list -; of conditions and the following disclaimer in their documentation; -; -; 3. the name of the copyright holder is not used to endorse products -; built using this software without specific written permission. -; -; DISCLAIMER -; -; This software is provided 'as is' with no explicit or implied warranties -; in respect of its properties, including, but not limited to, correctness -; and/or fitness for purpose. -; --------------------------------------------------------------------------- -; Issue 20/12/2007 -; -; This code requires either ASM_X86_V2 or ASM_X86_V2C to be set in aesopt.h -; and the same define to be set here as well. If AES_V2C is set this file -; requires the C files aeskey.c and aestab.c for support. - -; An AES implementation for x86 processors using the YASM (or NASM) assembler. -; This is a full assembler implementation covering encryption, decryption and -; key scheduling. It uses 2k bytes of tables but its encryption and decryption -; performance is very close to that obtained using large tables. Key schedule -; expansion is slower for both encryption and decryption but this is likely to -; be offset by the much smaller load that this version places on the processor -; cache. I acknowledge the contribution made by Daniel Bernstein to aspects of -; the design of the AES round function used here. -; -; This code provides the standard AES block size (128 bits, 16 bytes) and the -; three standard AES key sizes (128, 192 and 256 bits). It has the same call -; interface as my C implementation. The ebx, esi, edi and ebp registers are -; preserved across calls but eax, ecx and edx and the artihmetic status flags -; are not. Although this is a full assembler implementation, it can be used -; in conjunction with my C code which provides faster key scheduling using -; large tables. In this case aeskey.c should be compiled with ASM_X86_V2C -; defined. It is also important that the defines below match those used in the -; C code. This code uses the VC++ register saving conentions; if it is used -; with another compiler, conventions for using and saving registers may need -; to be checked (and calling conventions). The YASM command line for the VC++ -; custom build step is: -; -; yasm -Xvc -f win32 -D -o "$(TargetDir)\$(InputName).obj" "$(InputPath)" -; -; For the cryptlib build this is (pcg): -; -; yasm -Xvc -f win32 -D ASM_X86_V2C -o aescrypt2.obj aes_x86_v2.asm -; -; where is ASM_X86_V2 or ASM_X86_V2C. The calling intefaces are: -; -; AES_RETURN aes_encrypt(const unsigned char in_blk[], -; unsigned char out_blk[], const aes_encrypt_ctx cx[1]); -; -; AES_RETURN aes_decrypt(const unsigned char in_blk[], -; unsigned char out_blk[], const aes_decrypt_ctx cx[1]); -; -; AES_RETURN aes_encrypt_key(const unsigned char key[], -; const aes_encrypt_ctx cx[1]); -; -; AES_RETURN aes_decrypt_key(const unsigned char key[], -; const aes_decrypt_ctx cx[1]); -; -; AES_RETURN aes_encrypt_key(const unsigned char key[], -; unsigned int len, const aes_decrypt_ctx cx[1]); -; -; AES_RETURN aes_decrypt_key(const unsigned char key[], -; unsigned int len, const aes_decrypt_ctx cx[1]); -; -; where is 128, 102 or 256. In the last two calls the length can be in -; either bits or bytes. - -; The DLL interface must use the _stdcall convention in which the number -; of bytes of parameter space is added after an @ to the sutine's name. -; We must also remove our parameters from the stack before return (see -; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version. - -; -; Adapted for TrueCrypt: -; - All tables generated at run-time -; - Adapted for 16-bit environment -; - -CPU 386 -USE16 -SEGMENT _TEXT PUBLIC CLASS=CODE USE16 -SEGMENT _DATA PUBLIC CLASS=DATA USE16 - -GROUP DGROUP _TEXT _DATA - -extern _aes_dec_tab ; Aestab.c -extern _aes_enc_tab - -; %define DLL_EXPORT - -; The size of the code can be reduced by using functions for the encryption -; and decryption rounds in place of macro expansion - -%define REDUCE_CODE_SIZE - -; Comment in/out the following lines to obtain the desired subroutines. These -; selections MUST match those in the C header file aes.h - -; %define AES_128 ; define if AES with 128 bit keys is needed -; %define AES_192 ; define if AES with 192 bit keys is needed -%define AES_256 ; define if AES with 256 bit keys is needed -; %define AES_VAR ; define if a variable key size is needed -%define ENCRYPTION ; define if encryption is needed -%define DECRYPTION ; define if decryption is needed -; %define AES_REV_DKS ; define if key decryption schedule is reversed - -%ifndef ASM_X86_V2C -%define ENCRYPTION_KEY_SCHEDULE ; define if encryption key expansion is needed -%define DECRYPTION_KEY_SCHEDULE ; define if decryption key expansion is needed -%endif - -; The encryption key schedule has the following in memory layout where N is the -; number of rounds (10, 12 or 14): -; -; lo: | input key (round 0) | ; each round is four 32-bit words -; | encryption round 1 | -; | encryption round 2 | -; .... -; | encryption round N-1 | -; hi: | encryption round N | -; -; The decryption key schedule is normally set up so that it has the same -; layout as above by actually reversing the order of the encryption key -; schedule in memory (this happens when AES_REV_DKS is set): -; -; lo: | decryption round 0 | = | encryption round N | -; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ] -; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ] -; .... .... -; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ] -; hi: | decryption round N | = | input key (round 0) | -; -; with rounds except the first and last modified using inv_mix_column() -; But if AES_REV_DKS is NOT set the order of keys is left as it is for -; encryption so that it has to be accessed in reverse when used for -; decryption (although the inverse mix column modifications are done) -; -; lo: | decryption round 0 | = | input key (round 0) | -; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ] -; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ] -; .... .... -; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ] -; hi: | decryption round N | = | encryption round N | -; -; This layout is faster when the assembler key scheduling provided here -; is used. -; -; End of user defines - -%ifdef AES_VAR -%ifndef AES_128 -%define AES_128 -%endif -%ifndef AES_192 -%define AES_192 -%endif -%ifndef AES_256 -%define AES_256 -%endif -%endif - -%ifdef AES_VAR -%define KS_LENGTH 60 -%elifdef AES_256 -%define KS_LENGTH 60 -%elifdef AES_192 -%define KS_LENGTH 52 -%else -%define KS_LENGTH 44 -%endif - -; These macros implement stack based local variables - -%macro save 2 - mov [esp+4*%1],%2 -%endmacro - -%macro restore 2 - mov %1,[esp+4*%2] -%endmacro - -%ifdef REDUCE_CODE_SIZE - %macro mf_call 1 - call %1 - %endmacro -%else - %macro mf_call 1 - %1 - %endmacro -%endif - -; the DLL has to implement the _stdcall calling interface on return -; In this case we have to take our parameters (3 4-byte pointers) -; off the stack - -%define parms 12 - -%macro do_name 1-2 parms -%ifndef DLL_EXPORT - global %1 -%1: -%else - global %1@%2 - export %1@%2 -%1@%2: -%endif -%endmacro - -%macro do_call 1-2 parms -%ifndef DLL_EXPORT - call %1 - add esp,%2 -%else - call %1@%2 -%endif -%endmacro - -%macro do_exit 0-1 parms -%ifdef DLL_EXPORT - ret %1 -%else - ret -%endif -%endmacro - -; finite field multiplies by {02}, {04} and {08} - -%define f2(x) ((x<<1)^(((x>>7)&1)*0x11b)) -%define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b)) -%define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b)) - -; finite field multiplies required in table generation - -%define f3(x) (f2(x) ^ x) -%define f9(x) (f8(x) ^ x) -%define fb(x) (f8(x) ^ f2(x) ^ x) -%define fd(x) (f8(x) ^ f4(x) ^ x) -%define fe(x) (f8(x) ^ f4(x) ^ f2(x)) - -%define etab_0(x) [_aes_enc_tab+4+8*x] -%define etab_1(x) [_aes_enc_tab+3+8*x] -%define etab_2(x) [_aes_enc_tab+2+8*x] -%define etab_3(x) [_aes_enc_tab+1+8*x] -%define etab_b(x) byte [_aes_enc_tab+1+8*x] ; used with movzx for 0x000000xx -%define etab_w(x) word [_aes_enc_tab+8*x] ; used with movzx for 0x0000xx00 - -%define btab_0(x) [_aes_enc_tab+6+8*x] -%define btab_1(x) [_aes_enc_tab+5+8*x] -%define btab_2(x) [_aes_enc_tab+4+8*x] -%define btab_3(x) [_aes_enc_tab+3+8*x] - -; ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the -; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX. -; -; Input: -; -; EAX column[0] -; EBX column[1] -; ECX column[2] -; EDX column[3] -; ESI column key[round][2] -; EDI column key[round][3] -; EBP scratch -; -; Output: -; -; EBP column[0] unkeyed -; EBX column[1] unkeyed -; ESI column[2] keyed -; EDI column[3] keyed -; EAX scratch -; ECX scratch -; EDX scratch - -%macro rnd_fun 2 - - rol ebx,16 - %1 esi, cl, 0, ebp - %1 esi, dh, 1, ebp - %1 esi, bh, 3, ebp - %1 edi, dl, 0, ebp - %1 edi, ah, 1, ebp - %1 edi, bl, 2, ebp - %2 ebp, al, 0, ebp - shr ebx,16 - and eax,0xffff0000 - or eax,ebx - shr edx,16 - %1 ebp, ah, 1, ebx - %1 ebp, dh, 3, ebx - %2 ebx, dl, 2, ebx - %1 ebx, ch, 1, edx - %1 ebx, al, 0, edx - shr eax,16 - shr ecx,16 - %1 ebp, cl, 2, edx - %1 edi, ch, 3, edx - %1 esi, al, 2, edx - %1 ebx, ah, 3, edx - -%endmacro - -; Basic MOV and XOR Operations for normal rounds - -%macro nr_xor 4 - movzx %4,%2 - xor %1,etab_%3(%4) -%endmacro - -%macro nr_mov 4 - movzx %4,%2 - mov %1,etab_%3(%4) -%endmacro - -; Basic MOV and XOR Operations for last round - -%if 1 - - %macro lr_xor 4 - movzx %4,%2 - movzx %4,etab_b(%4) - %if %3 != 0 - shl %4,8*%3 - %endif - xor %1,%4 - %endmacro - - %macro lr_mov 4 - movzx %4,%2 - movzx %1,etab_b(%4) - %if %3 != 0 - shl %1,8*%3 - %endif - %endmacro - -%else ; less effective but worth leaving as an option - - %macro lr_xor 4 - movzx %4,%2 - mov %4,btab_%3(%4) - and %4,0x000000ff << 8 * %3 - xor %1,%4 - %endmacro - - %macro lr_mov 4 - movzx %4,%2 - mov %1,btab_%3(%4) - and %1,0x000000ff << 8 * %3 - %endmacro - -%endif - -; Apply S-Box to the 4 bytes in a 32-bit word and rotate byte positions - -%ifdef REDUCE_CODE_SIZE - -l3s_col: - movzx ecx,al ; in eax - movzx ecx, etab_b(ecx) ; out eax - xor edx,ecx ; scratch ecx,edx - movzx ecx,ah - movzx ecx, etab_b(ecx) - shl ecx,8 - xor edx,ecx - shr eax,16 - movzx ecx,al - movzx ecx, etab_b(ecx) - shl ecx,16 - xor edx,ecx - movzx ecx,ah - movzx ecx, etab_b(ecx) - shl ecx,24 - xor edx,ecx - mov eax,edx - ret - -%else - -%macro l3s_col 0 - - movzx ecx,al ; in eax - movzx ecx, etab_b(ecx) ; out eax - xor edx,ecx ; scratch ecx,edx - movzx ecx,ah - movzx ecx, etab_b(ecx) - shl ecx,8 - xor edx,ecx - shr eax,16 - movzx ecx,al - movzx ecx, etab_b(ecx) - shl ecx,16 - xor edx,ecx - movzx ecx,ah - movzx ecx, etab_b(ecx) - shl ecx,24 - xor edx,ecx - mov eax,edx - -%endmacro - -%endif - -; offsets to parameters - -in_blk equ 2 ; input byte array address parameter -out_blk equ 4 ; output byte array address parameter -ctx equ 6 ; AES context structure -stk_spc equ 20 ; stack space - -%ifdef ENCRYPTION - -; %define ENCRYPTION_TABLE - -%ifdef REDUCE_CODE_SIZE - -enc_round: - sub sp, 2 - add ebp,16 - save 1,ebp - mov esi,[ebp+8] - mov edi,[ebp+12] - - rnd_fun nr_xor, nr_mov - - mov eax,ebp - mov ecx,esi - mov edx,edi - restore ebp,1 - xor eax,[ebp] - xor ebx,[ebp+4] - add sp, 2 - ret - -%else - -%macro enc_round 0 - - add ebp,16 - save 0,ebp - mov esi,[ebp+8] - mov edi,[ebp+12] - - rnd_fun nr_xor, nr_mov - - mov eax,ebp - mov ecx,esi - mov edx,edi - restore ebp,0 - xor eax,[ebp] - xor ebx,[ebp+4] - -%endmacro - -%endif - -%macro enc_last_round 0 - - add ebp,16 - save 0,ebp - mov esi,[ebp+8] - mov edi,[ebp+12] - - rnd_fun lr_xor, lr_mov - - mov eax,ebp - restore ebp,0 - xor eax,[ebp] - xor ebx,[ebp+4] - -%endmacro - - section _TEXT - -; AES Encryption Subroutine - - do_name _aes_encrypt,12 - - mov ax, sp - movzx esp, ax - - sub esp,stk_spc - mov [esp+16],ebp - mov [esp+12],ebx - mov [esp+ 8],esi - mov [esp+ 4],edi - - movzx esi,word [esp+in_blk+stk_spc] ; input pointer - mov eax,[esi ] - mov ebx,[esi+ 4] - mov ecx,[esi+ 8] - mov edx,[esi+12] - - movzx ebp,word [esp+ctx+stk_spc] ; key pointer - movzx edi,byte [ebp+4*KS_LENGTH] - xor eax,[ebp ] - xor ebx,[ebp+ 4] - xor ecx,[ebp+ 8] - xor edx,[ebp+12] - -; determine the number of rounds - -%ifndef AES_256 - cmp edi,10*16 - je .3 - cmp edi,12*16 - je .2 - cmp edi,14*16 - je .1 - mov eax,-1 - jmp .5 -%endif - -.1: mf_call enc_round - mf_call enc_round -.2: mf_call enc_round - mf_call enc_round -.3: mf_call enc_round - mf_call enc_round - mf_call enc_round - mf_call enc_round - mf_call enc_round - mf_call enc_round - mf_call enc_round - mf_call enc_round - mf_call enc_round - enc_last_round - - movzx edx,word [esp+out_blk+stk_spc] - mov [edx],eax - mov [edx+4],ebx - mov [edx+8],esi - mov [edx+12],edi - xor eax,eax - -.5: mov ebp,[esp+16] - mov ebx,[esp+12] - mov esi,[esp+ 8] - mov edi,[esp+ 4] - add esp,stk_spc - do_exit 12 - -%endif - -%macro f_key 2 - - push ecx - push edx - mov edx,esi - ror eax,8 - mf_call l3s_col - mov esi,eax - pop edx - pop ecx - xor esi,rc_val - - mov [ebp+%1*%2],esi - xor edi,esi - mov [ebp+%1*%2+4],edi - xor ecx,edi - mov [ebp+%1*%2+8],ecx - xor edx,ecx - mov [ebp+%1*%2+12],edx - mov eax,edx - -%if %2 == 24 - -%if %1 < 7 - xor eax,[ebp+%1*%2+16-%2] - mov [ebp+%1*%2+16],eax - xor eax,[ebp+%1*%2+20-%2] - mov [ebp+%1*%2+20],eax -%endif - -%elif %2 == 32 - -%if %1 < 6 - push ecx - push edx - mov edx,[ebp+%1*%2+16-%2] - mf_call l3s_col - pop edx - pop ecx - mov [ebp+%1*%2+16],eax - xor eax,[ebp+%1*%2+20-%2] - mov [ebp+%1*%2+20],eax - xor eax,[ebp+%1*%2+24-%2] - mov [ebp+%1*%2+24],eax - xor eax,[ebp+%1*%2+28-%2] - mov [ebp+%1*%2+28],eax -%endif - -%endif - -%assign rc_val f2(rc_val) - -%endmacro - -%ifdef ENCRYPTION_KEY_SCHEDULE - -%ifdef AES_128 - -%ifndef ENCRYPTION_TABLE -; %define ENCRYPTION_TABLE -%endif - -%assign rc_val 1 - - do_name _aes_encrypt_key128,8 - - push ebp - push ebx - push esi - push edi - - mov ebp,[esp+24] - mov [ebp+4*KS_LENGTH],dword 10*16 - mov ebx,[esp+20] - - mov esi,[ebx] - mov [ebp],esi - mov edi,[ebx+4] - mov [ebp+4],edi - mov ecx,[ebx+8] - mov [ebp+8],ecx - mov edx,[ebx+12] - mov [ebp+12],edx - add ebp,16 - mov eax,edx - - f_key 0,16 ; 11 * 4 = 44 unsigned longs - f_key 1,16 ; 4 + 4 * 10 generated = 44 - f_key 2,16 - f_key 3,16 - f_key 4,16 - f_key 5,16 - f_key 6,16 - f_key 7,16 - f_key 8,16 - f_key 9,16 - - pop edi - pop esi - pop ebx - pop ebp - xor eax,eax - do_exit 8 - -%endif - -%ifdef AES_192 - -%ifndef ENCRYPTION_TABLE -; %define ENCRYPTION_TABLE -%endif - -%assign rc_val 1 - - do_name _aes_encrypt_key192,8 - - push ebp - push ebx - push esi - push edi - - mov ebp,[esp+24] - mov [ebp+4*KS_LENGTH],dword 12 * 16 - mov ebx,[esp+20] - - mov esi,[ebx] - mov [ebp],esi - mov edi,[ebx+4] - mov [ebp+4],edi - mov ecx,[ebx+8] - mov [ebp+8],ecx - mov edx,[ebx+12] - mov [ebp+12],edx - mov eax,[ebx+16] - mov [ebp+16],eax - mov eax,[ebx+20] - mov [ebp+20],eax - add ebp,24 - - f_key 0,24 ; 13 * 4 = 52 unsigned longs - f_key 1,24 ; 6 + 6 * 8 generated = 54 - f_key 2,24 - f_key 3,24 - f_key 4,24 - f_key 5,24 - f_key 6,24 - f_key 7,24 - - pop edi - pop esi - pop ebx - pop ebp - xor eax,eax - do_exit 8 - -%endif - -%ifdef AES_256 - -%ifndef ENCRYPTION_TABLE -; %define ENCRYPTION_TABLE -%endif - -%assign rc_val 1 - - do_name _aes_encrypt_key256,8 - - mov ax, sp - movzx esp, ax - - push ebp - push ebx - push esi - push edi - - movzx ebp, word [esp+20] ; ks - mov [ebp+4*KS_LENGTH],dword 14 * 16 - movzx ebx, word [esp+18] ; key - - mov esi,[ebx] - mov [ebp],esi - mov edi,[ebx+4] - mov [ebp+4],edi - mov ecx,[ebx+8] - mov [ebp+8],ecx - mov edx,[ebx+12] - mov [ebp+12],edx - mov eax,[ebx+16] - mov [ebp+16],eax - mov eax,[ebx+20] - mov [ebp+20],eax - mov eax,[ebx+24] - mov [ebp+24],eax - mov eax,[ebx+28] - mov [ebp+28],eax - add ebp,32 - - f_key 0,32 ; 15 * 4 = 60 unsigned longs - f_key 1,32 ; 8 + 8 * 7 generated = 64 - f_key 2,32 - f_key 3,32 - f_key 4,32 - f_key 5,32 - f_key 6,32 - - pop edi - pop esi - pop ebx - pop ebp - xor eax,eax - do_exit 8 - -%endif - -%ifdef AES_VAR - -%ifndef ENCRYPTION_TABLE -; %define ENCRYPTION_TABLE -%endif - - do_name _aes_encrypt_key,12 - - mov ecx,[esp+4] - mov eax,[esp+8] - mov edx,[esp+12] - push edx - push ecx - - cmp eax,16 - je .1 - cmp eax,128 - je .1 - - cmp eax,24 - je .2 - cmp eax,192 - je .2 - - cmp eax,32 - je .3 - cmp eax,256 - je .3 - mov eax,-1 - add esp,8 - do_exit 12 - -.1: do_call _aes_encrypt_key128,8 - do_exit 12 -.2: do_call _aes_encrypt_key192,8 - do_exit 12 -.3: do_call _aes_encrypt_key256,8 - do_exit 12 - -%endif - -%endif - -%ifdef ENCRYPTION_TABLE - -; S-box data - 256 entries - - section _DATA - -%define u8(x) 0, x, x, f3(x), f2(x), x, x, f3(x) - -_aes_enc_tab: - db u8(0x63),u8(0x7c),u8(0x77),u8(0x7b),u8(0xf2),u8(0x6b),u8(0x6f),u8(0xc5) - db u8(0x30),u8(0x01),u8(0x67),u8(0x2b),u8(0xfe),u8(0xd7),u8(0xab),u8(0x76) - db u8(0xca),u8(0x82),u8(0xc9),u8(0x7d),u8(0xfa),u8(0x59),u8(0x47),u8(0xf0) - db u8(0xad),u8(0xd4),u8(0xa2),u8(0xaf),u8(0x9c),u8(0xa4),u8(0x72),u8(0xc0) - db u8(0xb7),u8(0xfd),u8(0x93),u8(0x26),u8(0x36),u8(0x3f),u8(0xf7),u8(0xcc) - db u8(0x34),u8(0xa5),u8(0xe5),u8(0xf1),u8(0x71),u8(0xd8),u8(0x31),u8(0x15) - db u8(0x04),u8(0xc7),u8(0x23),u8(0xc3),u8(0x18),u8(0x96),u8(0x05),u8(0x9a) - db u8(0x07),u8(0x12),u8(0x80),u8(0xe2),u8(0xeb),u8(0x27),u8(0xb2),u8(0x75) - db u8(0x09),u8(0x83),u8(0x2c),u8(0x1a),u8(0x1b),u8(0x6e),u8(0x5a),u8(0xa0) - db u8(0x52),u8(0x3b),u8(0xd6),u8(0xb3),u8(0x29),u8(0xe3),u8(0x2f),u8(0x84) - db u8(0x53),u8(0xd1),u8(0x00),u8(0xed),u8(0x20),u8(0xfc),u8(0xb1),u8(0x5b) - db u8(0x6a),u8(0xcb),u8(0xbe),u8(0x39),u8(0x4a),u8(0x4c),u8(0x58),u8(0xcf) - db u8(0xd0),u8(0xef),u8(0xaa),u8(0xfb),u8(0x43),u8(0x4d),u8(0x33),u8(0x85) - db u8(0x45),u8(0xf9),u8(0x02),u8(0x7f),u8(0x50),u8(0x3c),u8(0x9f),u8(0xa8) - db u8(0x51),u8(0xa3),u8(0x40),u8(0x8f),u8(0x92),u8(0x9d),u8(0x38),u8(0xf5) - db u8(0xbc),u8(0xb6),u8(0xda),u8(0x21),u8(0x10),u8(0xff),u8(0xf3),u8(0xd2) - db u8(0xcd),u8(0x0c),u8(0x13),u8(0xec),u8(0x5f),u8(0x97),u8(0x44),u8(0x17) - db u8(0xc4),u8(0xa7),u8(0x7e),u8(0x3d),u8(0x64),u8(0x5d),u8(0x19),u8(0x73) - db u8(0x60),u8(0x81),u8(0x4f),u8(0xdc),u8(0x22),u8(0x2a),u8(0x90),u8(0x88) - db u8(0x46),u8(0xee),u8(0xb8),u8(0x14),u8(0xde),u8(0x5e),u8(0x0b),u8(0xdb) - db u8(0xe0),u8(0x32),u8(0x3a),u8(0x0a),u8(0x49),u8(0x06),u8(0x24),u8(0x5c) - db u8(0xc2),u8(0xd3),u8(0xac),u8(0x62),u8(0x91),u8(0x95),u8(0xe4),u8(0x79) - db u8(0xe7),u8(0xc8),u8(0x37),u8(0x6d),u8(0x8d),u8(0xd5),u8(0x4e),u8(0xa9) - db u8(0x6c),u8(0x56),u8(0xf4),u8(0xea),u8(0x65),u8(0x7a),u8(0xae),u8(0x08) - db u8(0xba),u8(0x78),u8(0x25),u8(0x2e),u8(0x1c),u8(0xa6),u8(0xb4),u8(0xc6) - db u8(0xe8),u8(0xdd),u8(0x74),u8(0x1f),u8(0x4b),u8(0xbd),u8(0x8b),u8(0x8a) - db u8(0x70),u8(0x3e),u8(0xb5),u8(0x66),u8(0x48),u8(0x03),u8(0xf6),u8(0x0e) - db u8(0x61),u8(0x35),u8(0x57),u8(0xb9),u8(0x86),u8(0xc1),u8(0x1d),u8(0x9e) - db u8(0xe1),u8(0xf8),u8(0x98),u8(0x11),u8(0x69),u8(0xd9),u8(0x8e),u8(0x94) - db u8(0x9b),u8(0x1e),u8(0x87),u8(0xe9),u8(0xce),u8(0x55),u8(0x28),u8(0xdf) - db u8(0x8c),u8(0xa1),u8(0x89),u8(0x0d),u8(0xbf),u8(0xe6),u8(0x42),u8(0x68) - db u8(0x41),u8(0x99),u8(0x2d),u8(0x0f),u8(0xb0),u8(0x54),u8(0xbb),u8(0x16) - -%endif - -%ifdef DECRYPTION - -; %define DECRYPTION_TABLE - -%define dtab_0(x) [_aes_dec_tab+ 8*x] -%define dtab_1(x) [_aes_dec_tab+3+8*x] -%define dtab_2(x) [_aes_dec_tab+2+8*x] -%define dtab_3(x) [_aes_dec_tab+1+8*x] -%define dtab_x(x) byte [_aes_dec_tab+7+8*x] - -%macro irn_fun 2 - - rol eax,16 - %1 esi, cl, 0, ebp - %1 esi, bh, 1, ebp - %1 esi, al, 2, ebp - %1 edi, dl, 0, ebp - %1 edi, ch, 1, ebp - %1 edi, ah, 3, ebp - %2 ebp, bl, 0, ebp - shr eax,16 - and ebx,0xffff0000 - or ebx,eax - shr ecx,16 - %1 ebp, bh, 1, eax - %1 ebp, ch, 3, eax - %2 eax, cl, 2, ecx - %1 eax, bl, 0, ecx - %1 eax, dh, 1, ecx - shr ebx,16 - shr edx,16 - %1 esi, dh, 3, ecx - %1 ebp, dl, 2, ecx - %1 eax, bh, 3, ecx - %1 edi, bl, 2, ecx - -%endmacro - -; Basic MOV and XOR Operations for normal rounds - -%macro ni_xor 4 - movzx %4,%2 - xor %1,dtab_%3(%4) -%endmacro - -%macro ni_mov 4 - movzx %4,%2 - mov %1,dtab_%3(%4) -%endmacro - -; Basic MOV and XOR Operations for last round - -%macro li_xor 4 - movzx %4,%2 - movzx %4,dtab_x(%4) -%if %3 != 0 - shl %4,8*%3 -%endif - xor %1,%4 -%endmacro - -%macro li_mov 4 - movzx %4,%2 - movzx %1,dtab_x(%4) -%if %3 != 0 - shl %1,8*%3 -%endif -%endmacro - -%ifdef REDUCE_CODE_SIZE - -dec_round: - sub sp, 2 -%ifdef AES_REV_DKS - add ebp,16 -%else - sub ebp,16 -%endif - save 1,ebp - mov esi,[ebp+8] - mov edi,[ebp+12] - - irn_fun ni_xor, ni_mov - - mov ebx,ebp - mov ecx,esi - mov edx,edi - restore ebp,1 - xor eax,[ebp] - xor ebx,[ebp+4] - add sp, 2 - ret - -%else - -%macro dec_round 0 - -%ifdef AES_REV_DKS - add ebp,16 -%else - sub ebp,16 -%endif - save 0,ebp - mov esi,[ebp+8] - mov edi,[ebp+12] - - irn_fun ni_xor, ni_mov - - mov ebx,ebp - mov ecx,esi - mov edx,edi - restore ebp,0 - xor eax,[ebp] - xor ebx,[ebp+4] - -%endmacro - -%endif - -%macro dec_last_round 0 - -%ifdef AES_REV_DKS - add ebp,16 -%else - sub ebp,16 -%endif - save 0,ebp - mov esi,[ebp+8] - mov edi,[ebp+12] - - irn_fun li_xor, li_mov - - mov ebx,ebp - restore ebp,0 - xor eax,[ebp] - xor ebx,[ebp+4] - -%endmacro - - section _TEXT - -; AES Decryption Subroutine - - do_name _aes_decrypt,12 - - mov ax, sp - movzx esp, ax - - sub esp,stk_spc - mov [esp+16],ebp - mov [esp+12],ebx - mov [esp+ 8],esi - mov [esp+ 4],edi - -; input four columns and xor in first round key - - movzx esi,word [esp+in_blk+stk_spc] ; input pointer - mov eax,[esi ] - mov ebx,[esi+ 4] - mov ecx,[esi+ 8] - mov edx,[esi+12] - lea esi,[esi+16] - - movzx ebp, word [esp+ctx+stk_spc] ; key pointer - movzx edi,byte[ebp+4*KS_LENGTH] -%ifndef AES_REV_DKS ; if decryption key schedule is not reversed - lea ebp,[ebp+edi] ; we have to access it from the top down -%endif - xor eax,[ebp ] ; key schedule - xor ebx,[ebp+ 4] - xor ecx,[ebp+ 8] - xor edx,[ebp+12] - -; determine the number of rounds - -%ifndef AES_256 - cmp edi,10*16 - je .3 - cmp edi,12*16 - je .2 - cmp edi,14*16 - je .1 - mov eax,-1 - jmp .5 -%endif - -.1: mf_call dec_round - mf_call dec_round -.2: mf_call dec_round - mf_call dec_round -.3: mf_call dec_round - mf_call dec_round - mf_call dec_round - mf_call dec_round - mf_call dec_round - mf_call dec_round - mf_call dec_round - mf_call dec_round - mf_call dec_round - dec_last_round - -; move final values to the output array. - - movzx ebp,word [esp+out_blk+stk_spc] - mov [ebp],eax - mov [ebp+4],ebx - mov [ebp+8],esi - mov [ebp+12],edi - xor eax,eax - -.5: mov ebp,[esp+16] - mov ebx,[esp+12] - mov esi,[esp+ 8] - mov edi,[esp+ 4] - add esp,stk_spc - do_exit 12 - -%endif - -%ifdef REDUCE_CODE_SIZE - -inv_mix_col: - movzx ecx,dl ; input eax, edx - movzx ecx,etab_b(ecx) ; output eax - mov eax,dtab_0(ecx) ; used ecx - movzx ecx,dh - shr edx,16 - movzx ecx,etab_b(ecx) - xor eax,dtab_1(ecx) - movzx ecx,dl - movzx ecx,etab_b(ecx) - xor eax,dtab_2(ecx) - movzx ecx,dh - movzx ecx,etab_b(ecx) - xor eax,dtab_3(ecx) - ret - -%else - -%macro inv_mix_col 0 - - movzx ecx,dl ; input eax, edx - movzx ecx,etab_b(ecx) ; output eax - mov eax,dtab_0(ecx) ; used ecx - movzx ecx,dh - shr edx,16 - movzx ecx,etab_b(ecx) - xor eax,dtab_1(ecx) - movzx ecx,dl - movzx ecx,etab_b(ecx) - xor eax,dtab_2(ecx) - movzx ecx,dh - movzx ecx,etab_b(ecx) - xor eax,dtab_3(ecx) - -%endmacro - -%endif - -%ifdef DECRYPTION_KEY_SCHEDULE - -%ifdef AES_128 - -%ifndef DECRYPTION_TABLE -; %define DECRYPTION_TABLE -%endif - - do_name _aes_decrypt_key128,8 - - push ebp - push ebx - push esi - push edi - mov eax,[esp+24] ; context - mov edx,[esp+20] ; key - push eax - push edx - do_call _aes_encrypt_key128,8 ; generate expanded encryption key - mov eax,10*16 - mov esi,[esp+24] ; pointer to first round key - lea edi,[esi+eax] ; pointer to last round key - add esi,32 - ; the inverse mix column transformation - mov edx,[esi-16] ; needs to be applied to all round keys - mf_call inv_mix_col ; except first and last. Hence start by - mov [esi-16],eax ; transforming the four sub-keys in the - mov edx,[esi-12] ; second round key - mf_call inv_mix_col - mov [esi-12],eax ; transformations for subsequent rounds - mov edx,[esi-8] ; can then be made more efficient by - mf_call inv_mix_col ; noting that for three of the four sub-keys - mov [esi-8],eax ; in the encryption round key ek[r]: - mov edx,[esi-4] ; - mf_call inv_mix_col ; ek[r][n] = ek[r][n-1] ^ ek[r-1][n] - mov [esi-4],eax ; - ; where n is 1..3. Hence the corresponding -.0: mov edx,[esi] ; subkeys in the decryption round key dk[r] - mf_call inv_mix_col ; also obey since inv_mix_col is linear in - mov [esi],eax ; GF(256): - xor eax,[esi-12] ; - mov [esi+4],eax ; dk[r][n] = dk[r][n-1] ^ dk[r-1][n] - xor eax,[esi-8] ; - mov [esi+8],eax ; So we only need one inverse mix column - xor eax,[esi-4] ; operation (n = 0) for each four word cycle - mov [esi+12],eax ; in the expanded key. - add esi,16 - cmp edi,esi - jg .0 - jmp dec_end - -%endif - -%ifdef AES_192 - -%ifndef DECRYPTION_TABLE -; %define DECRYPTION_TABLE -%endif - - do_name _aes_decrypt_key192,8 - - push ebp - push ebx - push esi - push edi - mov eax,[esp+24] ; context - mov edx,[esp+20] ; key - push eax - push edx - do_call _aes_encrypt_key192,8 ; generate expanded encryption key - mov eax,12*16 - mov esi,[esp+24] ; first round key - lea edi,[esi+eax] ; last round key - add esi,48 ; the first 6 words are the key, of - ; which the top 2 words are part of - mov edx,[esi-32] ; the second round key and hence - mf_call inv_mix_col ; need to be modified. After this we - mov [esi-32],eax ; need to do a further six values prior - mov edx,[esi-28] ; to using a more efficient technique - mf_call inv_mix_col ; based on: - mov [esi-28],eax ; - ; dk[r][n] = dk[r][n-1] ^ dk[r-1][n] - mov edx,[esi-24] ; - mf_call inv_mix_col ; for n = 1 .. 5 where the key expansion - mov [esi-24],eax ; cycle is now 6 words long - mov edx,[esi-20] - mf_call inv_mix_col - mov [esi-20],eax - mov edx,[esi-16] - mf_call inv_mix_col - mov [esi-16],eax - mov edx,[esi-12] - mf_call inv_mix_col - mov [esi-12],eax - mov edx,[esi-8] - mf_call inv_mix_col - mov [esi-8],eax - mov edx,[esi-4] - mf_call inv_mix_col - mov [esi-4],eax - -.0: mov edx,[esi] ; the expanded key is 13 * 4 = 44 32-bit words - mf_call inv_mix_col ; of which 11 * 4 = 44 have to be modified - mov [esi],eax ; using inv_mix_col. We have already done 8 - xor eax,[esi-20] ; of these so 36 are left - hence we need - mov [esi+4],eax ; exactly 6 loops of six here - xor eax,[esi-16] - mov [esi+8],eax - xor eax,[esi-12] - mov [esi+12],eax - xor eax,[esi-8] - mov [esi+16],eax - xor eax,[esi-4] - mov [esi+20],eax - add esi,24 - cmp edi,esi - jg .0 - jmp dec_end - -%endif - -%ifdef AES_256 - -%ifndef DECRYPTION_TABLE -; %define DECRYPTION_TABLE -%endif - - do_name _aes_decrypt_key256,8 - - mov ax, sp - movzx esp, ax - push ebp - push ebx - push esi - push edi - - movzx eax, word [esp+20] ; ks - movzx edx, word [esp+18] ; key - push ax - push dx - do_call _aes_encrypt_key256,4 ; generate expanded encryption key - mov eax,14*16 - movzx esi, word [esp+20] ; ks - lea edi,[esi+eax] - add esi,64 - - mov edx,[esi-48] ; the primary key is 8 words, of which - mf_call inv_mix_col ; the top four require modification - mov [esi-48],eax - mov edx,[esi-44] - mf_call inv_mix_col - mov [esi-44],eax - mov edx,[esi-40] - mf_call inv_mix_col - mov [esi-40],eax - mov edx,[esi-36] - mf_call inv_mix_col - mov [esi-36],eax - - mov edx,[esi-32] ; the encryption key expansion cycle is - mf_call inv_mix_col ; now eight words long so we need to - mov [esi-32],eax ; start by doing one complete block - mov edx,[esi-28] - mf_call inv_mix_col - mov [esi-28],eax - mov edx,[esi-24] - mf_call inv_mix_col - mov [esi-24],eax - mov edx,[esi-20] - mf_call inv_mix_col - mov [esi-20],eax - mov edx,[esi-16] - mf_call inv_mix_col - mov [esi-16],eax - mov edx,[esi-12] - mf_call inv_mix_col - mov [esi-12],eax - mov edx,[esi-8] - mf_call inv_mix_col - mov [esi-8],eax - mov edx,[esi-4] - mf_call inv_mix_col - mov [esi-4],eax - -.0: mov edx,[esi] ; we can now speed up the remaining - mf_call inv_mix_col ; rounds by using the technique - mov [esi],eax ; outlined earlier. But note that - xor eax,[esi-28] ; there is one extra inverse mix - mov [esi+4],eax ; column operation as the 256 bit - xor eax,[esi-24] ; key has an extra non-linear step - mov [esi+8],eax ; for the midway element. - xor eax,[esi-20] - mov [esi+12],eax ; the expanded key is 15 * 4 = 60 - mov edx,[esi+16] ; 32-bit words of which 52 need to - mf_call inv_mix_col ; be modified. We have already done - mov [esi+16],eax ; 12 so 40 are left - which means - xor eax,[esi-12] ; that we need exactly 5 loops of 8 - mov [esi+20],eax - xor eax,[esi-8] - mov [esi+24],eax - xor eax,[esi-4] - mov [esi+28],eax - add esi,32 - cmp edi,esi - jg .0 - -%endif - -dec_end: - -%ifdef AES_REV_DKS - - movzx esi,word [esp+20] ; this reverses the order of the -.1: mov eax,[esi] ; round keys if required - mov ebx,[esi+4] - mov ebp,[edi] - mov edx,[edi+4] - mov [esi],ebp - mov [esi+4],edx - mov [edi],eax - mov [edi+4],ebx - - mov eax,[esi+8] - mov ebx,[esi+12] - mov ebp,[edi+8] - mov edx,[edi+12] - mov [esi+8],ebp - mov [esi+12],edx - mov [edi+8],eax - mov [edi+12],ebx - - add esi,16 - sub edi,16 - cmp edi,esi - jg .1 - -%endif - - pop edi - pop esi - pop ebx - pop ebp - xor eax,eax - do_exit 8 - -%ifdef AES_VAR - - do_name _aes_decrypt_key,12 - - mov ecx,[esp+4] - mov eax,[esp+8] - mov edx,[esp+12] - push edx - push ecx - - cmp eax,16 - je .1 - cmp eax,128 - je .1 - - cmp eax,24 - je .2 - cmp eax,192 - je .2 - - cmp eax,32 - je .3 - cmp eax,256 - je .3 - mov eax,-1 - add esp,8 - do_exit 12 - -.1: do_call _aes_decrypt_key128,8 - do_exit 12 -.2: do_call _aes_decrypt_key192,8 - do_exit 12 -.3: do_call _aes_decrypt_key256,8 - do_exit 12 - -%endif - -%endif - -%ifdef DECRYPTION_TABLE - -; Inverse S-box data - 256 entries - - section _DATA - -%define v8(x) fe(x), f9(x), fd(x), fb(x), fe(x), f9(x), fd(x), x - -_aes_dec_tab: - db v8(0x52),v8(0x09),v8(0x6a),v8(0xd5),v8(0x30),v8(0x36),v8(0xa5),v8(0x38) - db v8(0xbf),v8(0x40),v8(0xa3),v8(0x9e),v8(0x81),v8(0xf3),v8(0xd7),v8(0xfb) - db v8(0x7c),v8(0xe3),v8(0x39),v8(0x82),v8(0x9b),v8(0x2f),v8(0xff),v8(0x87) - db v8(0x34),v8(0x8e),v8(0x43),v8(0x44),v8(0xc4),v8(0xde),v8(0xe9),v8(0xcb) - db v8(0x54),v8(0x7b),v8(0x94),v8(0x32),v8(0xa6),v8(0xc2),v8(0x23),v8(0x3d) - db v8(0xee),v8(0x4c),v8(0x95),v8(0x0b),v8(0x42),v8(0xfa),v8(0xc3),v8(0x4e) - db v8(0x08),v8(0x2e),v8(0xa1),v8(0x66),v8(0x28),v8(0xd9),v8(0x24),v8(0xb2) - db v8(0x76),v8(0x5b),v8(0xa2),v8(0x49),v8(0x6d),v8(0x8b),v8(0xd1),v8(0x25) - db v8(0x72),v8(0xf8),v8(0xf6),v8(0x64),v8(0x86),v8(0x68),v8(0x98),v8(0x16) - db v8(0xd4),v8(0xa4),v8(0x5c),v8(0xcc),v8(0x5d),v8(0x65),v8(0xb6),v8(0x92) - db v8(0x6c),v8(0x70),v8(0x48),v8(0x50),v8(0xfd),v8(0xed),v8(0xb9),v8(0xda) - db v8(0x5e),v8(0x15),v8(0x46),v8(0x57),v8(0xa7),v8(0x8d),v8(0x9d),v8(0x84) - db v8(0x90),v8(0xd8),v8(0xab),v8(0x00),v8(0x8c),v8(0xbc),v8(0xd3),v8(0x0a) - db v8(0xf7),v8(0xe4),v8(0x58),v8(0x05),v8(0xb8),v8(0xb3),v8(0x45),v8(0x06) - db v8(0xd0),v8(0x2c),v8(0x1e),v8(0x8f),v8(0xca),v8(0x3f),v8(0x0f),v8(0x02) - db v8(0xc1),v8(0xaf),v8(0xbd),v8(0x03),v8(0x01),v8(0x13),v8(0x8a),v8(0x6b) - db v8(0x3a),v8(0x91),v8(0x11),v8(0x41),v8(0x4f),v8(0x67),v8(0xdc),v8(0xea) - db v8(0x97),v8(0xf2),v8(0xcf),v8(0xce),v8(0xf0),v8(0xb4),v8(0xe6),v8(0x73) - db v8(0x96),v8(0xac),v8(0x74),v8(0x22),v8(0xe7),v8(0xad),v8(0x35),v8(0x85) - db v8(0xe2),v8(0xf9),v8(0x37),v8(0xe8),v8(0x1c),v8(0x75),v8(0xdf),v8(0x6e) - db v8(0x47),v8(0xf1),v8(0x1a),v8(0x71),v8(0x1d),v8(0x29),v8(0xc5),v8(0x89) - db v8(0x6f),v8(0xb7),v8(0x62),v8(0x0e),v8(0xaa),v8(0x18),v8(0xbe),v8(0x1b) - db v8(0xfc),v8(0x56),v8(0x3e),v8(0x4b),v8(0xc6),v8(0xd2),v8(0x79),v8(0x20) - db v8(0x9a),v8(0xdb),v8(0xc0),v8(0xfe),v8(0x78),v8(0xcd),v8(0x5a),v8(0xf4) - db v8(0x1f),v8(0xdd),v8(0xa8),v8(0x33),v8(0x88),v8(0x07),v8(0xc7),v8(0x31) - db v8(0xb1),v8(0x12),v8(0x10),v8(0x59),v8(0x27),v8(0x80),v8(0xec),v8(0x5f) - db v8(0x60),v8(0x51),v8(0x7f),v8(0xa9),v8(0x19),v8(0xb5),v8(0x4a),v8(0x0d) - db v8(0x2d),v8(0xe5),v8(0x7a),v8(0x9f),v8(0x93),v8(0xc9),v8(0x9c),v8(0xef) - db v8(0xa0),v8(0xe0),v8(0x3b),v8(0x4d),v8(0xae),v8(0x2a),v8(0xf5),v8(0xb0) - db v8(0xc8),v8(0xeb),v8(0xbb),v8(0x3c),v8(0x83),v8(0x53),v8(0x99),v8(0x61) - db v8(0x17),v8(0x2b),v8(0x04),v8(0x7e),v8(0xba),v8(0x77),v8(0xd6),v8(0x26) - db v8(0xe1),v8(0x69),v8(0x14),v8(0x63),v8(0x55),v8(0x21),v8(0x0c),v8(0x7d) - -%endif + +; --------------------------------------------------------------------------- +; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. +; +; LICENSE TERMS +; +; The free distribution and use of this software is allowed (with or without +; changes) provided that: +; +; 1. source code distributions include the above copyright notice, this +; list of conditions and the following disclaimer; +; +; 2. binary distributions include the above copyright notice, this list +; of conditions and the following disclaimer in their documentation; +; +; 3. the name of the copyright holder is not used to endorse products +; built using this software without specific written permission. +; +; DISCLAIMER +; +; This software is provided 'as is' with no explicit or implied warranties +; in respect of its properties, including, but not limited to, correctness +; and/or fitness for purpose. +; --------------------------------------------------------------------------- +; Issue 20/12/2007 +; +; This code requires either ASM_X86_V2 or ASM_X86_V2C to be set in aesopt.h +; and the same define to be set here as well. If AES_V2C is set this file +; requires the C files aeskey.c and aestab.c for support. + +; An AES implementation for x86 processors using the YASM (or NASM) assembler. +; This is a full assembler implementation covering encryption, decryption and +; key scheduling. It uses 2k bytes of tables but its encryption and decryption +; performance is very close to that obtained using large tables. Key schedule +; expansion is slower for both encryption and decryption but this is likely to +; be offset by the much smaller load that this version places on the processor +; cache. I acknowledge the contribution made by Daniel Bernstein to aspects of +; the design of the AES round function used here. +; +; This code provides the standard AES block size (128 bits, 16 bytes) and the +; three standard AES key sizes (128, 192 and 256 bits). It has the same call +; interface as my C implementation. The ebx, esi, edi and ebp registers are +; preserved across calls but eax, ecx and edx and the artihmetic status flags +; are not. Although this is a full assembler implementation, it can be used +; in conjunction with my C code which provides faster key scheduling using +; large tables. In this case aeskey.c should be compiled with ASM_X86_V2C +; defined. It is also important that the defines below match those used in the +; C code. This code uses the VC++ register saving conentions; if it is used +; with another compiler, conventions for using and saving registers may need +; to be checked (and calling conventions). The YASM command line for the VC++ +; custom build step is: +; +; yasm -Xvc -f win32 -D -o "$(TargetDir)\$(InputName).obj" "$(InputPath)" +; +; For the cryptlib build this is (pcg): +; +; yasm -Xvc -f win32 -D ASM_X86_V2C -o aescrypt2.obj aes_x86_v2.asm +; +; where is ASM_X86_V2 or ASM_X86_V2C. The calling intefaces are: +; +; AES_RETURN aes_encrypt(const unsigned char in_blk[], +; unsigned char out_blk[], const aes_encrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt(const unsigned char in_blk[], +; unsigned char out_blk[], const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_encrypt_key(const unsigned char key[], +; const aes_encrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt_key(const unsigned char key[], +; const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_encrypt_key(const unsigned char key[], +; unsigned int len, const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt_key(const unsigned char key[], +; unsigned int len, const aes_decrypt_ctx cx[1]); +; +; where is 128, 102 or 256. In the last two calls the length can be in +; either bits or bytes. + +; The DLL interface must use the _stdcall convention in which the number +; of bytes of parameter space is added after an @ to the sutine's name. +; We must also remove our parameters from the stack before return (see +; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version. + +; +; Adapted for TrueCrypt: +; - All tables generated at run-time +; - Adapted for 16-bit environment +; + +CPU 386 +USE16 +SEGMENT _TEXT PUBLIC CLASS=CODE USE16 +SEGMENT _DATA PUBLIC CLASS=DATA USE16 + +GROUP DGROUP _TEXT _DATA + +extern _aes_dec_tab ; Aestab.c +extern _aes_enc_tab + +; %define DLL_EXPORT + +; The size of the code can be reduced by using functions for the encryption +; and decryption rounds in place of macro expansion + +%define REDUCE_CODE_SIZE + +; Comment in/out the following lines to obtain the desired subroutines. These +; selections MUST match those in the C header file aes.h + +; %define AES_128 ; define if AES with 128 bit keys is needed +; %define AES_192 ; define if AES with 192 bit keys is needed +%define AES_256 ; define if AES with 256 bit keys is needed +; %define AES_VAR ; define if a variable key size is needed +%define ENCRYPTION ; define if encryption is needed +%define DECRYPTION ; define if decryption is needed +; %define AES_REV_DKS ; define if key decryption schedule is reversed + +%ifndef ASM_X86_V2C +%define ENCRYPTION_KEY_SCHEDULE ; define if encryption key expansion is needed +%define DECRYPTION_KEY_SCHEDULE ; define if decryption key expansion is needed +%endif + +; The encryption key schedule has the following in memory layout where N is the +; number of rounds (10, 12 or 14): +; +; lo: | input key (round 0) | ; each round is four 32-bit words +; | encryption round 1 | +; | encryption round 2 | +; .... +; | encryption round N-1 | +; hi: | encryption round N | +; +; The decryption key schedule is normally set up so that it has the same +; layout as above by actually reversing the order of the encryption key +; schedule in memory (this happens when AES_REV_DKS is set): +; +; lo: | decryption round 0 | = | encryption round N | +; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ] +; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ] +; .... .... +; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ] +; hi: | decryption round N | = | input key (round 0) | +; +; with rounds except the first and last modified using inv_mix_column() +; But if AES_REV_DKS is NOT set the order of keys is left as it is for +; encryption so that it has to be accessed in reverse when used for +; decryption (although the inverse mix column modifications are done) +; +; lo: | decryption round 0 | = | input key (round 0) | +; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ] +; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ] +; .... .... +; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ] +; hi: | decryption round N | = | encryption round N | +; +; This layout is faster when the assembler key scheduling provided here +; is used. +; +; End of user defines + +%ifdef AES_VAR +%ifndef AES_128 +%define AES_128 +%endif +%ifndef AES_192 +%define AES_192 +%endif +%ifndef AES_256 +%define AES_256 +%endif +%endif + +%ifdef AES_VAR +%define KS_LENGTH 60 +%elifdef AES_256 +%define KS_LENGTH 60 +%elifdef AES_192 +%define KS_LENGTH 52 +%else +%define KS_LENGTH 44 +%endif + +; These macros implement stack based local variables + +%macro save 2 + mov [esp+4*%1],%2 +%endmacro + +%macro restore 2 + mov %1,[esp+4*%2] +%endmacro + +%ifdef REDUCE_CODE_SIZE + %macro mf_call 1 + call %1 + %endmacro +%else + %macro mf_call 1 + %1 + %endmacro +%endif + +; the DLL has to implement the _stdcall calling interface on return +; In this case we have to take our parameters (3 4-byte pointers) +; off the stack + +%define parms 12 + +%macro do_name 1-2 parms +%ifndef DLL_EXPORT + global %1 +%1: +%else + global %1@%2 + export %1@%2 +%1@%2: +%endif +%endmacro + +%macro do_call 1-2 parms +%ifndef DLL_EXPORT + call %1 + add esp,%2 +%else + call %1@%2 +%endif +%endmacro + +%macro do_exit 0-1 parms +%ifdef DLL_EXPORT + ret %1 +%else + ret +%endif +%endmacro + +; finite field multiplies by {02}, {04} and {08} + +%define f2(x) ((x<<1)^(((x>>7)&1)*0x11b)) +%define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b)) +%define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b)) + +; finite field multiplies required in table generation + +%define f3(x) (f2(x) ^ x) +%define f9(x) (f8(x) ^ x) +%define fb(x) (f8(x) ^ f2(x) ^ x) +%define fd(x) (f8(x) ^ f4(x) ^ x) +%define fe(x) (f8(x) ^ f4(x) ^ f2(x)) + +%define etab_0(x) [_aes_enc_tab+4+8*x] +%define etab_1(x) [_aes_enc_tab+3+8*x] +%define etab_2(x) [_aes_enc_tab+2+8*x] +%define etab_3(x) [_aes_enc_tab+1+8*x] +%define etab_b(x) byte [_aes_enc_tab+1+8*x] ; used with movzx for 0x000000xx +%define etab_w(x) word [_aes_enc_tab+8*x] ; used with movzx for 0x0000xx00 + +%define btab_0(x) [_aes_enc_tab+6+8*x] +%define btab_1(x) [_aes_enc_tab+5+8*x] +%define btab_2(x) [_aes_enc_tab+4+8*x] +%define btab_3(x) [_aes_enc_tab+3+8*x] + +; ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the +; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX. +; +; Input: +; +; EAX column[0] +; EBX column[1] +; ECX column[2] +; EDX column[3] +; ESI column key[round][2] +; EDI column key[round][3] +; EBP scratch +; +; Output: +; +; EBP column[0] unkeyed +; EBX column[1] unkeyed +; ESI column[2] keyed +; EDI column[3] keyed +; EAX scratch +; ECX scratch +; EDX scratch + +%macro rnd_fun 2 + + rol ebx,16 + %1 esi, cl, 0, ebp + %1 esi, dh, 1, ebp + %1 esi, bh, 3, ebp + %1 edi, dl, 0, ebp + %1 edi, ah, 1, ebp + %1 edi, bl, 2, ebp + %2 ebp, al, 0, ebp + shr ebx,16 + and eax,0xffff0000 + or eax,ebx + shr edx,16 + %1 ebp, ah, 1, ebx + %1 ebp, dh, 3, ebx + %2 ebx, dl, 2, ebx + %1 ebx, ch, 1, edx + %1 ebx, al, 0, edx + shr eax,16 + shr ecx,16 + %1 ebp, cl, 2, edx + %1 edi, ch, 3, edx + %1 esi, al, 2, edx + %1 ebx, ah, 3, edx + +%endmacro + +; Basic MOV and XOR Operations for normal rounds + +%macro nr_xor 4 + movzx %4,%2 + xor %1,etab_%3(%4) +%endmacro + +%macro nr_mov 4 + movzx %4,%2 + mov %1,etab_%3(%4) +%endmacro + +; Basic MOV and XOR Operations for last round + +%if 1 + + %macro lr_xor 4 + movzx %4,%2 + movzx %4,etab_b(%4) + %if %3 != 0 + shl %4,8*%3 + %endif + xor %1,%4 + %endmacro + + %macro lr_mov 4 + movzx %4,%2 + movzx %1,etab_b(%4) + %if %3 != 0 + shl %1,8*%3 + %endif + %endmacro + +%else ; less effective but worth leaving as an option + + %macro lr_xor 4 + movzx %4,%2 + mov %4,btab_%3(%4) + and %4,0x000000ff << 8 * %3 + xor %1,%4 + %endmacro + + %macro lr_mov 4 + movzx %4,%2 + mov %1,btab_%3(%4) + and %1,0x000000ff << 8 * %3 + %endmacro + +%endif + +; Apply S-Box to the 4 bytes in a 32-bit word and rotate byte positions + +%ifdef REDUCE_CODE_SIZE + +l3s_col: + movzx ecx,al ; in eax + movzx ecx, etab_b(ecx) ; out eax + xor edx,ecx ; scratch ecx,edx + movzx ecx,ah + movzx ecx, etab_b(ecx) + shl ecx,8 + xor edx,ecx + shr eax,16 + movzx ecx,al + movzx ecx, etab_b(ecx) + shl ecx,16 + xor edx,ecx + movzx ecx,ah + movzx ecx, etab_b(ecx) + shl ecx,24 + xor edx,ecx + mov eax,edx + ret + +%else + +%macro l3s_col 0 + + movzx ecx,al ; in eax + movzx ecx, etab_b(ecx) ; out eax + xor edx,ecx ; scratch ecx,edx + movzx ecx,ah + movzx ecx, etab_b(ecx) + shl ecx,8 + xor edx,ecx + shr eax,16 + movzx ecx,al + movzx ecx, etab_b(ecx) + shl ecx,16 + xor edx,ecx + movzx ecx,ah + movzx ecx, etab_b(ecx) + shl ecx,24 + xor edx,ecx + mov eax,edx + +%endmacro + +%endif + +; offsets to parameters + +in_blk equ 2 ; input byte array address parameter +out_blk equ 4 ; output byte array address parameter +ctx equ 6 ; AES context structure +stk_spc equ 20 ; stack space + +%ifdef ENCRYPTION + +; %define ENCRYPTION_TABLE + +%ifdef REDUCE_CODE_SIZE + +enc_round: + sub sp, 2 + add ebp,16 + save 1,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + rnd_fun nr_xor, nr_mov + + mov eax,ebp + mov ecx,esi + mov edx,edi + restore ebp,1 + xor eax,[ebp] + xor ebx,[ebp+4] + add sp, 2 + ret + +%else + +%macro enc_round 0 + + add ebp,16 + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + rnd_fun nr_xor, nr_mov + + mov eax,ebp + mov ecx,esi + mov edx,edi + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + +%endif + +%macro enc_last_round 0 + + add ebp,16 + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + rnd_fun lr_xor, lr_mov + + mov eax,ebp + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + + section _TEXT + +; AES Encryption Subroutine + + do_name _aes_encrypt,12 + + mov ax, sp + movzx esp, ax + + sub esp,stk_spc + mov [esp+16],ebp + mov [esp+12],ebx + mov [esp+ 8],esi + mov [esp+ 4],edi + + movzx esi,word [esp+in_blk+stk_spc] ; input pointer + mov eax,[esi ] + mov ebx,[esi+ 4] + mov ecx,[esi+ 8] + mov edx,[esi+12] + + movzx ebp,word [esp+ctx+stk_spc] ; key pointer + movzx edi,byte [ebp+4*KS_LENGTH] + xor eax,[ebp ] + xor ebx,[ebp+ 4] + xor ecx,[ebp+ 8] + xor edx,[ebp+12] + +; determine the number of rounds + +%ifndef AES_256 + cmp edi,10*16 + je .3 + cmp edi,12*16 + je .2 + cmp edi,14*16 + je .1 + mov eax,-1 + jmp .5 +%endif + +.1: mf_call enc_round + mf_call enc_round +.2: mf_call enc_round + mf_call enc_round +.3: mf_call enc_round + mf_call enc_round + mf_call enc_round + mf_call enc_round + mf_call enc_round + mf_call enc_round + mf_call enc_round + mf_call enc_round + mf_call enc_round + enc_last_round + + movzx edx,word [esp+out_blk+stk_spc] + mov [edx],eax + mov [edx+4],ebx + mov [edx+8],esi + mov [edx+12],edi + xor eax,eax + +.5: mov ebp,[esp+16] + mov ebx,[esp+12] + mov esi,[esp+ 8] + mov edi,[esp+ 4] + add esp,stk_spc + do_exit 12 + +%endif + +%macro f_key 2 + + push ecx + push edx + mov edx,esi + ror eax,8 + mf_call l3s_col + mov esi,eax + pop edx + pop ecx + xor esi,rc_val + + mov [ebp+%1*%2],esi + xor edi,esi + mov [ebp+%1*%2+4],edi + xor ecx,edi + mov [ebp+%1*%2+8],ecx + xor edx,ecx + mov [ebp+%1*%2+12],edx + mov eax,edx + +%if %2 == 24 + +%if %1 < 7 + xor eax,[ebp+%1*%2+16-%2] + mov [ebp+%1*%2+16],eax + xor eax,[ebp+%1*%2+20-%2] + mov [ebp+%1*%2+20],eax +%endif + +%elif %2 == 32 + +%if %1 < 6 + push ecx + push edx + mov edx,[ebp+%1*%2+16-%2] + mf_call l3s_col + pop edx + pop ecx + mov [ebp+%1*%2+16],eax + xor eax,[ebp+%1*%2+20-%2] + mov [ebp+%1*%2+20],eax + xor eax,[ebp+%1*%2+24-%2] + mov [ebp+%1*%2+24],eax + xor eax,[ebp+%1*%2+28-%2] + mov [ebp+%1*%2+28],eax +%endif + +%endif + +%assign rc_val f2(rc_val) + +%endmacro + +%ifdef ENCRYPTION_KEY_SCHEDULE + +%ifdef AES_128 + +%ifndef ENCRYPTION_TABLE +; %define ENCRYPTION_TABLE +%endif + +%assign rc_val 1 + + do_name _aes_encrypt_key128,8 + + push ebp + push ebx + push esi + push edi + + mov ebp,[esp+24] + mov [ebp+4*KS_LENGTH],dword 10*16 + mov ebx,[esp+20] + + mov esi,[ebx] + mov [ebp],esi + mov edi,[ebx+4] + mov [ebp+4],edi + mov ecx,[ebx+8] + mov [ebp+8],ecx + mov edx,[ebx+12] + mov [ebp+12],edx + add ebp,16 + mov eax,edx + + f_key 0,16 ; 11 * 4 = 44 unsigned longs + f_key 1,16 ; 4 + 4 * 10 generated = 44 + f_key 2,16 + f_key 3,16 + f_key 4,16 + f_key 5,16 + f_key 6,16 + f_key 7,16 + f_key 8,16 + f_key 9,16 + + pop edi + pop esi + pop ebx + pop ebp + xor eax,eax + do_exit 8 + +%endif + +%ifdef AES_192 + +%ifndef ENCRYPTION_TABLE +; %define ENCRYPTION_TABLE +%endif + +%assign rc_val 1 + + do_name _aes_encrypt_key192,8 + + push ebp + push ebx + push esi + push edi + + mov ebp,[esp+24] + mov [ebp+4*KS_LENGTH],dword 12 * 16 + mov ebx,[esp+20] + + mov esi,[ebx] + mov [ebp],esi + mov edi,[ebx+4] + mov [ebp+4],edi + mov ecx,[ebx+8] + mov [ebp+8],ecx + mov edx,[ebx+12] + mov [ebp+12],edx + mov eax,[ebx+16] + mov [ebp+16],eax + mov eax,[ebx+20] + mov [ebp+20],eax + add ebp,24 + + f_key 0,24 ; 13 * 4 = 52 unsigned longs + f_key 1,24 ; 6 + 6 * 8 generated = 54 + f_key 2,24 + f_key 3,24 + f_key 4,24 + f_key 5,24 + f_key 6,24 + f_key 7,24 + + pop edi + pop esi + pop ebx + pop ebp + xor eax,eax + do_exit 8 + +%endif + +%ifdef AES_256 + +%ifndef ENCRYPTION_TABLE +; %define ENCRYPTION_TABLE +%endif + +%assign rc_val 1 + + do_name _aes_encrypt_key256,8 + + mov ax, sp + movzx esp, ax + + push ebp + push ebx + push esi + push edi + + movzx ebp, word [esp+20] ; ks + mov [ebp+4*KS_LENGTH],dword 14 * 16 + movzx ebx, word [esp+18] ; key + + mov esi,[ebx] + mov [ebp],esi + mov edi,[ebx+4] + mov [ebp+4],edi + mov ecx,[ebx+8] + mov [ebp+8],ecx + mov edx,[ebx+12] + mov [ebp+12],edx + mov eax,[ebx+16] + mov [ebp+16],eax + mov eax,[ebx+20] + mov [ebp+20],eax + mov eax,[ebx+24] + mov [ebp+24],eax + mov eax,[ebx+28] + mov [ebp+28],eax + add ebp,32 + + f_key 0,32 ; 15 * 4 = 60 unsigned longs + f_key 1,32 ; 8 + 8 * 7 generated = 64 + f_key 2,32 + f_key 3,32 + f_key 4,32 + f_key 5,32 + f_key 6,32 + + pop edi + pop esi + pop ebx + pop ebp + xor eax,eax + do_exit 8 + +%endif + +%ifdef AES_VAR + +%ifndef ENCRYPTION_TABLE +; %define ENCRYPTION_TABLE +%endif + + do_name _aes_encrypt_key,12 + + mov ecx,[esp+4] + mov eax,[esp+8] + mov edx,[esp+12] + push edx + push ecx + + cmp eax,16 + je .1 + cmp eax,128 + je .1 + + cmp eax,24 + je .2 + cmp eax,192 + je .2 + + cmp eax,32 + je .3 + cmp eax,256 + je .3 + mov eax,-1 + add esp,8 + do_exit 12 + +.1: do_call _aes_encrypt_key128,8 + do_exit 12 +.2: do_call _aes_encrypt_key192,8 + do_exit 12 +.3: do_call _aes_encrypt_key256,8 + do_exit 12 + +%endif + +%endif + +%ifdef ENCRYPTION_TABLE + +; S-box data - 256 entries + + section _DATA + +%define u8(x) 0, x, x, f3(x), f2(x), x, x, f3(x) + +_aes_enc_tab: + db u8(0x63),u8(0x7c),u8(0x77),u8(0x7b),u8(0xf2),u8(0x6b),u8(0x6f),u8(0xc5) + db u8(0x30),u8(0x01),u8(0x67),u8(0x2b),u8(0xfe),u8(0xd7),u8(0xab),u8(0x76) + db u8(0xca),u8(0x82),u8(0xc9),u8(0x7d),u8(0xfa),u8(0x59),u8(0x47),u8(0xf0) + db u8(0xad),u8(0xd4),u8(0xa2),u8(0xaf),u8(0x9c),u8(0xa4),u8(0x72),u8(0xc0) + db u8(0xb7),u8(0xfd),u8(0x93),u8(0x26),u8(0x36),u8(0x3f),u8(0xf7),u8(0xcc) + db u8(0x34),u8(0xa5),u8(0xe5),u8(0xf1),u8(0x71),u8(0xd8),u8(0x31),u8(0x15) + db u8(0x04),u8(0xc7),u8(0x23),u8(0xc3),u8(0x18),u8(0x96),u8(0x05),u8(0x9a) + db u8(0x07),u8(0x12),u8(0x80),u8(0xe2),u8(0xeb),u8(0x27),u8(0xb2),u8(0x75) + db u8(0x09),u8(0x83),u8(0x2c),u8(0x1a),u8(0x1b),u8(0x6e),u8(0x5a),u8(0xa0) + db u8(0x52),u8(0x3b),u8(0xd6),u8(0xb3),u8(0x29),u8(0xe3),u8(0x2f),u8(0x84) + db u8(0x53),u8(0xd1),u8(0x00),u8(0xed),u8(0x20),u8(0xfc),u8(0xb1),u8(0x5b) + db u8(0x6a),u8(0xcb),u8(0xbe),u8(0x39),u8(0x4a),u8(0x4c),u8(0x58),u8(0xcf) + db u8(0xd0),u8(0xef),u8(0xaa),u8(0xfb),u8(0x43),u8(0x4d),u8(0x33),u8(0x85) + db u8(0x45),u8(0xf9),u8(0x02),u8(0x7f),u8(0x50),u8(0x3c),u8(0x9f),u8(0xa8) + db u8(0x51),u8(0xa3),u8(0x40),u8(0x8f),u8(0x92),u8(0x9d),u8(0x38),u8(0xf5) + db u8(0xbc),u8(0xb6),u8(0xda),u8(0x21),u8(0x10),u8(0xff),u8(0xf3),u8(0xd2) + db u8(0xcd),u8(0x0c),u8(0x13),u8(0xec),u8(0x5f),u8(0x97),u8(0x44),u8(0x17) + db u8(0xc4),u8(0xa7),u8(0x7e),u8(0x3d),u8(0x64),u8(0x5d),u8(0x19),u8(0x73) + db u8(0x60),u8(0x81),u8(0x4f),u8(0xdc),u8(0x22),u8(0x2a),u8(0x90),u8(0x88) + db u8(0x46),u8(0xee),u8(0xb8),u8(0x14),u8(0xde),u8(0x5e),u8(0x0b),u8(0xdb) + db u8(0xe0),u8(0x32),u8(0x3a),u8(0x0a),u8(0x49),u8(0x06),u8(0x24),u8(0x5c) + db u8(0xc2),u8(0xd3),u8(0xac),u8(0x62),u8(0x91),u8(0x95),u8(0xe4),u8(0x79) + db u8(0xe7),u8(0xc8),u8(0x37),u8(0x6d),u8(0x8d),u8(0xd5),u8(0x4e),u8(0xa9) + db u8(0x6c),u8(0x56),u8(0xf4),u8(0xea),u8(0x65),u8(0x7a),u8(0xae),u8(0x08) + db u8(0xba),u8(0x78),u8(0x25),u8(0x2e),u8(0x1c),u8(0xa6),u8(0xb4),u8(0xc6) + db u8(0xe8),u8(0xdd),u8(0x74),u8(0x1f),u8(0x4b),u8(0xbd),u8(0x8b),u8(0x8a) + db u8(0x70),u8(0x3e),u8(0xb5),u8(0x66),u8(0x48),u8(0x03),u8(0xf6),u8(0x0e) + db u8(0x61),u8(0x35),u8(0x57),u8(0xb9),u8(0x86),u8(0xc1),u8(0x1d),u8(0x9e) + db u8(0xe1),u8(0xf8),u8(0x98),u8(0x11),u8(0x69),u8(0xd9),u8(0x8e),u8(0x94) + db u8(0x9b),u8(0x1e),u8(0x87),u8(0xe9),u8(0xce),u8(0x55),u8(0x28),u8(0xdf) + db u8(0x8c),u8(0xa1),u8(0x89),u8(0x0d),u8(0xbf),u8(0xe6),u8(0x42),u8(0x68) + db u8(0x41),u8(0x99),u8(0x2d),u8(0x0f),u8(0xb0),u8(0x54),u8(0xbb),u8(0x16) + +%endif + +%ifdef DECRYPTION + +; %define DECRYPTION_TABLE + +%define dtab_0(x) [_aes_dec_tab+ 8*x] +%define dtab_1(x) [_aes_dec_tab+3+8*x] +%define dtab_2(x) [_aes_dec_tab+2+8*x] +%define dtab_3(x) [_aes_dec_tab+1+8*x] +%define dtab_x(x) byte [_aes_dec_tab+7+8*x] + +%macro irn_fun 2 + + rol eax,16 + %1 esi, cl, 0, ebp + %1 esi, bh, 1, ebp + %1 esi, al, 2, ebp + %1 edi, dl, 0, ebp + %1 edi, ch, 1, ebp + %1 edi, ah, 3, ebp + %2 ebp, bl, 0, ebp + shr eax,16 + and ebx,0xffff0000 + or ebx,eax + shr ecx,16 + %1 ebp, bh, 1, eax + %1 ebp, ch, 3, eax + %2 eax, cl, 2, ecx + %1 eax, bl, 0, ecx + %1 eax, dh, 1, ecx + shr ebx,16 + shr edx,16 + %1 esi, dh, 3, ecx + %1 ebp, dl, 2, ecx + %1 eax, bh, 3, ecx + %1 edi, bl, 2, ecx + +%endmacro + +; Basic MOV and XOR Operations for normal rounds + +%macro ni_xor 4 + movzx %4,%2 + xor %1,dtab_%3(%4) +%endmacro + +%macro ni_mov 4 + movzx %4,%2 + mov %1,dtab_%3(%4) +%endmacro + +; Basic MOV and XOR Operations for last round + +%macro li_xor 4 + movzx %4,%2 + movzx %4,dtab_x(%4) +%if %3 != 0 + shl %4,8*%3 +%endif + xor %1,%4 +%endmacro + +%macro li_mov 4 + movzx %4,%2 + movzx %1,dtab_x(%4) +%if %3 != 0 + shl %1,8*%3 +%endif +%endmacro + +%ifdef REDUCE_CODE_SIZE + +dec_round: + sub sp, 2 +%ifdef AES_REV_DKS + add ebp,16 +%else + sub ebp,16 +%endif + save 1,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + irn_fun ni_xor, ni_mov + + mov ebx,ebp + mov ecx,esi + mov edx,edi + restore ebp,1 + xor eax,[ebp] + xor ebx,[ebp+4] + add sp, 2 + ret + +%else + +%macro dec_round 0 + +%ifdef AES_REV_DKS + add ebp,16 +%else + sub ebp,16 +%endif + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + irn_fun ni_xor, ni_mov + + mov ebx,ebp + mov ecx,esi + mov edx,edi + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + +%endif + +%macro dec_last_round 0 + +%ifdef AES_REV_DKS + add ebp,16 +%else + sub ebp,16 +%endif + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + irn_fun li_xor, li_mov + + mov ebx,ebp + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + + section _TEXT + +; AES Decryption Subroutine + + do_name _aes_decrypt,12 + + mov ax, sp + movzx esp, ax + + sub esp,stk_spc + mov [esp+16],ebp + mov [esp+12],ebx + mov [esp+ 8],esi + mov [esp+ 4],edi + +; input four columns and xor in first round key + + movzx esi,word [esp+in_blk+stk_spc] ; input pointer + mov eax,[esi ] + mov ebx,[esi+ 4] + mov ecx,[esi+ 8] + mov edx,[esi+12] + lea esi,[esi+16] + + movzx ebp, word [esp+ctx+stk_spc] ; key pointer + movzx edi,byte[ebp+4*KS_LENGTH] +%ifndef AES_REV_DKS ; if decryption key schedule is not reversed + lea ebp,[ebp+edi] ; we have to access it from the top down +%endif + xor eax,[ebp ] ; key schedule + xor ebx,[ebp+ 4] + xor ecx,[ebp+ 8] + xor edx,[ebp+12] + +; determine the number of rounds + +%ifndef AES_256 + cmp edi,10*16 + je .3 + cmp edi,12*16 + je .2 + cmp edi,14*16 + je .1 + mov eax,-1 + jmp .5 +%endif + +.1: mf_call dec_round + mf_call dec_round +.2: mf_call dec_round + mf_call dec_round +.3: mf_call dec_round + mf_call dec_round + mf_call dec_round + mf_call dec_round + mf_call dec_round + mf_call dec_round + mf_call dec_round + mf_call dec_round + mf_call dec_round + dec_last_round + +; move final values to the output array. + + movzx ebp,word [esp+out_blk+stk_spc] + mov [ebp],eax + mov [ebp+4],ebx + mov [ebp+8],esi + mov [ebp+12],edi + xor eax,eax + +.5: mov ebp,[esp+16] + mov ebx,[esp+12] + mov esi,[esp+ 8] + mov edi,[esp+ 4] + add esp,stk_spc + do_exit 12 + +%endif + +%ifdef REDUCE_CODE_SIZE + +inv_mix_col: + movzx ecx,dl ; input eax, edx + movzx ecx,etab_b(ecx) ; output eax + mov eax,dtab_0(ecx) ; used ecx + movzx ecx,dh + shr edx,16 + movzx ecx,etab_b(ecx) + xor eax,dtab_1(ecx) + movzx ecx,dl + movzx ecx,etab_b(ecx) + xor eax,dtab_2(ecx) + movzx ecx,dh + movzx ecx,etab_b(ecx) + xor eax,dtab_3(ecx) + ret + +%else + +%macro inv_mix_col 0 + + movzx ecx,dl ; input eax, edx + movzx ecx,etab_b(ecx) ; output eax + mov eax,dtab_0(ecx) ; used ecx + movzx ecx,dh + shr edx,16 + movzx ecx,etab_b(ecx) + xor eax,dtab_1(ecx) + movzx ecx,dl + movzx ecx,etab_b(ecx) + xor eax,dtab_2(ecx) + movzx ecx,dh + movzx ecx,etab_b(ecx) + xor eax,dtab_3(ecx) + +%endmacro + +%endif + +%ifdef DECRYPTION_KEY_SCHEDULE + +%ifdef AES_128 + +%ifndef DECRYPTION_TABLE +; %define DECRYPTION_TABLE +%endif + + do_name _aes_decrypt_key128,8 + + push ebp + push ebx + push esi + push edi + mov eax,[esp+24] ; context + mov edx,[esp+20] ; key + push eax + push edx + do_call _aes_encrypt_key128,8 ; generate expanded encryption key + mov eax,10*16 + mov esi,[esp+24] ; pointer to first round key + lea edi,[esi+eax] ; pointer to last round key + add esi,32 + ; the inverse mix column transformation + mov edx,[esi-16] ; needs to be applied to all round keys + mf_call inv_mix_col ; except first and last. Hence start by + mov [esi-16],eax ; transforming the four sub-keys in the + mov edx,[esi-12] ; second round key + mf_call inv_mix_col + mov [esi-12],eax ; transformations for subsequent rounds + mov edx,[esi-8] ; can then be made more efficient by + mf_call inv_mix_col ; noting that for three of the four sub-keys + mov [esi-8],eax ; in the encryption round key ek[r]: + mov edx,[esi-4] ; + mf_call inv_mix_col ; ek[r][n] = ek[r][n-1] ^ ek[r-1][n] + mov [esi-4],eax ; + ; where n is 1..3. Hence the corresponding +.0: mov edx,[esi] ; subkeys in the decryption round key dk[r] + mf_call inv_mix_col ; also obey since inv_mix_col is linear in + mov [esi],eax ; GF(256): + xor eax,[esi-12] ; + mov [esi+4],eax ; dk[r][n] = dk[r][n-1] ^ dk[r-1][n] + xor eax,[esi-8] ; + mov [esi+8],eax ; So we only need one inverse mix column + xor eax,[esi-4] ; operation (n = 0) for each four word cycle + mov [esi+12],eax ; in the expanded key. + add esi,16 + cmp edi,esi + jg .0 + jmp dec_end + +%endif + +%ifdef AES_192 + +%ifndef DECRYPTION_TABLE +; %define DECRYPTION_TABLE +%endif + + do_name _aes_decrypt_key192,8 + + push ebp + push ebx + push esi + push edi + mov eax,[esp+24] ; context + mov edx,[esp+20] ; key + push eax + push edx + do_call _aes_encrypt_key192,8 ; generate expanded encryption key + mov eax,12*16 + mov esi,[esp+24] ; first round key + lea edi,[esi+eax] ; last round key + add esi,48 ; the first 6 words are the key, of + ; which the top 2 words are part of + mov edx,[esi-32] ; the second round key and hence + mf_call inv_mix_col ; need to be modified. After this we + mov [esi-32],eax ; need to do a further six values prior + mov edx,[esi-28] ; to using a more efficient technique + mf_call inv_mix_col ; based on: + mov [esi-28],eax ; + ; dk[r][n] = dk[r][n-1] ^ dk[r-1][n] + mov edx,[esi-24] ; + mf_call inv_mix_col ; for n = 1 .. 5 where the key expansion + mov [esi-24],eax ; cycle is now 6 words long + mov edx,[esi-20] + mf_call inv_mix_col + mov [esi-20],eax + mov edx,[esi-16] + mf_call inv_mix_col + mov [esi-16],eax + mov edx,[esi-12] + mf_call inv_mix_col + mov [esi-12],eax + mov edx,[esi-8] + mf_call inv_mix_col + mov [esi-8],eax + mov edx,[esi-4] + mf_call inv_mix_col + mov [esi-4],eax + +.0: mov edx,[esi] ; the expanded key is 13 * 4 = 44 32-bit words + mf_call inv_mix_col ; of which 11 * 4 = 44 have to be modified + mov [esi],eax ; using inv_mix_col. We have already done 8 + xor eax,[esi-20] ; of these so 36 are left - hence we need + mov [esi+4],eax ; exactly 6 loops of six here + xor eax,[esi-16] + mov [esi+8],eax + xor eax,[esi-12] + mov [esi+12],eax + xor eax,[esi-8] + mov [esi+16],eax + xor eax,[esi-4] + mov [esi+20],eax + add esi,24 + cmp edi,esi + jg .0 + jmp dec_end + +%endif + +%ifdef AES_256 + +%ifndef DECRYPTION_TABLE +; %define DECRYPTION_TABLE +%endif + + do_name _aes_decrypt_key256,8 + + mov ax, sp + movzx esp, ax + push ebp + push ebx + push esi + push edi + + movzx eax, word [esp+20] ; ks + movzx edx, word [esp+18] ; key + push ax + push dx + do_call _aes_encrypt_key256,4 ; generate expanded encryption key + mov eax,14*16 + movzx esi, word [esp+20] ; ks + lea edi,[esi+eax] + add esi,64 + + mov edx,[esi-48] ; the primary key is 8 words, of which + mf_call inv_mix_col ; the top four require modification + mov [esi-48],eax + mov edx,[esi-44] + mf_call inv_mix_col + mov [esi-44],eax + mov edx,[esi-40] + mf_call inv_mix_col + mov [esi-40],eax + mov edx,[esi-36] + mf_call inv_mix_col + mov [esi-36],eax + + mov edx,[esi-32] ; the encryption key expansion cycle is + mf_call inv_mix_col ; now eight words long so we need to + mov [esi-32],eax ; start by doing one complete block + mov edx,[esi-28] + mf_call inv_mix_col + mov [esi-28],eax + mov edx,[esi-24] + mf_call inv_mix_col + mov [esi-24],eax + mov edx,[esi-20] + mf_call inv_mix_col + mov [esi-20],eax + mov edx,[esi-16] + mf_call inv_mix_col + mov [esi-16],eax + mov edx,[esi-12] + mf_call inv_mix_col + mov [esi-12],eax + mov edx,[esi-8] + mf_call inv_mix_col + mov [esi-8],eax + mov edx,[esi-4] + mf_call inv_mix_col + mov [esi-4],eax + +.0: mov edx,[esi] ; we can now speed up the remaining + mf_call inv_mix_col ; rounds by using the technique + mov [esi],eax ; outlined earlier. But note that + xor eax,[esi-28] ; there is one extra inverse mix + mov [esi+4],eax ; column operation as the 256 bit + xor eax,[esi-24] ; key has an extra non-linear step + mov [esi+8],eax ; for the midway element. + xor eax,[esi-20] + mov [esi+12],eax ; the expanded key is 15 * 4 = 60 + mov edx,[esi+16] ; 32-bit words of which 52 need to + mf_call inv_mix_col ; be modified. We have already done + mov [esi+16],eax ; 12 so 40 are left - which means + xor eax,[esi-12] ; that we need exactly 5 loops of 8 + mov [esi+20],eax + xor eax,[esi-8] + mov [esi+24],eax + xor eax,[esi-4] + mov [esi+28],eax + add esi,32 + cmp edi,esi + jg .0 + +%endif + +dec_end: + +%ifdef AES_REV_DKS + + movzx esi,word [esp+20] ; this reverses the order of the +.1: mov eax,[esi] ; round keys if required + mov ebx,[esi+4] + mov ebp,[edi] + mov edx,[edi+4] + mov [esi],ebp + mov [esi+4],edx + mov [edi],eax + mov [edi+4],ebx + + mov eax,[esi+8] + mov ebx,[esi+12] + mov ebp,[edi+8] + mov edx,[edi+12] + mov [esi+8],ebp + mov [esi+12],edx + mov [edi+8],eax + mov [edi+12],ebx + + add esi,16 + sub edi,16 + cmp edi,esi + jg .1 + +%endif + + pop edi + pop esi + pop ebx + pop ebp + xor eax,eax + do_exit 8 + +%ifdef AES_VAR + + do_name _aes_decrypt_key,12 + + mov ecx,[esp+4] + mov eax,[esp+8] + mov edx,[esp+12] + push edx + push ecx + + cmp eax,16 + je .1 + cmp eax,128 + je .1 + + cmp eax,24 + je .2 + cmp eax,192 + je .2 + + cmp eax,32 + je .3 + cmp eax,256 + je .3 + mov eax,-1 + add esp,8 + do_exit 12 + +.1: do_call _aes_decrypt_key128,8 + do_exit 12 +.2: do_call _aes_decrypt_key192,8 + do_exit 12 +.3: do_call _aes_decrypt_key256,8 + do_exit 12 + +%endif + +%endif + +%ifdef DECRYPTION_TABLE + +; Inverse S-box data - 256 entries + + section _DATA + +%define v8(x) fe(x), f9(x), fd(x), fb(x), fe(x), f9(x), fd(x), x + +_aes_dec_tab: + db v8(0x52),v8(0x09),v8(0x6a),v8(0xd5),v8(0x30),v8(0x36),v8(0xa5),v8(0x38) + db v8(0xbf),v8(0x40),v8(0xa3),v8(0x9e),v8(0x81),v8(0xf3),v8(0xd7),v8(0xfb) + db v8(0x7c),v8(0xe3),v8(0x39),v8(0x82),v8(0x9b),v8(0x2f),v8(0xff),v8(0x87) + db v8(0x34),v8(0x8e),v8(0x43),v8(0x44),v8(0xc4),v8(0xde),v8(0xe9),v8(0xcb) + db v8(0x54),v8(0x7b),v8(0x94),v8(0x32),v8(0xa6),v8(0xc2),v8(0x23),v8(0x3d) + db v8(0xee),v8(0x4c),v8(0x95),v8(0x0b),v8(0x42),v8(0xfa),v8(0xc3),v8(0x4e) + db v8(0x08),v8(0x2e),v8(0xa1),v8(0x66),v8(0x28),v8(0xd9),v8(0x24),v8(0xb2) + db v8(0x76),v8(0x5b),v8(0xa2),v8(0x49),v8(0x6d),v8(0x8b),v8(0xd1),v8(0x25) + db v8(0x72),v8(0xf8),v8(0xf6),v8(0x64),v8(0x86),v8(0x68),v8(0x98),v8(0x16) + db v8(0xd4),v8(0xa4),v8(0x5c),v8(0xcc),v8(0x5d),v8(0x65),v8(0xb6),v8(0x92) + db v8(0x6c),v8(0x70),v8(0x48),v8(0x50),v8(0xfd),v8(0xed),v8(0xb9),v8(0xda) + db v8(0x5e),v8(0x15),v8(0x46),v8(0x57),v8(0xa7),v8(0x8d),v8(0x9d),v8(0x84) + db v8(0x90),v8(0xd8),v8(0xab),v8(0x00),v8(0x8c),v8(0xbc),v8(0xd3),v8(0x0a) + db v8(0xf7),v8(0xe4),v8(0x58),v8(0x05),v8(0xb8),v8(0xb3),v8(0x45),v8(0x06) + db v8(0xd0),v8(0x2c),v8(0x1e),v8(0x8f),v8(0xca),v8(0x3f),v8(0x0f),v8(0x02) + db v8(0xc1),v8(0xaf),v8(0xbd),v8(0x03),v8(0x01),v8(0x13),v8(0x8a),v8(0x6b) + db v8(0x3a),v8(0x91),v8(0x11),v8(0x41),v8(0x4f),v8(0x67),v8(0xdc),v8(0xea) + db v8(0x97),v8(0xf2),v8(0xcf),v8(0xce),v8(0xf0),v8(0xb4),v8(0xe6),v8(0x73) + db v8(0x96),v8(0xac),v8(0x74),v8(0x22),v8(0xe7),v8(0xad),v8(0x35),v8(0x85) + db v8(0xe2),v8(0xf9),v8(0x37),v8(0xe8),v8(0x1c),v8(0x75),v8(0xdf),v8(0x6e) + db v8(0x47),v8(0xf1),v8(0x1a),v8(0x71),v8(0x1d),v8(0x29),v8(0xc5),v8(0x89) + db v8(0x6f),v8(0xb7),v8(0x62),v8(0x0e),v8(0xaa),v8(0x18),v8(0xbe),v8(0x1b) + db v8(0xfc),v8(0x56),v8(0x3e),v8(0x4b),v8(0xc6),v8(0xd2),v8(0x79),v8(0x20) + db v8(0x9a),v8(0xdb),v8(0xc0),v8(0xfe),v8(0x78),v8(0xcd),v8(0x5a),v8(0xf4) + db v8(0x1f),v8(0xdd),v8(0xa8),v8(0x33),v8(0x88),v8(0x07),v8(0xc7),v8(0x31) + db v8(0xb1),v8(0x12),v8(0x10),v8(0x59),v8(0x27),v8(0x80),v8(0xec),v8(0x5f) + db v8(0x60),v8(0x51),v8(0x7f),v8(0xa9),v8(0x19),v8(0xb5),v8(0x4a),v8(0x0d) + db v8(0x2d),v8(0xe5),v8(0x7a),v8(0x9f),v8(0x93),v8(0xc9),v8(0x9c),v8(0xef) + db v8(0xa0),v8(0xe0),v8(0x3b),v8(0x4d),v8(0xae),v8(0x2a),v8(0xf5),v8(0xb0) + db v8(0xc8),v8(0xeb),v8(0xbb),v8(0x3c),v8(0x83),v8(0x53),v8(0x99),v8(0x61) + db v8(0x17),v8(0x2b),v8(0x04),v8(0x7e),v8(0xba),v8(0x77),v8(0xd6),v8(0x26) + db v8(0xe1),v8(0x69),v8(0x14),v8(0x63),v8(0x55),v8(0x21),v8(0x0c),v8(0x7d) + +%endif diff --git a/src/Crypto/Aes_hw_cpu.asm b/src/Crypto/Aes_hw_cpu.asm index 64c3bad8..53852665 100644 --- a/src/Crypto/Aes_hw_cpu.asm +++ b/src/Crypto/Aes_hw_cpu.asm @@ -1,330 +1,330 @@ -; -; Copyright (c) 2010 TrueCrypt Developers Association. All rights reserved. -; -; Governed by the TrueCrypt License 3.0 the full text of which is contained in -; the file License.txt included in TrueCrypt binary and source code distribution -; packages. -; - - -%ifidn __BITS__, 16 - %define R e -%elifidn __BITS__, 32 - %define R e -%elifidn __BITS__, 64 - %define R r -%endif - - -%macro export_function 1-2 0 - - %ifdef MS_STDCALL - global %1@%2 - export _%1@%2 - %1@%2: - %elifidn __BITS__, 16 - global _%1 - _%1: - %else - global %1 - %1: - %endif - -%endmacro - - -%macro aes_function_entry 1 - - ; void (const byte *ks, byte *data); - - export_function %1, 8 - - %ifidn __BITS__, 32 - mov ecx, [esp + 4 + 4 * 0] - mov edx, [esp + 4 + 4 * 1] - %elifidn __BITS__, 64 - %ifnidn __OUTPUT_FORMAT__, win64 - mov rcx, rdi - mov rdx, rsi - %endif - %endif - - ; ecx/rcx = ks - ; edx/rdx = data - -%endmacro - - -%macro aes_function_exit 0 - - ; void (const byte *, byte *); - - %ifdef MS_STDCALL - ret 8 - %else - ret - %endif - -%endmacro - - -%macro push_xmm 2 - sub rsp, 16 * (%2 - %1 + 1) - - %assign stackoffset 0 - %assign regnumber %1 - - %rep (%2 - %1 + 1) - movdqu [rsp + 16 * stackoffset], xmm%[regnumber] - - %assign stackoffset stackoffset+1 - %assign regnumber regnumber+1 - %endrep -%endmacro - - -%macro pop_xmm 2 - %assign stackoffset 0 - %assign regnumber %1 - - %rep (%2 - %1 + 1) - movdqu xmm%[regnumber], [rsp + 16 * stackoffset] - - %assign stackoffset stackoffset+1 - %assign regnumber regnumber+1 - %endrep - - add rsp, 16 * (%2 - %1 + 1) -%endmacro - - -%macro aes_hw_cpu 2 - %define OPERATION %1 - %define BLOCK_COUNT %2 - - ; Load data blocks - %assign block 1 - %rep BLOCK_COUNT - movdqu xmm%[block], [%[R]dx + 16 * (block - 1)] - %assign block block+1 - %endrep - - ; Encrypt/decrypt data blocks - %assign round 0 - %rep 15 - movdqu xmm0, [%[R]cx + 16 * round] - - %assign block 1 - %rep BLOCK_COUNT - - %if round = 0 - pxor xmm%[block], xmm0 - %else - %if round < 14 - aes%[OPERATION] xmm%[block], xmm0 - %else - aes%[OPERATION]last xmm%[block], xmm0 - %endif - %endif - - %assign block block+1 - %endrep - - %assign round round+1 - %endrep - - ; Store data blocks - %assign block 1 - %rep BLOCK_COUNT - movdqu [%[R]dx + 16 * (block - 1)], xmm%[block] - %assign block block+1 - %endrep - - %undef OPERATION - %undef BLOCK_COUNT -%endmacro - - -%macro aes_hw_cpu_32_blocks 1 - %define OPERATION_32_BLOCKS %1 - - %ifidn __BITS__, 64 - %define MAX_REG_BLOCK_COUNT 15 - %else - %define MAX_REG_BLOCK_COUNT 7 - %endif - - %ifidn __OUTPUT_FORMAT__, win64 - %if MAX_REG_BLOCK_COUNT > 5 - push_xmm 6, MAX_REG_BLOCK_COUNT - %endif - %endif - - mov eax, 32 / MAX_REG_BLOCK_COUNT - .1: - aes_hw_cpu %[OPERATION_32_BLOCKS], MAX_REG_BLOCK_COUNT - - add %[R]dx, 16 * MAX_REG_BLOCK_COUNT - dec eax - jnz .1 - - %if (32 % MAX_REG_BLOCK_COUNT) != 0 - aes_hw_cpu %[OPERATION_32_BLOCKS], (32 % MAX_REG_BLOCK_COUNT) - %endif - - %ifidn __OUTPUT_FORMAT__, win64 - %if MAX_REG_BLOCK_COUNT > 5 - pop_xmm 6, MAX_REG_BLOCK_COUNT - %endif - %endif - - %undef OPERATION_32_BLOCKS - %undef MAX_REG_BLOCK_COUNT -%endmacro - - -%ifidn __BITS__, 16 - - USE16 - SEGMENT _TEXT PUBLIC CLASS=CODE USE16 - SEGMENT _DATA PUBLIC CLASS=DATA USE16 - GROUP DGROUP _TEXT _DATA - SECTION _TEXT - -%else - - SECTION .text - -%endif - - -; void aes_hw_cpu_enable_sse (); - - export_function aes_hw_cpu_enable_sse - mov %[R]ax, cr4 - or ax, 1 << 9 - mov cr4, %[R]ax - ret - - -%ifidn __BITS__, 16 - - -; byte is_aes_hw_cpu_supported (); - - export_function is_aes_hw_cpu_supported - mov eax, 1 - cpuid - mov eax, ecx - shr eax, 25 - and al, 1 - ret - - -; void aes_hw_cpu_decrypt (const byte *ks, byte *data); - - export_function aes_hw_cpu_decrypt - mov ax, -16 - jmp aes_hw_cpu_encrypt_decrypt - -; void aes_hw_cpu_encrypt (const byte *ks, byte *data); - - export_function aes_hw_cpu_encrypt - mov ax, 16 - - aes_hw_cpu_encrypt_decrypt: - push bp - mov bp, sp - push di - push si - - mov si, [bp + 4] ; ks - mov di, [bp + 4 + 2] ; data - - movdqu xmm0, [si] - movdqu xmm1, [di] - - pxor xmm1, xmm0 - - mov cx, 13 - - .round1_13: - add si, ax - movdqu xmm0, [si] - - cmp ax, 0 - jl .decrypt - - aesenc xmm1, xmm0 - jmp .2 - .decrypt: - aesdec xmm1, xmm0 - .2: - loop .round1_13 - - add si, ax - movdqu xmm0, [si] - - cmp ax, 0 - jl .decrypt_last - - aesenclast xmm1, xmm0 - jmp .3 - .decrypt_last: - aesdeclast xmm1, xmm0 - .3: - movdqu [di], xmm1 - - pop si - pop di - pop bp - ret - - -%else ; __BITS__ != 16 - - -; byte is_aes_hw_cpu_supported (); - - export_function is_aes_hw_cpu_supported - push %[R]bx - - mov eax, 1 - cpuid - mov eax, ecx - shr eax, 25 - and eax, 1 - - pop %[R]bx - ret - - -; void aes_hw_cpu_decrypt (const byte *ks, byte *data); - - aes_function_entry aes_hw_cpu_decrypt - aes_hw_cpu dec, 1 - aes_function_exit - - -; void aes_hw_cpu_decrypt_32_blocks (const byte *ks, byte *data); - - aes_function_entry aes_hw_cpu_decrypt_32_blocks - aes_hw_cpu_32_blocks dec - aes_function_exit - - -; void aes_hw_cpu_encrypt (const byte *ks, byte *data); - - aes_function_entry aes_hw_cpu_encrypt - aes_hw_cpu enc, 1 - aes_function_exit - - -; void aes_hw_cpu_encrypt_32_blocks (const byte *ks, byte *data); - - aes_function_entry aes_hw_cpu_encrypt_32_blocks - aes_hw_cpu_32_blocks enc - aes_function_exit - - -%endif ; __BITS__ != 16 +; +; Copyright (c) 2010 TrueCrypt Developers Association. All rights reserved. +; +; Governed by the TrueCrypt License 3.0 the full text of which is contained in +; the file License.txt included in TrueCrypt binary and source code distribution +; packages. +; + + +%ifidn __BITS__, 16 + %define R e +%elifidn __BITS__, 32 + %define R e +%elifidn __BITS__, 64 + %define R r +%endif + + +%macro export_function 1-2 0 + + %ifdef MS_STDCALL + global %1@%2 + export _%1@%2 + %1@%2: + %elifidn __BITS__, 16 + global _%1 + _%1: + %else + global %1 + %1: + %endif + +%endmacro + + +%macro aes_function_entry 1 + + ; void (const byte *ks, byte *data); + + export_function %1, 8 + + %ifidn __BITS__, 32 + mov ecx, [esp + 4 + 4 * 0] + mov edx, [esp + 4 + 4 * 1] + %elifidn __BITS__, 64 + %ifnidn __OUTPUT_FORMAT__, win64 + mov rcx, rdi + mov rdx, rsi + %endif + %endif + + ; ecx/rcx = ks + ; edx/rdx = data + +%endmacro + + +%macro aes_function_exit 0 + + ; void (const byte *, byte *); + + %ifdef MS_STDCALL + ret 8 + %else + ret + %endif + +%endmacro + + +%macro push_xmm 2 + sub rsp, 16 * (%2 - %1 + 1) + + %assign stackoffset 0 + %assign regnumber %1 + + %rep (%2 - %1 + 1) + movdqu [rsp + 16 * stackoffset], xmm%[regnumber] + + %assign stackoffset stackoffset+1 + %assign regnumber regnumber+1 + %endrep +%endmacro + + +%macro pop_xmm 2 + %assign stackoffset 0 + %assign regnumber %1 + + %rep (%2 - %1 + 1) + movdqu xmm%[regnumber], [rsp + 16 * stackoffset] + + %assign stackoffset stackoffset+1 + %assign regnumber regnumber+1 + %endrep + + add rsp, 16 * (%2 - %1 + 1) +%endmacro + + +%macro aes_hw_cpu 2 + %define OPERATION %1 + %define BLOCK_COUNT %2 + + ; Load data blocks + %assign block 1 + %rep BLOCK_COUNT + movdqu xmm%[block], [%[R]dx + 16 * (block - 1)] + %assign block block+1 + %endrep + + ; Encrypt/decrypt data blocks + %assign round 0 + %rep 15 + movdqu xmm0, [%[R]cx + 16 * round] + + %assign block 1 + %rep BLOCK_COUNT + + %if round = 0 + pxor xmm%[block], xmm0 + %else + %if round < 14 + aes%[OPERATION] xmm%[block], xmm0 + %else + aes%[OPERATION]last xmm%[block], xmm0 + %endif + %endif + + %assign block block+1 + %endrep + + %assign round round+1 + %endrep + + ; Store data blocks + %assign block 1 + %rep BLOCK_COUNT + movdqu [%[R]dx + 16 * (block - 1)], xmm%[block] + %assign block block+1 + %endrep + + %undef OPERATION + %undef BLOCK_COUNT +%endmacro + + +%macro aes_hw_cpu_32_blocks 1 + %define OPERATION_32_BLOCKS %1 + + %ifidn __BITS__, 64 + %define MAX_REG_BLOCK_COUNT 15 + %else + %define MAX_REG_BLOCK_COUNT 7 + %endif + + %ifidn __OUTPUT_FORMAT__, win64 + %if MAX_REG_BLOCK_COUNT > 5 + push_xmm 6, MAX_REG_BLOCK_COUNT + %endif + %endif + + mov eax, 32 / MAX_REG_BLOCK_COUNT + .1: + aes_hw_cpu %[OPERATION_32_BLOCKS], MAX_REG_BLOCK_COUNT + + add %[R]dx, 16 * MAX_REG_BLOCK_COUNT + dec eax + jnz .1 + + %if (32 % MAX_REG_BLOCK_COUNT) != 0 + aes_hw_cpu %[OPERATION_32_BLOCKS], (32 % MAX_REG_BLOCK_COUNT) + %endif + + %ifidn __OUTPUT_FORMAT__, win64 + %if MAX_REG_BLOCK_COUNT > 5 + pop_xmm 6, MAX_REG_BLOCK_COUNT + %endif + %endif + + %undef OPERATION_32_BLOCKS + %undef MAX_REG_BLOCK_COUNT +%endmacro + + +%ifidn __BITS__, 16 + + USE16 + SEGMENT _TEXT PUBLIC CLASS=CODE USE16 + SEGMENT _DATA PUBLIC CLASS=DATA USE16 + GROUP DGROUP _TEXT _DATA + SECTION _TEXT + +%else + + SECTION .text + +%endif + + +; void aes_hw_cpu_enable_sse (); + + export_function aes_hw_cpu_enable_sse + mov %[R]ax, cr4 + or ax, 1 << 9 + mov cr4, %[R]ax + ret + + +%ifidn __BITS__, 16 + + +; byte is_aes_hw_cpu_supported (); + + export_function is_aes_hw_cpu_supported + mov eax, 1 + cpuid + mov eax, ecx + shr eax, 25 + and al, 1 + ret + + +; void aes_hw_cpu_decrypt (const byte *ks, byte *data); + + export_function aes_hw_cpu_decrypt + mov ax, -16 + jmp aes_hw_cpu_encrypt_decrypt + +; void aes_hw_cpu_encrypt (const byte *ks, byte *data); + + export_function aes_hw_cpu_encrypt + mov ax, 16 + + aes_hw_cpu_encrypt_decrypt: + push bp + mov bp, sp + push di + push si + + mov si, [bp + 4] ; ks + mov di, [bp + 4 + 2] ; data + + movdqu xmm0, [si] + movdqu xmm1, [di] + + pxor xmm1, xmm0 + + mov cx, 13 + + .round1_13: + add si, ax + movdqu xmm0, [si] + + cmp ax, 0 + jl .decrypt + + aesenc xmm1, xmm0 + jmp .2 + .decrypt: + aesdec xmm1, xmm0 + .2: + loop .round1_13 + + add si, ax + movdqu xmm0, [si] + + cmp ax, 0 + jl .decrypt_last + + aesenclast xmm1, xmm0 + jmp .3 + .decrypt_last: + aesdeclast xmm1, xmm0 + .3: + movdqu [di], xmm1 + + pop si + pop di + pop bp + ret + + +%else ; __BITS__ != 16 + + +; byte is_aes_hw_cpu_supported (); + + export_function is_aes_hw_cpu_supported + push %[R]bx + + mov eax, 1 + cpuid + mov eax, ecx + shr eax, 25 + and eax, 1 + + pop %[R]bx + ret + + +; void aes_hw_cpu_decrypt (const byte *ks, byte *data); + + aes_function_entry aes_hw_cpu_decrypt + aes_hw_cpu dec, 1 + aes_function_exit + + +; void aes_hw_cpu_decrypt_32_blocks (const byte *ks, byte *data); + + aes_function_entry aes_hw_cpu_decrypt_32_blocks + aes_hw_cpu_32_blocks dec + aes_function_exit + + +; void aes_hw_cpu_encrypt (const byte *ks, byte *data); + + aes_function_entry aes_hw_cpu_encrypt + aes_hw_cpu enc, 1 + aes_function_exit + + +; void aes_hw_cpu_encrypt_32_blocks (const byte *ks, byte *data); + + aes_function_entry aes_hw_cpu_encrypt_32_blocks + aes_hw_cpu_32_blocks enc + aes_function_exit + + +%endif ; __BITS__ != 16 diff --git a/src/Crypto/Aes_hw_cpu.h b/src/Crypto/Aes_hw_cpu.h index 2342b4c5..e2fed1a1 100644 --- a/src/Crypto/Aes_hw_cpu.h +++ b/src/Crypto/Aes_hw_cpu.h @@ -8,27 +8,27 @@ and are governed by the Apache License 2.0 the full text of which is contained in the file License.txt included in VeraCrypt binary and source code distribution packages. -*/ - -#ifndef TC_HEADER_Crypto_Aes_Hw_Cpu -#define TC_HEADER_Crypto_Aes_Hw_Cpu - -#include "Common/Tcdefs.h" - -#if defined(__cplusplus) -extern "C" -{ -#endif - -byte is_aes_hw_cpu_supported (); -void aes_hw_cpu_enable_sse (); -void aes_hw_cpu_decrypt (const byte *ks, byte *data); -void aes_hw_cpu_decrypt_32_blocks (const byte *ks, byte *data); -void aes_hw_cpu_encrypt (const byte *ks, byte *data); -void aes_hw_cpu_encrypt_32_blocks (const byte *ks, byte *data); - -#if defined(__cplusplus) -} -#endif - -#endif // TC_HEADER_Crypto_Aes_Hw_Cpu +*/ + +#ifndef TC_HEADER_Crypto_Aes_Hw_Cpu +#define TC_HEADER_Crypto_Aes_Hw_Cpu + +#include "Common/Tcdefs.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +byte is_aes_hw_cpu_supported (); +void aes_hw_cpu_enable_sse (); +void aes_hw_cpu_decrypt (const byte *ks, byte *data); +void aes_hw_cpu_decrypt_32_blocks (const byte *ks, byte *data); +void aes_hw_cpu_encrypt (const byte *ks, byte *data); +void aes_hw_cpu_encrypt_32_blocks (const byte *ks, byte *data); + +#if defined(__cplusplus) +} +#endif + +#endif // TC_HEADER_Crypto_Aes_Hw_Cpu diff --git a/src/Crypto/Aes_x64.asm b/src/Crypto/Aes_x64.asm index b29fdcac..06d57ac2 100644 --- a/src/Crypto/Aes_x64.asm +++ b/src/Crypto/Aes_x64.asm @@ -1,907 +1,907 @@ - -; --------------------------------------------------------------------------- -; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. -; -; LICENSE TERMS -; -; The free distribution and use of this software is allowed (with or without -; changes) provided that: -; -; 1. source code distributions include the above copyright notice, this -; list of conditions and the following disclaimer; -; -; 2. binary distributions include the above copyright notice, this list -; of conditions and the following disclaimer in their documentation; -; -; 3. the name of the copyright holder is not used to endorse products -; built using this software without specific written permission. -; -; DISCLAIMER -; -; This software is provided 'as is' with no explicit or implied warranties -; in respect of its properties, including, but not limited to, correctness -; and/or fitness for purpose. -; --------------------------------------------------------------------------- -; Issue 20/12/2007 -; -; I am grateful to Dag Arne Osvik for many discussions of the techniques that -; can be used to optimise AES assembler code on AMD64/EM64T architectures. -; Some of the techniques used in this implementation are the result of -; suggestions made by him for which I am most grateful. - -; -; Adapted for TrueCrypt: -; - Compatibility with NASM -; - -; An AES implementation for AMD64 processors using the YASM assembler. This -; implemetation provides only encryption, decryption and hence requires key -; scheduling support in C. It uses 8k bytes of tables but its encryption and -; decryption performance is very close to that obtained using large tables. -; It can use either Windows or Gnu/Linux calling conventions, which are as -; follows: -; windows gnu/linux -; -; in_blk rcx rdi -; out_blk rdx rsi -; context (cx) r8 rdx -; -; preserved rsi - + rbx, rbp, rsp, r12, r13, r14 & r15 -; registers rdi - on both -; -; destroyed - rsi + rax, rcx, rdx, r8, r9, r10 & r11 -; registers - rdi on both -; -; The default convention is that for windows, the gnu/linux convention being -; used if __GNUC__ is defined. -; -; Define _SEH_ to include support for Win64 structured exception handling -; (this requires YASM version 0.6 or later). -; -; This code provides the standard AES block size (128 bits, 16 bytes) and the -; three standard AES key sizes (128, 192 and 256 bits). It has the same call -; interface as my C implementation. It uses the Microsoft C AMD64 calling -; conventions in which the three parameters are placed in rcx, rdx and r8 -; respectively. The rbx, rsi, rdi, rbp and r12..r15 registers are preserved. -; -; AES_RETURN aes_encrypt(const unsigned char in_blk[], -; unsigned char out_blk[], const aes_encrypt_ctx cx[1]); -; -; AES_RETURN aes_decrypt(const unsigned char in_blk[], -; unsigned char out_blk[], const aes_decrypt_ctx cx[1]); -; -; AES_RETURN aes_encrypt_key(const unsigned char key[], -; const aes_encrypt_ctx cx[1]); -; -; AES_RETURN aes_decrypt_key(const unsigned char key[], -; const aes_decrypt_ctx cx[1]); -; -; AES_RETURN aes_encrypt_key(const unsigned char key[], -; unsigned int len, const aes_decrypt_ctx cx[1]); -; -; AES_RETURN aes_decrypt_key(const unsigned char key[], -; unsigned int len, const aes_decrypt_ctx cx[1]); -; -; where is 128, 102 or 256. In the last two calls the length can be in -; either bits or bytes. -; -; Comment in/out the following lines to obtain the desired subroutines. These -; selections MUST match those in the C header file aes.h - -; %define AES_128 ; define if AES with 128 bit keys is needed -; %define AES_192 ; define if AES with 192 bit keys is needed -%define AES_256 ; define if AES with 256 bit keys is needed -; %define AES_VAR ; define if a variable key size is needed -%define ENCRYPTION ; define if encryption is needed -%define DECRYPTION ; define if decryption is needed -%define AES_REV_DKS ; define if key decryption schedule is reversed -%define LAST_ROUND_TABLES ; define for the faster version using extra tables - -; The encryption key schedule has the following in memory layout where N is the -; number of rounds (10, 12 or 14): -; -; lo: | input key (round 0) | ; each round is four 32-bit words -; | encryption round 1 | -; | encryption round 2 | -; .... -; | encryption round N-1 | -; hi: | encryption round N | -; -; The decryption key schedule is normally set up so that it has the same -; layout as above by actually reversing the order of the encryption key -; schedule in memory (this happens when AES_REV_DKS is set): -; -; lo: | decryption round 0 | = | encryption round N | -; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ] -; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ] -; .... .... -; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ] -; hi: | decryption round N | = | input key (round 0) | -; -; with rounds except the first and last modified using inv_mix_column() -; But if AES_REV_DKS is NOT set the order of keys is left as it is for -; encryption so that it has to be accessed in reverse when used for -; decryption (although the inverse mix column modifications are done) -; -; lo: | decryption round 0 | = | input key (round 0) | -; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ] -; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ] -; .... .... -; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ] -; hi: | decryption round N | = | encryption round N | -; -; This layout is faster when the assembler key scheduling provided here -; is used. -; -; The DLL interface must use the _stdcall convention in which the number -; of bytes of parameter space is added after an @ to the sutine's name. -; We must also remove our parameters from the stack before return (see -; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version. - -;%define DLL_EXPORT - -; End of user defines - -%ifdef AES_VAR -%ifndef AES_128 -%define AES_128 -%endif -%ifndef AES_192 -%define AES_192 -%endif -%ifndef AES_256 -%define AES_256 -%endif -%endif - -%ifdef AES_VAR -%define KS_LENGTH 60 -%elifdef AES_256 -%define KS_LENGTH 60 -%elifdef AES_192 -%define KS_LENGTH 52 -%else -%define KS_LENGTH 44 -%endif - -%define r0 rax -%define r1 rdx -%define r2 rcx -%define r3 rbx -%define r4 rsi -%define r5 rdi -%define r6 rbp -%define r7 rsp - -%define raxd eax -%define rdxd edx -%define rcxd ecx -%define rbxd ebx -%define rsid esi -%define rdid edi -%define rbpd ebp -%define rspd esp - -%define raxb al -%define rdxb dl -%define rcxb cl -%define rbxb bl -%define rsib sil -%define rdib dil -%define rbpb bpl -%define rspb spl - -%define r0h ah -%define r1h dh -%define r2h ch -%define r3h bh - -%define r0d eax -%define r1d edx -%define r2d ecx -%define r3d ebx - -; finite field multiplies by {02}, {04} and {08} - -%define f2(x) ((x<<1)^(((x>>7)&1)*0x11b)) -%define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b)) -%define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b)) - -; finite field multiplies required in table generation - -%define f3(x) (f2(x) ^ x) -%define f9(x) (f8(x) ^ x) -%define fb(x) (f8(x) ^ f2(x) ^ x) -%define fd(x) (f8(x) ^ f4(x) ^ x) -%define fe(x) (f8(x) ^ f4(x) ^ f2(x)) - -; macro for expanding S-box data - -%macro enc_vals 1 - db %1(0x63),%1(0x7c),%1(0x77),%1(0x7b),%1(0xf2),%1(0x6b),%1(0x6f),%1(0xc5) - db %1(0x30),%1(0x01),%1(0x67),%1(0x2b),%1(0xfe),%1(0xd7),%1(0xab),%1(0x76) - db %1(0xca),%1(0x82),%1(0xc9),%1(0x7d),%1(0xfa),%1(0x59),%1(0x47),%1(0xf0) - db %1(0xad),%1(0xd4),%1(0xa2),%1(0xaf),%1(0x9c),%1(0xa4),%1(0x72),%1(0xc0) - db %1(0xb7),%1(0xfd),%1(0x93),%1(0x26),%1(0x36),%1(0x3f),%1(0xf7),%1(0xcc) - db %1(0x34),%1(0xa5),%1(0xe5),%1(0xf1),%1(0x71),%1(0xd8),%1(0x31),%1(0x15) - db %1(0x04),%1(0xc7),%1(0x23),%1(0xc3),%1(0x18),%1(0x96),%1(0x05),%1(0x9a) - db %1(0x07),%1(0x12),%1(0x80),%1(0xe2),%1(0xeb),%1(0x27),%1(0xb2),%1(0x75) - db %1(0x09),%1(0x83),%1(0x2c),%1(0x1a),%1(0x1b),%1(0x6e),%1(0x5a),%1(0xa0) - db %1(0x52),%1(0x3b),%1(0xd6),%1(0xb3),%1(0x29),%1(0xe3),%1(0x2f),%1(0x84) - db %1(0x53),%1(0xd1),%1(0x00),%1(0xed),%1(0x20),%1(0xfc),%1(0xb1),%1(0x5b) - db %1(0x6a),%1(0xcb),%1(0xbe),%1(0x39),%1(0x4a),%1(0x4c),%1(0x58),%1(0xcf) - db %1(0xd0),%1(0xef),%1(0xaa),%1(0xfb),%1(0x43),%1(0x4d),%1(0x33),%1(0x85) - db %1(0x45),%1(0xf9),%1(0x02),%1(0x7f),%1(0x50),%1(0x3c),%1(0x9f),%1(0xa8) - db %1(0x51),%1(0xa3),%1(0x40),%1(0x8f),%1(0x92),%1(0x9d),%1(0x38),%1(0xf5) - db %1(0xbc),%1(0xb6),%1(0xda),%1(0x21),%1(0x10),%1(0xff),%1(0xf3),%1(0xd2) - db %1(0xcd),%1(0x0c),%1(0x13),%1(0xec),%1(0x5f),%1(0x97),%1(0x44),%1(0x17) - db %1(0xc4),%1(0xa7),%1(0x7e),%1(0x3d),%1(0x64),%1(0x5d),%1(0x19),%1(0x73) - db %1(0x60),%1(0x81),%1(0x4f),%1(0xdc),%1(0x22),%1(0x2a),%1(0x90),%1(0x88) - db %1(0x46),%1(0xee),%1(0xb8),%1(0x14),%1(0xde),%1(0x5e),%1(0x0b),%1(0xdb) - db %1(0xe0),%1(0x32),%1(0x3a),%1(0x0a),%1(0x49),%1(0x06),%1(0x24),%1(0x5c) - db %1(0xc2),%1(0xd3),%1(0xac),%1(0x62),%1(0x91),%1(0x95),%1(0xe4),%1(0x79) - db %1(0xe7),%1(0xc8),%1(0x37),%1(0x6d),%1(0x8d),%1(0xd5),%1(0x4e),%1(0xa9) - db %1(0x6c),%1(0x56),%1(0xf4),%1(0xea),%1(0x65),%1(0x7a),%1(0xae),%1(0x08) - db %1(0xba),%1(0x78),%1(0x25),%1(0x2e),%1(0x1c),%1(0xa6),%1(0xb4),%1(0xc6) - db %1(0xe8),%1(0xdd),%1(0x74),%1(0x1f),%1(0x4b),%1(0xbd),%1(0x8b),%1(0x8a) - db %1(0x70),%1(0x3e),%1(0xb5),%1(0x66),%1(0x48),%1(0x03),%1(0xf6),%1(0x0e) - db %1(0x61),%1(0x35),%1(0x57),%1(0xb9),%1(0x86),%1(0xc1),%1(0x1d),%1(0x9e) - db %1(0xe1),%1(0xf8),%1(0x98),%1(0x11),%1(0x69),%1(0xd9),%1(0x8e),%1(0x94) - db %1(0x9b),%1(0x1e),%1(0x87),%1(0xe9),%1(0xce),%1(0x55),%1(0x28),%1(0xdf) - db %1(0x8c),%1(0xa1),%1(0x89),%1(0x0d),%1(0xbf),%1(0xe6),%1(0x42),%1(0x68) - db %1(0x41),%1(0x99),%1(0x2d),%1(0x0f),%1(0xb0),%1(0x54),%1(0xbb),%1(0x16) -%endmacro - -%macro dec_vals 1 - db %1(0x52),%1(0x09),%1(0x6a),%1(0xd5),%1(0x30),%1(0x36),%1(0xa5),%1(0x38) - db %1(0xbf),%1(0x40),%1(0xa3),%1(0x9e),%1(0x81),%1(0xf3),%1(0xd7),%1(0xfb) - db %1(0x7c),%1(0xe3),%1(0x39),%1(0x82),%1(0x9b),%1(0x2f),%1(0xff),%1(0x87) - db %1(0x34),%1(0x8e),%1(0x43),%1(0x44),%1(0xc4),%1(0xde),%1(0xe9),%1(0xcb) - db %1(0x54),%1(0x7b),%1(0x94),%1(0x32),%1(0xa6),%1(0xc2),%1(0x23),%1(0x3d) - db %1(0xee),%1(0x4c),%1(0x95),%1(0x0b),%1(0x42),%1(0xfa),%1(0xc3),%1(0x4e) - db %1(0x08),%1(0x2e),%1(0xa1),%1(0x66),%1(0x28),%1(0xd9),%1(0x24),%1(0xb2) - db %1(0x76),%1(0x5b),%1(0xa2),%1(0x49),%1(0x6d),%1(0x8b),%1(0xd1),%1(0x25) - db %1(0x72),%1(0xf8),%1(0xf6),%1(0x64),%1(0x86),%1(0x68),%1(0x98),%1(0x16) - db %1(0xd4),%1(0xa4),%1(0x5c),%1(0xcc),%1(0x5d),%1(0x65),%1(0xb6),%1(0x92) - db %1(0x6c),%1(0x70),%1(0x48),%1(0x50),%1(0xfd),%1(0xed),%1(0xb9),%1(0xda) - db %1(0x5e),%1(0x15),%1(0x46),%1(0x57),%1(0xa7),%1(0x8d),%1(0x9d),%1(0x84) - db %1(0x90),%1(0xd8),%1(0xab),%1(0x00),%1(0x8c),%1(0xbc),%1(0xd3),%1(0x0a) - db %1(0xf7),%1(0xe4),%1(0x58),%1(0x05),%1(0xb8),%1(0xb3),%1(0x45),%1(0x06) - db %1(0xd0),%1(0x2c),%1(0x1e),%1(0x8f),%1(0xca),%1(0x3f),%1(0x0f),%1(0x02) - db %1(0xc1),%1(0xaf),%1(0xbd),%1(0x03),%1(0x01),%1(0x13),%1(0x8a),%1(0x6b) - db %1(0x3a),%1(0x91),%1(0x11),%1(0x41),%1(0x4f),%1(0x67),%1(0xdc),%1(0xea) - db %1(0x97),%1(0xf2),%1(0xcf),%1(0xce),%1(0xf0),%1(0xb4),%1(0xe6),%1(0x73) - db %1(0x96),%1(0xac),%1(0x74),%1(0x22),%1(0xe7),%1(0xad),%1(0x35),%1(0x85) - db %1(0xe2),%1(0xf9),%1(0x37),%1(0xe8),%1(0x1c),%1(0x75),%1(0xdf),%1(0x6e) - db %1(0x47),%1(0xf1),%1(0x1a),%1(0x71),%1(0x1d),%1(0x29),%1(0xc5),%1(0x89) - db %1(0x6f),%1(0xb7),%1(0x62),%1(0x0e),%1(0xaa),%1(0x18),%1(0xbe),%1(0x1b) - db %1(0xfc),%1(0x56),%1(0x3e),%1(0x4b),%1(0xc6),%1(0xd2),%1(0x79),%1(0x20) - db %1(0x9a),%1(0xdb),%1(0xc0),%1(0xfe),%1(0x78),%1(0xcd),%1(0x5a),%1(0xf4) - db %1(0x1f),%1(0xdd),%1(0xa8),%1(0x33),%1(0x88),%1(0x07),%1(0xc7),%1(0x31) - db %1(0xb1),%1(0x12),%1(0x10),%1(0x59),%1(0x27),%1(0x80),%1(0xec),%1(0x5f) - db %1(0x60),%1(0x51),%1(0x7f),%1(0xa9),%1(0x19),%1(0xb5),%1(0x4a),%1(0x0d) - db %1(0x2d),%1(0xe5),%1(0x7a),%1(0x9f),%1(0x93),%1(0xc9),%1(0x9c),%1(0xef) - db %1(0xa0),%1(0xe0),%1(0x3b),%1(0x4d),%1(0xae),%1(0x2a),%1(0xf5),%1(0xb0) - db %1(0xc8),%1(0xeb),%1(0xbb),%1(0x3c),%1(0x83),%1(0x53),%1(0x99),%1(0x61) - db %1(0x17),%1(0x2b),%1(0x04),%1(0x7e),%1(0xba),%1(0x77),%1(0xd6),%1(0x26) - db %1(0xe1),%1(0x69),%1(0x14),%1(0x63),%1(0x55),%1(0x21),%1(0x0c),%1(0x7d) -%endmacro - -%define u8(x) f2(x), x, x, f3(x), f2(x), x, x, f3(x) -%define v8(x) fe(x), f9(x), fd(x), fb(x), fe(x), f9(x), fd(x), x -%define w8(x) x, 0, 0, 0, x, 0, 0, 0 - -%define tptr rbp ; table pointer -%define kptr r8 ; key schedule pointer -%define fofs 128 ; adjust offset in key schedule to keep |disp| < 128 -%define fk_ref(x,y) [kptr-16*x+fofs+4*y] -%ifdef AES_REV_DKS -%define rofs 128 -%define ik_ref(x,y) [kptr-16*x+rofs+4*y] -%else -%define rofs -128 -%define ik_ref(x,y) [kptr+16*x+rofs+4*y] -%endif - -%define tab_0(x) [tptr+8*x] -%define tab_1(x) [tptr+8*x+3] -%define tab_2(x) [tptr+8*x+2] -%define tab_3(x) [tptr+8*x+1] -%define tab_f(x) byte [tptr+8*x+1] -%define tab_i(x) byte [tptr+8*x+7] -%define t_ref(x,r) tab_ %+ x(r) - -%macro ff_rnd 5 ; normal forward round - mov %1d, fk_ref(%5,0) - mov %2d, fk_ref(%5,1) - mov %3d, fk_ref(%5,2) - mov %4d, fk_ref(%5,3) - - movzx esi, al - movzx edi, ah - shr eax, 16 - xor %1d, t_ref(0,rsi) - xor %4d, t_ref(1,rdi) - movzx esi, al - movzx edi, ah - xor %3d, t_ref(2,rsi) - xor %2d, t_ref(3,rdi) - - movzx esi, bl - movzx edi, bh - shr ebx, 16 - xor %2d, t_ref(0,rsi) - xor %1d, t_ref(1,rdi) - movzx esi, bl - movzx edi, bh - xor %4d, t_ref(2,rsi) - xor %3d, t_ref(3,rdi) - - movzx esi, cl - movzx edi, ch - shr ecx, 16 - xor %3d, t_ref(0,rsi) - xor %2d, t_ref(1,rdi) - movzx esi, cl - movzx edi, ch - xor %1d, t_ref(2,rsi) - xor %4d, t_ref(3,rdi) - - movzx esi, dl - movzx edi, dh - shr edx, 16 - xor %4d, t_ref(0,rsi) - xor %3d, t_ref(1,rdi) - movzx esi, dl - movzx edi, dh - xor %2d, t_ref(2,rsi) - xor %1d, t_ref(3,rdi) - - mov eax,%1d - mov ebx,%2d - mov ecx,%3d - mov edx,%4d -%endmacro - -%ifdef LAST_ROUND_TABLES - -%macro fl_rnd 5 ; last forward round - add tptr, 2048 - mov %1d, fk_ref(%5,0) - mov %2d, fk_ref(%5,1) - mov %3d, fk_ref(%5,2) - mov %4d, fk_ref(%5,3) - - movzx esi, al - movzx edi, ah - shr eax, 16 - xor %1d, t_ref(0,rsi) - xor %4d, t_ref(1,rdi) - movzx esi, al - movzx edi, ah - xor %3d, t_ref(2,rsi) - xor %2d, t_ref(3,rdi) - - movzx esi, bl - movzx edi, bh - shr ebx, 16 - xor %2d, t_ref(0,rsi) - xor %1d, t_ref(1,rdi) - movzx esi, bl - movzx edi, bh - xor %4d, t_ref(2,rsi) - xor %3d, t_ref(3,rdi) - - movzx esi, cl - movzx edi, ch - shr ecx, 16 - xor %3d, t_ref(0,rsi) - xor %2d, t_ref(1,rdi) - movzx esi, cl - movzx edi, ch - xor %1d, t_ref(2,rsi) - xor %4d, t_ref(3,rdi) - - movzx esi, dl - movzx edi, dh - shr edx, 16 - xor %4d, t_ref(0,rsi) - xor %3d, t_ref(1,rdi) - movzx esi, dl - movzx edi, dh - xor %2d, t_ref(2,rsi) - xor %1d, t_ref(3,rdi) -%endmacro - -%else - -%macro fl_rnd 5 ; last forward round - mov %1d, fk_ref(%5,0) - mov %2d, fk_ref(%5,1) - mov %3d, fk_ref(%5,2) - mov %4d, fk_ref(%5,3) - - movzx esi, al - movzx edi, ah - shr eax, 16 - movzx esi, t_ref(f,rsi) - movzx edi, t_ref(f,rdi) - xor %1d, esi - rol edi, 8 - xor %4d, edi - movzx esi, al - movzx edi, ah - movzx esi, t_ref(f,rsi) - movzx edi, t_ref(f,rdi) - rol esi, 16 - rol edi, 24 - xor %3d, esi - xor %2d, edi - - movzx esi, bl - movzx edi, bh - shr ebx, 16 - movzx esi, t_ref(f,rsi) - movzx edi, t_ref(f,rdi) - xor %2d, esi - rol edi, 8 - xor %1d, edi - movzx esi, bl - movzx edi, bh - movzx esi, t_ref(f,rsi) - movzx edi, t_ref(f,rdi) - rol esi, 16 - rol edi, 24 - xor %4d, esi - xor %3d, edi - - movzx esi, cl - movzx edi, ch - movzx esi, t_ref(f,rsi) - movzx edi, t_ref(f,rdi) - shr ecx, 16 - xor %3d, esi - rol edi, 8 - xor %2d, edi - movzx esi, cl - movzx edi, ch - movzx esi, t_ref(f,rsi) - movzx edi, t_ref(f,rdi) - rol esi, 16 - rol edi, 24 - xor %1d, esi - xor %4d, edi - - movzx esi, dl - movzx edi, dh - movzx esi, t_ref(f,rsi) - movzx edi, t_ref(f,rdi) - shr edx, 16 - xor %4d, esi - rol edi, 8 - xor %3d, edi - movzx esi, dl - movzx edi, dh - movzx esi, t_ref(f,rsi) - movzx edi, t_ref(f,rdi) - rol esi, 16 - rol edi, 24 - xor %2d, esi - xor %1d, edi -%endmacro - -%endif - -%macro ii_rnd 5 ; normal inverse round - mov %1d, ik_ref(%5,0) - mov %2d, ik_ref(%5,1) - mov %3d, ik_ref(%5,2) - mov %4d, ik_ref(%5,3) - - movzx esi, al - movzx edi, ah - shr eax, 16 - xor %1d, t_ref(0,rsi) - xor %2d, t_ref(1,rdi) - movzx esi, al - movzx edi, ah - xor %3d, t_ref(2,rsi) - xor %4d, t_ref(3,rdi) - - movzx esi, bl - movzx edi, bh - shr ebx, 16 - xor %2d, t_ref(0,rsi) - xor %3d, t_ref(1,rdi) - movzx esi, bl - movzx edi, bh - xor %4d, t_ref(2,rsi) - xor %1d, t_ref(3,rdi) - - movzx esi, cl - movzx edi, ch - shr ecx, 16 - xor %3d, t_ref(0,rsi) - xor %4d, t_ref(1,rdi) - movzx esi, cl - movzx edi, ch - xor %1d, t_ref(2,rsi) - xor %2d, t_ref(3,rdi) - - movzx esi, dl - movzx edi, dh - shr edx, 16 - xor %4d, t_ref(0,rsi) - xor %1d, t_ref(1,rdi) - movzx esi, dl - movzx edi, dh - xor %2d, t_ref(2,rsi) - xor %3d, t_ref(3,rdi) - - mov eax,%1d - mov ebx,%2d - mov ecx,%3d - mov edx,%4d -%endmacro - -%ifdef LAST_ROUND_TABLES - -%macro il_rnd 5 ; last inverse round - add tptr, 2048 - mov %1d, ik_ref(%5,0) - mov %2d, ik_ref(%5,1) - mov %3d, ik_ref(%5,2) - mov %4d, ik_ref(%5,3) - - movzx esi, al - movzx edi, ah - shr eax, 16 - xor %1d, t_ref(0,rsi) - xor %2d, t_ref(1,rdi) - movzx esi, al - movzx edi, ah - xor %3d, t_ref(2,rsi) - xor %4d, t_ref(3,rdi) - - movzx esi, bl - movzx edi, bh - shr ebx, 16 - xor %2d, t_ref(0,rsi) - xor %3d, t_ref(1,rdi) - movzx esi, bl - movzx edi, bh - xor %4d, t_ref(2,rsi) - xor %1d, t_ref(3,rdi) - - movzx esi, cl - movzx edi, ch - shr ecx, 16 - xor %3d, t_ref(0,rsi) - xor %4d, t_ref(1,rdi) - movzx esi, cl - movzx edi, ch - xor %1d, t_ref(2,rsi) - xor %2d, t_ref(3,rdi) - - movzx esi, dl - movzx edi, dh - shr edx, 16 - xor %4d, t_ref(0,rsi) - xor %1d, t_ref(1,rdi) - movzx esi, dl - movzx edi, dh - xor %2d, t_ref(2,rsi) - xor %3d, t_ref(3,rdi) -%endmacro - -%else - -%macro il_rnd 5 ; last inverse round - mov %1d, ik_ref(%5,0) - mov %2d, ik_ref(%5,1) - mov %3d, ik_ref(%5,2) - mov %4d, ik_ref(%5,3) - - movzx esi, al - movzx edi, ah - movzx esi, t_ref(i,rsi) - movzx edi, t_ref(i,rdi) - shr eax, 16 - xor %1d, esi - rol edi, 8 - xor %2d, edi - movzx esi, al - movzx edi, ah - movzx esi, t_ref(i,rsi) - movzx edi, t_ref(i,rdi) - rol esi, 16 - rol edi, 24 - xor %3d, esi - xor %4d, edi - - movzx esi, bl - movzx edi, bh - movzx esi, t_ref(i,rsi) - movzx edi, t_ref(i,rdi) - shr ebx, 16 - xor %2d, esi - rol edi, 8 - xor %3d, edi - movzx esi, bl - movzx edi, bh - movzx esi, t_ref(i,rsi) - movzx edi, t_ref(i,rdi) - rol esi, 16 - rol edi, 24 - xor %4d, esi - xor %1d, edi - - movzx esi, cl - movzx edi, ch - movzx esi, t_ref(i,rsi) - movzx edi, t_ref(i,rdi) - shr ecx, 16 - xor %3d, esi - rol edi, 8 - xor %4d, edi - movzx esi, cl - movzx edi, ch - movzx esi, t_ref(i,rsi) - movzx edi, t_ref(i,rdi) - rol esi, 16 - rol edi, 24 - xor %1d, esi - xor %2d, edi - - movzx esi, dl - movzx edi, dh - movzx esi, t_ref(i,rsi) - movzx edi, t_ref(i,rdi) - shr edx, 16 - xor %4d, esi - rol edi, 8 - xor %1d, edi - movzx esi, dl - movzx edi, dh - movzx esi, t_ref(i,rsi) - movzx edi, t_ref(i,rdi) - rol esi, 16 - rol edi, 24 - xor %2d, esi - xor %3d, edi -%endmacro - -%endif - -%ifdef ENCRYPTION - - global aes_encrypt -%ifdef DLL_EXPORT - export aes_encrypt -%endif - - section .data align=64 - align 64 -enc_tab: - enc_vals u8 -%ifdef LAST_ROUND_TABLES - enc_vals w8 -%endif - - section .text align=16 - align 16 - -%ifdef _SEH_ -proc_frame aes_encrypt - alloc_stack 7*8 ; 7 to align stack to 16 bytes - save_reg rsi,4*8 - save_reg rdi,5*8 - save_reg rbx,1*8 - save_reg rbp,2*8 - save_reg r12,3*8 -end_prologue - mov rdi, rcx ; input pointer - mov [rsp+0*8], rdx ; output pointer -%else - aes_encrypt: - %ifdef __GNUC__ - sub rsp, 4*8 ; gnu/linux binary interface - mov [rsp+0*8], rsi ; output pointer - mov r8, rdx ; context - %else - sub rsp, 6*8 ; windows binary interface - mov [rsp+4*8], rsi - mov [rsp+5*8], rdi - mov rdi, rcx ; input pointer - mov [rsp+0*8], rdx ; output pointer - %endif - mov [rsp+1*8], rbx ; input pointer in rdi - mov [rsp+2*8], rbp ; output pointer in [rsp] - mov [rsp+3*8], r12 ; context in r8 -%endif - - movzx esi, byte [kptr+4*KS_LENGTH] - lea tptr, [rel enc_tab] - sub kptr, fofs - - mov eax, [rdi+0*4] - mov ebx, [rdi+1*4] - mov ecx, [rdi+2*4] - mov edx, [rdi+3*4] - - xor eax, [kptr+fofs] - xor ebx, [kptr+fofs+4] - xor ecx, [kptr+fofs+8] - xor edx, [kptr+fofs+12] - - lea kptr,[kptr+rsi] - cmp esi, 10*16 - je .3 - cmp esi, 12*16 - je .2 - cmp esi, 14*16 - je .1 - mov rax, -1 - jmp .4 - -.1: ff_rnd r9, r10, r11, r12, 13 - ff_rnd r9, r10, r11, r12, 12 -.2: ff_rnd r9, r10, r11, r12, 11 - ff_rnd r9, r10, r11, r12, 10 -.3: ff_rnd r9, r10, r11, r12, 9 - ff_rnd r9, r10, r11, r12, 8 - ff_rnd r9, r10, r11, r12, 7 - ff_rnd r9, r10, r11, r12, 6 - ff_rnd r9, r10, r11, r12, 5 - ff_rnd r9, r10, r11, r12, 4 - ff_rnd r9, r10, r11, r12, 3 - ff_rnd r9, r10, r11, r12, 2 - ff_rnd r9, r10, r11, r12, 1 - fl_rnd r9, r10, r11, r12, 0 - - mov rbx, [rsp] - mov [rbx], r9d - mov [rbx+4], r10d - mov [rbx+8], r11d - mov [rbx+12], r12d - xor rax, rax -.4: - mov rbx, [rsp+1*8] - mov rbp, [rsp+2*8] - mov r12, [rsp+3*8] -%ifdef __GNUC__ - add rsp, 4*8 - ret -%else - mov rsi, [rsp+4*8] - mov rdi, [rsp+5*8] - %ifdef _SEH_ - add rsp, 7*8 - ret - endproc_frame - %else - add rsp, 6*8 - ret - %endif -%endif - -%endif - -%ifdef DECRYPTION - - global aes_decrypt -%ifdef DLL_EXPORT - export aes_decrypt -%endif - - section .data - align 64 -dec_tab: - dec_vals v8 -%ifdef LAST_ROUND_TABLES - dec_vals w8 -%endif - - section .text - align 16 - -%ifdef _SEH_ -proc_frame aes_decrypt - alloc_stack 7*8 ; 7 to align stack to 16 bytes - save_reg rsi,4*8 - save_reg rdi,5*8 - save_reg rbx,1*8 - save_reg rbp,2*8 - save_reg r12,3*8 -end_prologue - mov rdi, rcx ; input pointer - mov [rsp+0*8], rdx ; output pointer -%else - aes_decrypt: - %ifdef __GNUC__ - sub rsp, 4*8 ; gnu/linux binary interface - mov [rsp+0*8], rsi ; output pointer - mov r8, rdx ; context - %else - sub rsp, 6*8 ; windows binary interface - mov [rsp+4*8], rsi - mov [rsp+5*8], rdi - mov rdi, rcx ; input pointer - mov [rsp+0*8], rdx ; output pointer - %endif - mov [rsp+1*8], rbx ; input pointer in rdi - mov [rsp+2*8], rbp ; output pointer in [rsp] - mov [rsp+3*8], r12 ; context in r8 -%endif - - movzx esi,byte[kptr+4*KS_LENGTH] - lea tptr, [rel dec_tab] - sub kptr, rofs - - mov eax, [rdi+0*4] - mov ebx, [rdi+1*4] - mov ecx, [rdi+2*4] - mov edx, [rdi+3*4] - -%ifdef AES_REV_DKS - mov rdi, kptr - lea kptr,[kptr+rsi] -%else - lea rdi,[kptr+rsi] -%endif - - xor eax, [rdi+rofs] - xor ebx, [rdi+rofs+4] - xor ecx, [rdi+rofs+8] - xor edx, [rdi+rofs+12] - - cmp esi, 10*16 - je .3 - cmp esi, 12*16 - je .2 - cmp esi, 14*16 - je .1 - mov rax, -1 - jmp .4 - -.1: ii_rnd r9, r10, r11, r12, 13 - ii_rnd r9, r10, r11, r12, 12 -.2: ii_rnd r9, r10, r11, r12, 11 - ii_rnd r9, r10, r11, r12, 10 -.3: ii_rnd r9, r10, r11, r12, 9 - ii_rnd r9, r10, r11, r12, 8 - ii_rnd r9, r10, r11, r12, 7 - ii_rnd r9, r10, r11, r12, 6 - ii_rnd r9, r10, r11, r12, 5 - ii_rnd r9, r10, r11, r12, 4 - ii_rnd r9, r10, r11, r12, 3 - ii_rnd r9, r10, r11, r12, 2 - ii_rnd r9, r10, r11, r12, 1 - il_rnd r9, r10, r11, r12, 0 - - mov rbx, [rsp] - mov [rbx], r9d - mov [rbx+4], r10d - mov [rbx+8], r11d - mov [rbx+12], r12d - xor rax, rax -.4: mov rbx, [rsp+1*8] - mov rbp, [rsp+2*8] - mov r12, [rsp+3*8] -%ifdef __GNUC__ - add rsp, 4*8 - ret -%else - mov rsi, [rsp+4*8] - mov rdi, [rsp+5*8] - %ifdef _SEH_ - add rsp, 7*8 - ret - endproc_frame - %else - add rsp, 6*8 - ret - %endif -%endif - -%endif + +; --------------------------------------------------------------------------- +; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. +; +; LICENSE TERMS +; +; The free distribution and use of this software is allowed (with or without +; changes) provided that: +; +; 1. source code distributions include the above copyright notice, this +; list of conditions and the following disclaimer; +; +; 2. binary distributions include the above copyright notice, this list +; of conditions and the following disclaimer in their documentation; +; +; 3. the name of the copyright holder is not used to endorse products +; built using this software without specific written permission. +; +; DISCLAIMER +; +; This software is provided 'as is' with no explicit or implied warranties +; in respect of its properties, including, but not limited to, correctness +; and/or fitness for purpose. +; --------------------------------------------------------------------------- +; Issue 20/12/2007 +; +; I am grateful to Dag Arne Osvik for many discussions of the techniques that +; can be used to optimise AES assembler code on AMD64/EM64T architectures. +; Some of the techniques used in this implementation are the result of +; suggestions made by him for which I am most grateful. + +; +; Adapted for TrueCrypt: +; - Compatibility with NASM +; + +; An AES implementation for AMD64 processors using the YASM assembler. This +; implemetation provides only encryption, decryption and hence requires key +; scheduling support in C. It uses 8k bytes of tables but its encryption and +; decryption performance is very close to that obtained using large tables. +; It can use either Windows or Gnu/Linux calling conventions, which are as +; follows: +; windows gnu/linux +; +; in_blk rcx rdi +; out_blk rdx rsi +; context (cx) r8 rdx +; +; preserved rsi - + rbx, rbp, rsp, r12, r13, r14 & r15 +; registers rdi - on both +; +; destroyed - rsi + rax, rcx, rdx, r8, r9, r10 & r11 +; registers - rdi on both +; +; The default convention is that for windows, the gnu/linux convention being +; used if __GNUC__ is defined. +; +; Define _SEH_ to include support for Win64 structured exception handling +; (this requires YASM version 0.6 or later). +; +; This code provides the standard AES block size (128 bits, 16 bytes) and the +; three standard AES key sizes (128, 192 and 256 bits). It has the same call +; interface as my C implementation. It uses the Microsoft C AMD64 calling +; conventions in which the three parameters are placed in rcx, rdx and r8 +; respectively. The rbx, rsi, rdi, rbp and r12..r15 registers are preserved. +; +; AES_RETURN aes_encrypt(const unsigned char in_blk[], +; unsigned char out_blk[], const aes_encrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt(const unsigned char in_blk[], +; unsigned char out_blk[], const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_encrypt_key(const unsigned char key[], +; const aes_encrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt_key(const unsigned char key[], +; const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_encrypt_key(const unsigned char key[], +; unsigned int len, const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt_key(const unsigned char key[], +; unsigned int len, const aes_decrypt_ctx cx[1]); +; +; where is 128, 102 or 256. In the last two calls the length can be in +; either bits or bytes. +; +; Comment in/out the following lines to obtain the desired subroutines. These +; selections MUST match those in the C header file aes.h + +; %define AES_128 ; define if AES with 128 bit keys is needed +; %define AES_192 ; define if AES with 192 bit keys is needed +%define AES_256 ; define if AES with 256 bit keys is needed +; %define AES_VAR ; define if a variable key size is needed +%define ENCRYPTION ; define if encryption is needed +%define DECRYPTION ; define if decryption is needed +%define AES_REV_DKS ; define if key decryption schedule is reversed +%define LAST_ROUND_TABLES ; define for the faster version using extra tables + +; The encryption key schedule has the following in memory layout where N is the +; number of rounds (10, 12 or 14): +; +; lo: | input key (round 0) | ; each round is four 32-bit words +; | encryption round 1 | +; | encryption round 2 | +; .... +; | encryption round N-1 | +; hi: | encryption round N | +; +; The decryption key schedule is normally set up so that it has the same +; layout as above by actually reversing the order of the encryption key +; schedule in memory (this happens when AES_REV_DKS is set): +; +; lo: | decryption round 0 | = | encryption round N | +; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ] +; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ] +; .... .... +; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ] +; hi: | decryption round N | = | input key (round 0) | +; +; with rounds except the first and last modified using inv_mix_column() +; But if AES_REV_DKS is NOT set the order of keys is left as it is for +; encryption so that it has to be accessed in reverse when used for +; decryption (although the inverse mix column modifications are done) +; +; lo: | decryption round 0 | = | input key (round 0) | +; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ] +; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ] +; .... .... +; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ] +; hi: | decryption round N | = | encryption round N | +; +; This layout is faster when the assembler key scheduling provided here +; is used. +; +; The DLL interface must use the _stdcall convention in which the number +; of bytes of parameter space is added after an @ to the sutine's name. +; We must also remove our parameters from the stack before return (see +; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version. + +;%define DLL_EXPORT + +; End of user defines + +%ifdef AES_VAR +%ifndef AES_128 +%define AES_128 +%endif +%ifndef AES_192 +%define AES_192 +%endif +%ifndef AES_256 +%define AES_256 +%endif +%endif + +%ifdef AES_VAR +%define KS_LENGTH 60 +%elifdef AES_256 +%define KS_LENGTH 60 +%elifdef AES_192 +%define KS_LENGTH 52 +%else +%define KS_LENGTH 44 +%endif + +%define r0 rax +%define r1 rdx +%define r2 rcx +%define r3 rbx +%define r4 rsi +%define r5 rdi +%define r6 rbp +%define r7 rsp + +%define raxd eax +%define rdxd edx +%define rcxd ecx +%define rbxd ebx +%define rsid esi +%define rdid edi +%define rbpd ebp +%define rspd esp + +%define raxb al +%define rdxb dl +%define rcxb cl +%define rbxb bl +%define rsib sil +%define rdib dil +%define rbpb bpl +%define rspb spl + +%define r0h ah +%define r1h dh +%define r2h ch +%define r3h bh + +%define r0d eax +%define r1d edx +%define r2d ecx +%define r3d ebx + +; finite field multiplies by {02}, {04} and {08} + +%define f2(x) ((x<<1)^(((x>>7)&1)*0x11b)) +%define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b)) +%define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b)) + +; finite field multiplies required in table generation + +%define f3(x) (f2(x) ^ x) +%define f9(x) (f8(x) ^ x) +%define fb(x) (f8(x) ^ f2(x) ^ x) +%define fd(x) (f8(x) ^ f4(x) ^ x) +%define fe(x) (f8(x) ^ f4(x) ^ f2(x)) + +; macro for expanding S-box data + +%macro enc_vals 1 + db %1(0x63),%1(0x7c),%1(0x77),%1(0x7b),%1(0xf2),%1(0x6b),%1(0x6f),%1(0xc5) + db %1(0x30),%1(0x01),%1(0x67),%1(0x2b),%1(0xfe),%1(0xd7),%1(0xab),%1(0x76) + db %1(0xca),%1(0x82),%1(0xc9),%1(0x7d),%1(0xfa),%1(0x59),%1(0x47),%1(0xf0) + db %1(0xad),%1(0xd4),%1(0xa2),%1(0xaf),%1(0x9c),%1(0xa4),%1(0x72),%1(0xc0) + db %1(0xb7),%1(0xfd),%1(0x93),%1(0x26),%1(0x36),%1(0x3f),%1(0xf7),%1(0xcc) + db %1(0x34),%1(0xa5),%1(0xe5),%1(0xf1),%1(0x71),%1(0xd8),%1(0x31),%1(0x15) + db %1(0x04),%1(0xc7),%1(0x23),%1(0xc3),%1(0x18),%1(0x96),%1(0x05),%1(0x9a) + db %1(0x07),%1(0x12),%1(0x80),%1(0xe2),%1(0xeb),%1(0x27),%1(0xb2),%1(0x75) + db %1(0x09),%1(0x83),%1(0x2c),%1(0x1a),%1(0x1b),%1(0x6e),%1(0x5a),%1(0xa0) + db %1(0x52),%1(0x3b),%1(0xd6),%1(0xb3),%1(0x29),%1(0xe3),%1(0x2f),%1(0x84) + db %1(0x53),%1(0xd1),%1(0x00),%1(0xed),%1(0x20),%1(0xfc),%1(0xb1),%1(0x5b) + db %1(0x6a),%1(0xcb),%1(0xbe),%1(0x39),%1(0x4a),%1(0x4c),%1(0x58),%1(0xcf) + db %1(0xd0),%1(0xef),%1(0xaa),%1(0xfb),%1(0x43),%1(0x4d),%1(0x33),%1(0x85) + db %1(0x45),%1(0xf9),%1(0x02),%1(0x7f),%1(0x50),%1(0x3c),%1(0x9f),%1(0xa8) + db %1(0x51),%1(0xa3),%1(0x40),%1(0x8f),%1(0x92),%1(0x9d),%1(0x38),%1(0xf5) + db %1(0xbc),%1(0xb6),%1(0xda),%1(0x21),%1(0x10),%1(0xff),%1(0xf3),%1(0xd2) + db %1(0xcd),%1(0x0c),%1(0x13),%1(0xec),%1(0x5f),%1(0x97),%1(0x44),%1(0x17) + db %1(0xc4),%1(0xa7),%1(0x7e),%1(0x3d),%1(0x64),%1(0x5d),%1(0x19),%1(0x73) + db %1(0x60),%1(0x81),%1(0x4f),%1(0xdc),%1(0x22),%1(0x2a),%1(0x90),%1(0x88) + db %1(0x46),%1(0xee),%1(0xb8),%1(0x14),%1(0xde),%1(0x5e),%1(0x0b),%1(0xdb) + db %1(0xe0),%1(0x32),%1(0x3a),%1(0x0a),%1(0x49),%1(0x06),%1(0x24),%1(0x5c) + db %1(0xc2),%1(0xd3),%1(0xac),%1(0x62),%1(0x91),%1(0x95),%1(0xe4),%1(0x79) + db %1(0xe7),%1(0xc8),%1(0x37),%1(0x6d),%1(0x8d),%1(0xd5),%1(0x4e),%1(0xa9) + db %1(0x6c),%1(0x56),%1(0xf4),%1(0xea),%1(0x65),%1(0x7a),%1(0xae),%1(0x08) + db %1(0xba),%1(0x78),%1(0x25),%1(0x2e),%1(0x1c),%1(0xa6),%1(0xb4),%1(0xc6) + db %1(0xe8),%1(0xdd),%1(0x74),%1(0x1f),%1(0x4b),%1(0xbd),%1(0x8b),%1(0x8a) + db %1(0x70),%1(0x3e),%1(0xb5),%1(0x66),%1(0x48),%1(0x03),%1(0xf6),%1(0x0e) + db %1(0x61),%1(0x35),%1(0x57),%1(0xb9),%1(0x86),%1(0xc1),%1(0x1d),%1(0x9e) + db %1(0xe1),%1(0xf8),%1(0x98),%1(0x11),%1(0x69),%1(0xd9),%1(0x8e),%1(0x94) + db %1(0x9b),%1(0x1e),%1(0x87),%1(0xe9),%1(0xce),%1(0x55),%1(0x28),%1(0xdf) + db %1(0x8c),%1(0xa1),%1(0x89),%1(0x0d),%1(0xbf),%1(0xe6),%1(0x42),%1(0x68) + db %1(0x41),%1(0x99),%1(0x2d),%1(0x0f),%1(0xb0),%1(0x54),%1(0xbb),%1(0x16) +%endmacro + +%macro dec_vals 1 + db %1(0x52),%1(0x09),%1(0x6a),%1(0xd5),%1(0x30),%1(0x36),%1(0xa5),%1(0x38) + db %1(0xbf),%1(0x40),%1(0xa3),%1(0x9e),%1(0x81),%1(0xf3),%1(0xd7),%1(0xfb) + db %1(0x7c),%1(0xe3),%1(0x39),%1(0x82),%1(0x9b),%1(0x2f),%1(0xff),%1(0x87) + db %1(0x34),%1(0x8e),%1(0x43),%1(0x44),%1(0xc4),%1(0xde),%1(0xe9),%1(0xcb) + db %1(0x54),%1(0x7b),%1(0x94),%1(0x32),%1(0xa6),%1(0xc2),%1(0x23),%1(0x3d) + db %1(0xee),%1(0x4c),%1(0x95),%1(0x0b),%1(0x42),%1(0xfa),%1(0xc3),%1(0x4e) + db %1(0x08),%1(0x2e),%1(0xa1),%1(0x66),%1(0x28),%1(0xd9),%1(0x24),%1(0xb2) + db %1(0x76),%1(0x5b),%1(0xa2),%1(0x49),%1(0x6d),%1(0x8b),%1(0xd1),%1(0x25) + db %1(0x72),%1(0xf8),%1(0xf6),%1(0x64),%1(0x86),%1(0x68),%1(0x98),%1(0x16) + db %1(0xd4),%1(0xa4),%1(0x5c),%1(0xcc),%1(0x5d),%1(0x65),%1(0xb6),%1(0x92) + db %1(0x6c),%1(0x70),%1(0x48),%1(0x50),%1(0xfd),%1(0xed),%1(0xb9),%1(0xda) + db %1(0x5e),%1(0x15),%1(0x46),%1(0x57),%1(0xa7),%1(0x8d),%1(0x9d),%1(0x84) + db %1(0x90),%1(0xd8),%1(0xab),%1(0x00),%1(0x8c),%1(0xbc),%1(0xd3),%1(0x0a) + db %1(0xf7),%1(0xe4),%1(0x58),%1(0x05),%1(0xb8),%1(0xb3),%1(0x45),%1(0x06) + db %1(0xd0),%1(0x2c),%1(0x1e),%1(0x8f),%1(0xca),%1(0x3f),%1(0x0f),%1(0x02) + db %1(0xc1),%1(0xaf),%1(0xbd),%1(0x03),%1(0x01),%1(0x13),%1(0x8a),%1(0x6b) + db %1(0x3a),%1(0x91),%1(0x11),%1(0x41),%1(0x4f),%1(0x67),%1(0xdc),%1(0xea) + db %1(0x97),%1(0xf2),%1(0xcf),%1(0xce),%1(0xf0),%1(0xb4),%1(0xe6),%1(0x73) + db %1(0x96),%1(0xac),%1(0x74),%1(0x22),%1(0xe7),%1(0xad),%1(0x35),%1(0x85) + db %1(0xe2),%1(0xf9),%1(0x37),%1(0xe8),%1(0x1c),%1(0x75),%1(0xdf),%1(0x6e) + db %1(0x47),%1(0xf1),%1(0x1a),%1(0x71),%1(0x1d),%1(0x29),%1(0xc5),%1(0x89) + db %1(0x6f),%1(0xb7),%1(0x62),%1(0x0e),%1(0xaa),%1(0x18),%1(0xbe),%1(0x1b) + db %1(0xfc),%1(0x56),%1(0x3e),%1(0x4b),%1(0xc6),%1(0xd2),%1(0x79),%1(0x20) + db %1(0x9a),%1(0xdb),%1(0xc0),%1(0xfe),%1(0x78),%1(0xcd),%1(0x5a),%1(0xf4) + db %1(0x1f),%1(0xdd),%1(0xa8),%1(0x33),%1(0x88),%1(0x07),%1(0xc7),%1(0x31) + db %1(0xb1),%1(0x12),%1(0x10),%1(0x59),%1(0x27),%1(0x80),%1(0xec),%1(0x5f) + db %1(0x60),%1(0x51),%1(0x7f),%1(0xa9),%1(0x19),%1(0xb5),%1(0x4a),%1(0x0d) + db %1(0x2d),%1(0xe5),%1(0x7a),%1(0x9f),%1(0x93),%1(0xc9),%1(0x9c),%1(0xef) + db %1(0xa0),%1(0xe0),%1(0x3b),%1(0x4d),%1(0xae),%1(0x2a),%1(0xf5),%1(0xb0) + db %1(0xc8),%1(0xeb),%1(0xbb),%1(0x3c),%1(0x83),%1(0x53),%1(0x99),%1(0x61) + db %1(0x17),%1(0x2b),%1(0x04),%1(0x7e),%1(0xba),%1(0x77),%1(0xd6),%1(0x26) + db %1(0xe1),%1(0x69),%1(0x14),%1(0x63),%1(0x55),%1(0x21),%1(0x0c),%1(0x7d) +%endmacro + +%define u8(x) f2(x), x, x, f3(x), f2(x), x, x, f3(x) +%define v8(x) fe(x), f9(x), fd(x), fb(x), fe(x), f9(x), fd(x), x +%define w8(x) x, 0, 0, 0, x, 0, 0, 0 + +%define tptr rbp ; table pointer +%define kptr r8 ; key schedule pointer +%define fofs 128 ; adjust offset in key schedule to keep |disp| < 128 +%define fk_ref(x,y) [kptr-16*x+fofs+4*y] +%ifdef AES_REV_DKS +%define rofs 128 +%define ik_ref(x,y) [kptr-16*x+rofs+4*y] +%else +%define rofs -128 +%define ik_ref(x,y) [kptr+16*x+rofs+4*y] +%endif + +%define tab_0(x) [tptr+8*x] +%define tab_1(x) [tptr+8*x+3] +%define tab_2(x) [tptr+8*x+2] +%define tab_3(x) [tptr+8*x+1] +%define tab_f(x) byte [tptr+8*x+1] +%define tab_i(x) byte [tptr+8*x+7] +%define t_ref(x,r) tab_ %+ x(r) + +%macro ff_rnd 5 ; normal forward round + mov %1d, fk_ref(%5,0) + mov %2d, fk_ref(%5,1) + mov %3d, fk_ref(%5,2) + mov %4d, fk_ref(%5,3) + + movzx esi, al + movzx edi, ah + shr eax, 16 + xor %1d, t_ref(0,rsi) + xor %4d, t_ref(1,rdi) + movzx esi, al + movzx edi, ah + xor %3d, t_ref(2,rsi) + xor %2d, t_ref(3,rdi) + + movzx esi, bl + movzx edi, bh + shr ebx, 16 + xor %2d, t_ref(0,rsi) + xor %1d, t_ref(1,rdi) + movzx esi, bl + movzx edi, bh + xor %4d, t_ref(2,rsi) + xor %3d, t_ref(3,rdi) + + movzx esi, cl + movzx edi, ch + shr ecx, 16 + xor %3d, t_ref(0,rsi) + xor %2d, t_ref(1,rdi) + movzx esi, cl + movzx edi, ch + xor %1d, t_ref(2,rsi) + xor %4d, t_ref(3,rdi) + + movzx esi, dl + movzx edi, dh + shr edx, 16 + xor %4d, t_ref(0,rsi) + xor %3d, t_ref(1,rdi) + movzx esi, dl + movzx edi, dh + xor %2d, t_ref(2,rsi) + xor %1d, t_ref(3,rdi) + + mov eax,%1d + mov ebx,%2d + mov ecx,%3d + mov edx,%4d +%endmacro + +%ifdef LAST_ROUND_TABLES + +%macro fl_rnd 5 ; last forward round + add tptr, 2048 + mov %1d, fk_ref(%5,0) + mov %2d, fk_ref(%5,1) + mov %3d, fk_ref(%5,2) + mov %4d, fk_ref(%5,3) + + movzx esi, al + movzx edi, ah + shr eax, 16 + xor %1d, t_ref(0,rsi) + xor %4d, t_ref(1,rdi) + movzx esi, al + movzx edi, ah + xor %3d, t_ref(2,rsi) + xor %2d, t_ref(3,rdi) + + movzx esi, bl + movzx edi, bh + shr ebx, 16 + xor %2d, t_ref(0,rsi) + xor %1d, t_ref(1,rdi) + movzx esi, bl + movzx edi, bh + xor %4d, t_ref(2,rsi) + xor %3d, t_ref(3,rdi) + + movzx esi, cl + movzx edi, ch + shr ecx, 16 + xor %3d, t_ref(0,rsi) + xor %2d, t_ref(1,rdi) + movzx esi, cl + movzx edi, ch + xor %1d, t_ref(2,rsi) + xor %4d, t_ref(3,rdi) + + movzx esi, dl + movzx edi, dh + shr edx, 16 + xor %4d, t_ref(0,rsi) + xor %3d, t_ref(1,rdi) + movzx esi, dl + movzx edi, dh + xor %2d, t_ref(2,rsi) + xor %1d, t_ref(3,rdi) +%endmacro + +%else + +%macro fl_rnd 5 ; last forward round + mov %1d, fk_ref(%5,0) + mov %2d, fk_ref(%5,1) + mov %3d, fk_ref(%5,2) + mov %4d, fk_ref(%5,3) + + movzx esi, al + movzx edi, ah + shr eax, 16 + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + xor %1d, esi + rol edi, 8 + xor %4d, edi + movzx esi, al + movzx edi, ah + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + rol esi, 16 + rol edi, 24 + xor %3d, esi + xor %2d, edi + + movzx esi, bl + movzx edi, bh + shr ebx, 16 + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + xor %2d, esi + rol edi, 8 + xor %1d, edi + movzx esi, bl + movzx edi, bh + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + rol esi, 16 + rol edi, 24 + xor %4d, esi + xor %3d, edi + + movzx esi, cl + movzx edi, ch + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + shr ecx, 16 + xor %3d, esi + rol edi, 8 + xor %2d, edi + movzx esi, cl + movzx edi, ch + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + rol esi, 16 + rol edi, 24 + xor %1d, esi + xor %4d, edi + + movzx esi, dl + movzx edi, dh + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + shr edx, 16 + xor %4d, esi + rol edi, 8 + xor %3d, edi + movzx esi, dl + movzx edi, dh + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + rol esi, 16 + rol edi, 24 + xor %2d, esi + xor %1d, edi +%endmacro + +%endif + +%macro ii_rnd 5 ; normal inverse round + mov %1d, ik_ref(%5,0) + mov %2d, ik_ref(%5,1) + mov %3d, ik_ref(%5,2) + mov %4d, ik_ref(%5,3) + + movzx esi, al + movzx edi, ah + shr eax, 16 + xor %1d, t_ref(0,rsi) + xor %2d, t_ref(1,rdi) + movzx esi, al + movzx edi, ah + xor %3d, t_ref(2,rsi) + xor %4d, t_ref(3,rdi) + + movzx esi, bl + movzx edi, bh + shr ebx, 16 + xor %2d, t_ref(0,rsi) + xor %3d, t_ref(1,rdi) + movzx esi, bl + movzx edi, bh + xor %4d, t_ref(2,rsi) + xor %1d, t_ref(3,rdi) + + movzx esi, cl + movzx edi, ch + shr ecx, 16 + xor %3d, t_ref(0,rsi) + xor %4d, t_ref(1,rdi) + movzx esi, cl + movzx edi, ch + xor %1d, t_ref(2,rsi) + xor %2d, t_ref(3,rdi) + + movzx esi, dl + movzx edi, dh + shr edx, 16 + xor %4d, t_ref(0,rsi) + xor %1d, t_ref(1,rdi) + movzx esi, dl + movzx edi, dh + xor %2d, t_ref(2,rsi) + xor %3d, t_ref(3,rdi) + + mov eax,%1d + mov ebx,%2d + mov ecx,%3d + mov edx,%4d +%endmacro + +%ifdef LAST_ROUND_TABLES + +%macro il_rnd 5 ; last inverse round + add tptr, 2048 + mov %1d, ik_ref(%5,0) + mov %2d, ik_ref(%5,1) + mov %3d, ik_ref(%5,2) + mov %4d, ik_ref(%5,3) + + movzx esi, al + movzx edi, ah + shr eax, 16 + xor %1d, t_ref(0,rsi) + xor %2d, t_ref(1,rdi) + movzx esi, al + movzx edi, ah + xor %3d, t_ref(2,rsi) + xor %4d, t_ref(3,rdi) + + movzx esi, bl + movzx edi, bh + shr ebx, 16 + xor %2d, t_ref(0,rsi) + xor %3d, t_ref(1,rdi) + movzx esi, bl + movzx edi, bh + xor %4d, t_ref(2,rsi) + xor %1d, t_ref(3,rdi) + + movzx esi, cl + movzx edi, ch + shr ecx, 16 + xor %3d, t_ref(0,rsi) + xor %4d, t_ref(1,rdi) + movzx esi, cl + movzx edi, ch + xor %1d, t_ref(2,rsi) + xor %2d, t_ref(3,rdi) + + movzx esi, dl + movzx edi, dh + shr edx, 16 + xor %4d, t_ref(0,rsi) + xor %1d, t_ref(1,rdi) + movzx esi, dl + movzx edi, dh + xor %2d, t_ref(2,rsi) + xor %3d, t_ref(3,rdi) +%endmacro + +%else + +%macro il_rnd 5 ; last inverse round + mov %1d, ik_ref(%5,0) + mov %2d, ik_ref(%5,1) + mov %3d, ik_ref(%5,2) + mov %4d, ik_ref(%5,3) + + movzx esi, al + movzx edi, ah + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + shr eax, 16 + xor %1d, esi + rol edi, 8 + xor %2d, edi + movzx esi, al + movzx edi, ah + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + rol esi, 16 + rol edi, 24 + xor %3d, esi + xor %4d, edi + + movzx esi, bl + movzx edi, bh + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + shr ebx, 16 + xor %2d, esi + rol edi, 8 + xor %3d, edi + movzx esi, bl + movzx edi, bh + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + rol esi, 16 + rol edi, 24 + xor %4d, esi + xor %1d, edi + + movzx esi, cl + movzx edi, ch + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + shr ecx, 16 + xor %3d, esi + rol edi, 8 + xor %4d, edi + movzx esi, cl + movzx edi, ch + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + rol esi, 16 + rol edi, 24 + xor %1d, esi + xor %2d, edi + + movzx esi, dl + movzx edi, dh + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + shr edx, 16 + xor %4d, esi + rol edi, 8 + xor %1d, edi + movzx esi, dl + movzx edi, dh + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + rol esi, 16 + rol edi, 24 + xor %2d, esi + xor %3d, edi +%endmacro + +%endif + +%ifdef ENCRYPTION + + global aes_encrypt +%ifdef DLL_EXPORT + export aes_encrypt +%endif + + section .data align=64 + align 64 +enc_tab: + enc_vals u8 +%ifdef LAST_ROUND_TABLES + enc_vals w8 +%endif + + section .text align=16 + align 16 + +%ifdef _SEH_ +proc_frame aes_encrypt + alloc_stack 7*8 ; 7 to align stack to 16 bytes + save_reg rsi,4*8 + save_reg rdi,5*8 + save_reg rbx,1*8 + save_reg rbp,2*8 + save_reg r12,3*8 +end_prologue + mov rdi, rcx ; input pointer + mov [rsp+0*8], rdx ; output pointer +%else + aes_encrypt: + %ifdef __GNUC__ + sub rsp, 4*8 ; gnu/linux binary interface + mov [rsp+0*8], rsi ; output pointer + mov r8, rdx ; context + %else + sub rsp, 6*8 ; windows binary interface + mov [rsp+4*8], rsi + mov [rsp+5*8], rdi + mov rdi, rcx ; input pointer + mov [rsp+0*8], rdx ; output pointer + %endif + mov [rsp+1*8], rbx ; input pointer in rdi + mov [rsp+2*8], rbp ; output pointer in [rsp] + mov [rsp+3*8], r12 ; context in r8 +%endif + + movzx esi, byte [kptr+4*KS_LENGTH] + lea tptr, [rel enc_tab] + sub kptr, fofs + + mov eax, [rdi+0*4] + mov ebx, [rdi+1*4] + mov ecx, [rdi+2*4] + mov edx, [rdi+3*4] + + xor eax, [kptr+fofs] + xor ebx, [kptr+fofs+4] + xor ecx, [kptr+fofs+8] + xor edx, [kptr+fofs+12] + + lea kptr,[kptr+rsi] + cmp esi, 10*16 + je .3 + cmp esi, 12*16 + je .2 + cmp esi, 14*16 + je .1 + mov rax, -1 + jmp .4 + +.1: ff_rnd r9, r10, r11, r12, 13 + ff_rnd r9, r10, r11, r12, 12 +.2: ff_rnd r9, r10, r11, r12, 11 + ff_rnd r9, r10, r11, r12, 10 +.3: ff_rnd r9, r10, r11, r12, 9 + ff_rnd r9, r10, r11, r12, 8 + ff_rnd r9, r10, r11, r12, 7 + ff_rnd r9, r10, r11, r12, 6 + ff_rnd r9, r10, r11, r12, 5 + ff_rnd r9, r10, r11, r12, 4 + ff_rnd r9, r10, r11, r12, 3 + ff_rnd r9, r10, r11, r12, 2 + ff_rnd r9, r10, r11, r12, 1 + fl_rnd r9, r10, r11, r12, 0 + + mov rbx, [rsp] + mov [rbx], r9d + mov [rbx+4], r10d + mov [rbx+8], r11d + mov [rbx+12], r12d + xor rax, rax +.4: + mov rbx, [rsp+1*8] + mov rbp, [rsp+2*8] + mov r12, [rsp+3*8] +%ifdef __GNUC__ + add rsp, 4*8 + ret +%else + mov rsi, [rsp+4*8] + mov rdi, [rsp+5*8] + %ifdef _SEH_ + add rsp, 7*8 + ret + endproc_frame + %else + add rsp, 6*8 + ret + %endif +%endif + +%endif + +%ifdef DECRYPTION + + global aes_decrypt +%ifdef DLL_EXPORT + export aes_decrypt +%endif + + section .data + align 64 +dec_tab: + dec_vals v8 +%ifdef LAST_ROUND_TABLES + dec_vals w8 +%endif + + section .text + align 16 + +%ifdef _SEH_ +proc_frame aes_decrypt + alloc_stack 7*8 ; 7 to align stack to 16 bytes + save_reg rsi,4*8 + save_reg rdi,5*8 + save_reg rbx,1*8 + save_reg rbp,2*8 + save_reg r12,3*8 +end_prologue + mov rdi, rcx ; input pointer + mov [rsp+0*8], rdx ; output pointer +%else + aes_decrypt: + %ifdef __GNUC__ + sub rsp, 4*8 ; gnu/linux binary interface + mov [rsp+0*8], rsi ; output pointer + mov r8, rdx ; context + %else + sub rsp, 6*8 ; windows binary interface + mov [rsp+4*8], rsi + mov [rsp+5*8], rdi + mov rdi, rcx ; input pointer + mov [rsp+0*8], rdx ; output pointer + %endif + mov [rsp+1*8], rbx ; input pointer in rdi + mov [rsp+2*8], rbp ; output pointer in [rsp] + mov [rsp+3*8], r12 ; context in r8 +%endif + + movzx esi,byte[kptr+4*KS_LENGTH] + lea tptr, [rel dec_tab] + sub kptr, rofs + + mov eax, [rdi+0*4] + mov ebx, [rdi+1*4] + mov ecx, [rdi+2*4] + mov edx, [rdi+3*4] + +%ifdef AES_REV_DKS + mov rdi, kptr + lea kptr,[kptr+rsi] +%else + lea rdi,[kptr+rsi] +%endif + + xor eax, [rdi+rofs] + xor ebx, [rdi+rofs+4] + xor ecx, [rdi+rofs+8] + xor edx, [rdi+rofs+12] + + cmp esi, 10*16 + je .3 + cmp esi, 12*16 + je .2 + cmp esi, 14*16 + je .1 + mov rax, -1 + jmp .4 + +.1: ii_rnd r9, r10, r11, r12, 13 + ii_rnd r9, r10, r11, r12, 12 +.2: ii_rnd r9, r10, r11, r12, 11 + ii_rnd r9, r10, r11, r12, 10 +.3: ii_rnd r9, r10, r11, r12, 9 + ii_rnd r9, r10, r11, r12, 8 + ii_rnd r9, r10, r11, r12, 7 + ii_rnd r9, r10, r11, r12, 6 + ii_rnd r9, r10, r11, r12, 5 + ii_rnd r9, r10, r11, r12, 4 + ii_rnd r9, r10, r11, r12, 3 + ii_rnd r9, r10, r11, r12, 2 + ii_rnd r9, r10, r11, r12, 1 + il_rnd r9, r10, r11, r12, 0 + + mov rbx, [rsp] + mov [rbx], r9d + mov [rbx+4], r10d + mov [rbx+8], r11d + mov [rbx+12], r12d + xor rax, rax +.4: mov rbx, [rsp+1*8] + mov rbp, [rsp+2*8] + mov r12, [rsp+3*8] +%ifdef __GNUC__ + add rsp, 4*8 + ret +%else + mov rsi, [rsp+4*8] + mov rdi, [rsp+5*8] + %ifdef _SEH_ + add rsp, 7*8 + ret + endproc_frame + %else + add rsp, 6*8 + ret + %endif +%endif + +%endif diff --git a/src/Crypto/Aes_x86.asm b/src/Crypto/Aes_x86.asm index 239da3e3..3825deee 100644 --- a/src/Crypto/Aes_x86.asm +++ b/src/Crypto/Aes_x86.asm @@ -1,646 +1,646 @@ - -; --------------------------------------------------------------------------- -; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. -; -; LICENSE TERMS -; -; The free distribution and use of this software is allowed (with or without -; changes) provided that: -; -; 1. source code distributions include the above copyright notice, this -; list of conditions and the following disclaimer; -; -; 2. binary distributions include the above copyright notice, this list -; of conditions and the following disclaimer in their documentation; -; -; 3. the name of the copyright holder is not used to endorse products -; built using this software without specific written permission. -; -; DISCLAIMER -; -; This software is provided 'as is' with no explicit or implied warranties -; in respect of its properties, including, but not limited to, correctness -; and/or fitness for purpose. -; --------------------------------------------------------------------------- -; Issue 20/12/2007 -; -; This code requires ASM_X86_V1C to be set in aesopt.h. It requires the C files -; aeskey.c and aestab.c for support. - -; -; Adapted for TrueCrypt: -; - Compatibility with NASM and GCC -; - -; An AES implementation for x86 processors using the YASM (or NASM) assembler. -; This is an assembler implementation that covers encryption and decryption -; only and is intended as a replacement of the C file aescrypt.c. It hence -; requires the file aeskey.c for keying and aestab.c for the AES tables. It -; employs full tables rather than compressed tables. - -; This code provides the standard AES block size (128 bits, 16 bytes) and the -; three standard AES key sizes (128, 192 and 256 bits). It has the same call -; interface as my C implementation. The ebx, esi, edi and ebp registers are -; preserved across calls but eax, ecx and edx and the artihmetic status flags -; are not. It is also important that the defines below match those used in the -; C code. This code uses the VC++ register saving conentions; if it is used -; with another compiler, conventions for using and saving registers may need to -; be checked (and calling conventions). The YASM command line for the VC++ -; custom build step is: -; -; yasm -Xvc -f win32 -o "$(TargetDir)\$(InputName).obj" "$(InputPath)" -; -; The calling intefaces are: -; -; AES_RETURN aes_encrypt(const unsigned char in_blk[], -; unsigned char out_blk[], const aes_encrypt_ctx cx[1]); -; -; AES_RETURN aes_decrypt(const unsigned char in_blk[], -; unsigned char out_blk[], const aes_decrypt_ctx cx[1]); -; -; AES_RETURN aes_encrypt_key(const unsigned char key[], -; const aes_encrypt_ctx cx[1]); -; -; AES_RETURN aes_decrypt_key(const unsigned char key[], -; const aes_decrypt_ctx cx[1]); -; -; AES_RETURN aes_encrypt_key(const unsigned char key[], -; unsigned int len, const aes_decrypt_ctx cx[1]); -; -; AES_RETURN aes_decrypt_key(const unsigned char key[], -; unsigned int len, const aes_decrypt_ctx cx[1]); -; -; where is 128, 102 or 256. In the last two calls the length can be in -; either bits or bytes. -; -; Comment in/out the following lines to obtain the desired subroutines. These -; selections MUST match those in the C header file aes.h - -; %define AES_128 ; define if AES with 128 bit keys is needed -; %define AES_192 ; define if AES with 192 bit keys is needed -%define AES_256 ; define if AES with 256 bit keys is needed -; %define AES_VAR ; define if a variable key size is needed -%define ENCRYPTION ; define if encryption is needed -%define DECRYPTION ; define if decryption is needed -%define AES_REV_DKS ; define if key decryption schedule is reversed -%define LAST_ROUND_TABLES ; define if tables are to be used for last round - -; offsets to parameters - -in_blk equ 4 ; input byte array address parameter -out_blk equ 8 ; output byte array address parameter -ctx equ 12 ; AES context structure -stk_spc equ 20 ; stack space -%define parms 12 ; parameter space on stack - -; The encryption key schedule has the following in memory layout where N is the -; number of rounds (10, 12 or 14): -; -; lo: | input key (round 0) | ; each round is four 32-bit words -; | encryption round 1 | -; | encryption round 2 | -; .... -; | encryption round N-1 | -; hi: | encryption round N | -; -; The decryption key schedule is normally set up so that it has the same -; layout as above by actually reversing the order of the encryption key -; schedule in memory (this happens when AES_REV_DKS is set): -; -; lo: | decryption round 0 | = | encryption round N | -; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ] -; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ] -; .... .... -; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ] -; hi: | decryption round N | = | input key (round 0) | -; -; with rounds except the first and last modified using inv_mix_column() -; But if AES_REV_DKS is NOT set the order of keys is left as it is for -; encryption so that it has to be accessed in reverse when used for -; decryption (although the inverse mix column modifications are done) -; -; lo: | decryption round 0 | = | input key (round 0) | -; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ] -; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ] -; .... .... -; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ] -; hi: | decryption round N | = | encryption round N | -; -; This layout is faster when the assembler key scheduling provided here -; is used. -; -; The DLL interface must use the _stdcall convention in which the number -; of bytes of parameter space is added after an @ to the sutine's name. -; We must also remove our parameters from the stack before return (see -; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version. - -;%define DLL_EXPORT - -; End of user defines - -%ifdef AES_VAR -%ifndef AES_128 -%define AES_128 -%endif -%ifndef AES_192 -%define AES_192 -%endif -%ifndef AES_256 -%define AES_256 -%endif -%endif - -%ifdef AES_VAR -%define KS_LENGTH 60 -%elifdef AES_256 -%define KS_LENGTH 60 -%elifdef AES_192 -%define KS_LENGTH 52 -%else -%define KS_LENGTH 44 -%endif - -; These macros implement stack based local variables - -%macro save 2 - mov [esp+4*%1],%2 -%endmacro - -%macro restore 2 - mov %1,[esp+4*%2] -%endmacro - -; the DLL has to implement the _stdcall calling interface on return -; In this case we have to take our parameters (3 4-byte pointers) -; off the stack - -%macro do_name 1-2 parms -%ifndef DLL_EXPORT - align 32 - global %1 -%1: -%else - align 32 - global %1@%2 - export _%1@%2 -%1@%2: -%endif -%endmacro - -%macro do_call 1-2 parms -%ifndef DLL_EXPORT - call %1 - add esp,%2 -%else - call %1@%2 -%endif -%endmacro - -%macro do_exit 0-1 parms -%ifdef DLL_EXPORT - ret %1 -%else - ret -%endif -%endmacro - -%ifdef ENCRYPTION - - extern t_fn - -%define etab_0(x) [t_fn+4*x] -%define etab_1(x) [t_fn+1024+4*x] -%define etab_2(x) [t_fn+2048+4*x] -%define etab_3(x) [t_fn+3072+4*x] - -%ifdef LAST_ROUND_TABLES - - extern t_fl - -%define eltab_0(x) [t_fl+4*x] -%define eltab_1(x) [t_fl+1024+4*x] -%define eltab_2(x) [t_fl+2048+4*x] -%define eltab_3(x) [t_fl+3072+4*x] - -%else - -%define etab_b(x) byte [t_fn+3072+4*x] - -%endif - -; ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the -; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX. -; -; Input: -; -; EAX column[0] -; EBX column[1] -; ECX column[2] -; EDX column[3] -; ESI column key[round][2] -; EDI column key[round][3] -; EBP scratch -; -; Output: -; -; EBP column[0] unkeyed -; EBX column[1] unkeyed -; ESI column[2] keyed -; EDI column[3] keyed -; EAX scratch -; ECX scratch -; EDX scratch - -%macro rnd_fun 2 - - rol ebx,16 - %1 esi, cl, 0, ebp - %1 esi, dh, 1, ebp - %1 esi, bh, 3, ebp - %1 edi, dl, 0, ebp - %1 edi, ah, 1, ebp - %1 edi, bl, 2, ebp - %2 ebp, al, 0, ebp - shr ebx,16 - and eax,0xffff0000 - or eax,ebx - shr edx,16 - %1 ebp, ah, 1, ebx - %1 ebp, dh, 3, ebx - %2 ebx, dl, 2, ebx - %1 ebx, ch, 1, edx - %1 ebx, al, 0, edx - shr eax,16 - shr ecx,16 - %1 ebp, cl, 2, edx - %1 edi, ch, 3, edx - %1 esi, al, 2, edx - %1 ebx, ah, 3, edx - -%endmacro - -; Basic MOV and XOR Operations for normal rounds - -%macro nr_xor 4 - movzx %4,%2 - xor %1,etab_%3(%4) -%endmacro - -%macro nr_mov 4 - movzx %4,%2 - mov %1,etab_%3(%4) -%endmacro - -; Basic MOV and XOR Operations for last round - -%ifdef LAST_ROUND_TABLES - - %macro lr_xor 4 - movzx %4,%2 - xor %1,eltab_%3(%4) - %endmacro - - %macro lr_mov 4 - movzx %4,%2 - mov %1,eltab_%3(%4) - %endmacro - -%else - - %macro lr_xor 4 - movzx %4,%2 - movzx %4,etab_b(%4) - %if %3 != 0 - shl %4,8*%3 - %endif - xor %1,%4 - %endmacro - - %macro lr_mov 4 - movzx %4,%2 - movzx %1,etab_b(%4) - %if %3 != 0 - shl %1,8*%3 - %endif - %endmacro - -%endif - -%macro enc_round 0 - - add ebp,16 - save 0,ebp - mov esi,[ebp+8] - mov edi,[ebp+12] - - rnd_fun nr_xor, nr_mov - - mov eax,ebp - mov ecx,esi - mov edx,edi - restore ebp,0 - xor eax,[ebp] - xor ebx,[ebp+4] - -%endmacro - -%macro enc_last_round 0 - - add ebp,16 - save 0,ebp - mov esi,[ebp+8] - mov edi,[ebp+12] - - rnd_fun lr_xor, lr_mov - - mov eax,ebp - restore ebp,0 - xor eax,[ebp] - xor ebx,[ebp+4] - -%endmacro - - section .text align=32 - -; AES Encryption Subroutine - - do_name aes_encrypt - - sub esp,stk_spc - mov [esp+16],ebp - mov [esp+12],ebx - mov [esp+ 8],esi - mov [esp+ 4],edi - - mov esi,[esp+in_blk+stk_spc] ; input pointer - mov eax,[esi ] - mov ebx,[esi+ 4] - mov ecx,[esi+ 8] - mov edx,[esi+12] - - mov ebp,[esp+ctx+stk_spc] ; key pointer - movzx edi,byte [ebp+4*KS_LENGTH] - xor eax,[ebp ] - xor ebx,[ebp+ 4] - xor ecx,[ebp+ 8] - xor edx,[ebp+12] - -; determine the number of rounds - - cmp edi,10*16 - je .3 - cmp edi,12*16 - je .2 - cmp edi,14*16 - je .1 - mov eax,-1 - jmp .5 - -.1: enc_round - enc_round -.2: enc_round - enc_round -.3: enc_round - enc_round - enc_round - enc_round - enc_round - enc_round - enc_round - enc_round - enc_round - enc_last_round - - mov edx,[esp+out_blk+stk_spc] - mov [edx],eax - mov [edx+4],ebx - mov [edx+8],esi - mov [edx+12],edi - xor eax,eax - -.5: mov ebp,[esp+16] - mov ebx,[esp+12] - mov esi,[esp+ 8] - mov edi,[esp+ 4] - add esp,stk_spc - do_exit - -%endif - -%ifdef DECRYPTION - - extern t_in - -%define dtab_0(x) [t_in+4*x] -%define dtab_1(x) [t_in+1024+4*x] -%define dtab_2(x) [t_in+2048+4*x] -%define dtab_3(x) [t_in+3072+4*x] - -%ifdef LAST_ROUND_TABLES - - extern t_il - -%define dltab_0(x) [t_il+4*x] -%define dltab_1(x) [t_il+1024+4*x] -%define dltab_2(x) [t_il+2048+4*x] -%define dltab_3(x) [t_il+3072+4*x] - -%else - - extern _t_ibox - -%define dtab_x(x) byte [_t_ibox+x] - -%endif - -%macro irn_fun 2 - - rol eax,16 - %1 esi, cl, 0, ebp - %1 esi, bh, 1, ebp - %1 esi, al, 2, ebp - %1 edi, dl, 0, ebp - %1 edi, ch, 1, ebp - %1 edi, ah, 3, ebp - %2 ebp, bl, 0, ebp - shr eax,16 - and ebx,0xffff0000 - or ebx,eax - shr ecx,16 - %1 ebp, bh, 1, eax - %1 ebp, ch, 3, eax - %2 eax, cl, 2, ecx - %1 eax, bl, 0, ecx - %1 eax, dh, 1, ecx - shr ebx,16 - shr edx,16 - %1 esi, dh, 3, ecx - %1 ebp, dl, 2, ecx - %1 eax, bh, 3, ecx - %1 edi, bl, 2, ecx - -%endmacro - -; Basic MOV and XOR Operations for normal rounds - -%macro ni_xor 4 - movzx %4,%2 - xor %1,dtab_%3(%4) -%endmacro - -%macro ni_mov 4 - movzx %4,%2 - mov %1,dtab_%3(%4) -%endmacro - -; Basic MOV and XOR Operations for last round - -%ifdef LAST_ROUND_TABLES - -%macro li_xor 4 - movzx %4,%2 - xor %1,dltab_%3(%4) -%endmacro - -%macro li_mov 4 - movzx %4,%2 - mov %1,dltab_%3(%4) -%endmacro - -%else - - %macro li_xor 4 - movzx %4,%2 - movzx %4,dtab_x(%4) - %if %3 != 0 - shl %4,8*%3 - %endif - xor %1,%4 - %endmacro - - %macro li_mov 4 - movzx %4,%2 - movzx %1,dtab_x(%4) - %if %3 != 0 - shl %1,8*%3 - %endif - %endmacro - -%endif - -%macro dec_round 0 - -%ifdef AES_REV_DKS - add ebp,16 -%else - sub ebp,16 -%endif - save 0,ebp - mov esi,[ebp+8] - mov edi,[ebp+12] - - irn_fun ni_xor, ni_mov - - mov ebx,ebp - mov ecx,esi - mov edx,edi - restore ebp,0 - xor eax,[ebp] - xor ebx,[ebp+4] - -%endmacro - -%macro dec_last_round 0 - -%ifdef AES_REV_DKS - add ebp,16 -%else - sub ebp,16 -%endif - save 0,ebp - mov esi,[ebp+8] - mov edi,[ebp+12] - - irn_fun li_xor, li_mov - - mov ebx,ebp - restore ebp,0 - xor eax,[ebp] - xor ebx,[ebp+4] - -%endmacro - - section .text - -; AES Decryption Subroutine - - do_name aes_decrypt - - sub esp,stk_spc - mov [esp+16],ebp - mov [esp+12],ebx - mov [esp+ 8],esi - mov [esp+ 4],edi - -; input four columns and xor in first round key - - mov esi,[esp+in_blk+stk_spc] ; input pointer - mov eax,[esi ] - mov ebx,[esi+ 4] - mov ecx,[esi+ 8] - mov edx,[esi+12] - lea esi,[esi+16] - - mov ebp,[esp+ctx+stk_spc] ; key pointer - movzx edi,byte[ebp+4*KS_LENGTH] -%ifndef AES_REV_DKS ; if decryption key schedule is not reversed - lea ebp,[ebp+edi] ; we have to access it from the top down -%endif - xor eax,[ebp ] ; key schedule - xor ebx,[ebp+ 4] - xor ecx,[ebp+ 8] - xor edx,[ebp+12] - -; determine the number of rounds - - cmp edi,10*16 - je .3 - cmp edi,12*16 - je .2 - cmp edi,14*16 - je .1 - mov eax,-1 - jmp .5 - -.1: dec_round - dec_round -.2: dec_round - dec_round -.3: dec_round - dec_round - dec_round - dec_round - dec_round - dec_round - dec_round - dec_round - dec_round - dec_last_round - -; move final values to the output array. - - mov ebp,[esp+out_blk+stk_spc] - mov [ebp],eax - mov [ebp+4],ebx - mov [ebp+8],esi - mov [ebp+12],edi - xor eax,eax - -.5: mov ebp,[esp+16] - mov ebx,[esp+12] - mov esi,[esp+ 8] - mov edi,[esp+ 4] - add esp,stk_spc - do_exit - -%endif + +; --------------------------------------------------------------------------- +; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. +; +; LICENSE TERMS +; +; The free distribution and use of this software is allowed (with or without +; changes) provided that: +; +; 1. source code distributions include the above copyright notice, this +; list of conditions and the following disclaimer; +; +; 2. binary distributions include the above copyright notice, this list +; of conditions and the following disclaimer in their documentation; +; +; 3. the name of the copyright holder is not used to endorse products +; built using this software without specific written permission. +; +; DISCLAIMER +; +; This software is provided 'as is' with no explicit or implied warranties +; in respect of its properties, including, but not limited to, correctness +; and/or fitness for purpose. +; --------------------------------------------------------------------------- +; Issue 20/12/2007 +; +; This code requires ASM_X86_V1C to be set in aesopt.h. It requires the C files +; aeskey.c and aestab.c for support. + +; +; Adapted for TrueCrypt: +; - Compatibility with NASM and GCC +; + +; An AES implementation for x86 processors using the YASM (or NASM) assembler. +; This is an assembler implementation that covers encryption and decryption +; only and is intended as a replacement of the C file aescrypt.c. It hence +; requires the file aeskey.c for keying and aestab.c for the AES tables. It +; employs full tables rather than compressed tables. + +; This code provides the standard AES block size (128 bits, 16 bytes) and the +; three standard AES key sizes (128, 192 and 256 bits). It has the same call +; interface as my C implementation. The ebx, esi, edi and ebp registers are +; preserved across calls but eax, ecx and edx and the artihmetic status flags +; are not. It is also important that the defines below match those used in the +; C code. This code uses the VC++ register saving conentions; if it is used +; with another compiler, conventions for using and saving registers may need to +; be checked (and calling conventions). The YASM command line for the VC++ +; custom build step is: +; +; yasm -Xvc -f win32 -o "$(TargetDir)\$(InputName).obj" "$(InputPath)" +; +; The calling intefaces are: +; +; AES_RETURN aes_encrypt(const unsigned char in_blk[], +; unsigned char out_blk[], const aes_encrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt(const unsigned char in_blk[], +; unsigned char out_blk[], const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_encrypt_key(const unsigned char key[], +; const aes_encrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt_key(const unsigned char key[], +; const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_encrypt_key(const unsigned char key[], +; unsigned int len, const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt_key(const unsigned char key[], +; unsigned int len, const aes_decrypt_ctx cx[1]); +; +; where is 128, 102 or 256. In the last two calls the length can be in +; either bits or bytes. +; +; Comment in/out the following lines to obtain the desired subroutines. These +; selections MUST match those in the C header file aes.h + +; %define AES_128 ; define if AES with 128 bit keys is needed +; %define AES_192 ; define if AES with 192 bit keys is needed +%define AES_256 ; define if AES with 256 bit keys is needed +; %define AES_VAR ; define if a variable key size is needed +%define ENCRYPTION ; define if encryption is needed +%define DECRYPTION ; define if decryption is needed +%define AES_REV_DKS ; define if key decryption schedule is reversed +%define LAST_ROUND_TABLES ; define if tables are to be used for last round + +; offsets to parameters + +in_blk equ 4 ; input byte array address parameter +out_blk equ 8 ; output byte array address parameter +ctx equ 12 ; AES context structure +stk_spc equ 20 ; stack space +%define parms 12 ; parameter space on stack + +; The encryption key schedule has the following in memory layout where N is the +; number of rounds (10, 12 or 14): +; +; lo: | input key (round 0) | ; each round is four 32-bit words +; | encryption round 1 | +; | encryption round 2 | +; .... +; | encryption round N-1 | +; hi: | encryption round N | +; +; The decryption key schedule is normally set up so that it has the same +; layout as above by actually reversing the order of the encryption key +; schedule in memory (this happens when AES_REV_DKS is set): +; +; lo: | decryption round 0 | = | encryption round N | +; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ] +; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ] +; .... .... +; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ] +; hi: | decryption round N | = | input key (round 0) | +; +; with rounds except the first and last modified using inv_mix_column() +; But if AES_REV_DKS is NOT set the order of keys is left as it is for +; encryption so that it has to be accessed in reverse when used for +; decryption (although the inverse mix column modifications are done) +; +; lo: | decryption round 0 | = | input key (round 0) | +; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ] +; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ] +; .... .... +; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ] +; hi: | decryption round N | = | encryption round N | +; +; This layout is faster when the assembler key scheduling provided here +; is used. +; +; The DLL interface must use the _stdcall convention in which the number +; of bytes of parameter space is added after an @ to the sutine's name. +; We must also remove our parameters from the stack before return (see +; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version. + +;%define DLL_EXPORT + +; End of user defines + +%ifdef AES_VAR +%ifndef AES_128 +%define AES_128 +%endif +%ifndef AES_192 +%define AES_192 +%endif +%ifndef AES_256 +%define AES_256 +%endif +%endif + +%ifdef AES_VAR +%define KS_LENGTH 60 +%elifdef AES_256 +%define KS_LENGTH 60 +%elifdef AES_192 +%define KS_LENGTH 52 +%else +%define KS_LENGTH 44 +%endif + +; These macros implement stack based local variables + +%macro save 2 + mov [esp+4*%1],%2 +%endmacro + +%macro restore 2 + mov %1,[esp+4*%2] +%endmacro + +; the DLL has to implement the _stdcall calling interface on return +; In this case we have to take our parameters (3 4-byte pointers) +; off the stack + +%macro do_name 1-2 parms +%ifndef DLL_EXPORT + align 32 + global %1 +%1: +%else + align 32 + global %1@%2 + export _%1@%2 +%1@%2: +%endif +%endmacro + +%macro do_call 1-2 parms +%ifndef DLL_EXPORT + call %1 + add esp,%2 +%else + call %1@%2 +%endif +%endmacro + +%macro do_exit 0-1 parms +%ifdef DLL_EXPORT + ret %1 +%else + ret +%endif +%endmacro + +%ifdef ENCRYPTION + + extern t_fn + +%define etab_0(x) [t_fn+4*x] +%define etab_1(x) [t_fn+1024+4*x] +%define etab_2(x) [t_fn+2048+4*x] +%define etab_3(x) [t_fn+3072+4*x] + +%ifdef LAST_ROUND_TABLES + + extern t_fl + +%define eltab_0(x) [t_fl+4*x] +%define eltab_1(x) [t_fl+1024+4*x] +%define eltab_2(x) [t_fl+2048+4*x] +%define eltab_3(x) [t_fl+3072+4*x] + +%else + +%define etab_b(x) byte [t_fn+3072+4*x] + +%endif + +; ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the +; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX. +; +; Input: +; +; EAX column[0] +; EBX column[1] +; ECX column[2] +; EDX column[3] +; ESI column key[round][2] +; EDI column key[round][3] +; EBP scratch +; +; Output: +; +; EBP column[0] unkeyed +; EBX column[1] unkeyed +; ESI column[2] keyed +; EDI column[3] keyed +; EAX scratch +; ECX scratch +; EDX scratch + +%macro rnd_fun 2 + + rol ebx,16 + %1 esi, cl, 0, ebp + %1 esi, dh, 1, ebp + %1 esi, bh, 3, ebp + %1 edi, dl, 0, ebp + %1 edi, ah, 1, ebp + %1 edi, bl, 2, ebp + %2 ebp, al, 0, ebp + shr ebx,16 + and eax,0xffff0000 + or eax,ebx + shr edx,16 + %1 ebp, ah, 1, ebx + %1 ebp, dh, 3, ebx + %2 ebx, dl, 2, ebx + %1 ebx, ch, 1, edx + %1 ebx, al, 0, edx + shr eax,16 + shr ecx,16 + %1 ebp, cl, 2, edx + %1 edi, ch, 3, edx + %1 esi, al, 2, edx + %1 ebx, ah, 3, edx + +%endmacro + +; Basic MOV and XOR Operations for normal rounds + +%macro nr_xor 4 + movzx %4,%2 + xor %1,etab_%3(%4) +%endmacro + +%macro nr_mov 4 + movzx %4,%2 + mov %1,etab_%3(%4) +%endmacro + +; Basic MOV and XOR Operations for last round + +%ifdef LAST_ROUND_TABLES + + %macro lr_xor 4 + movzx %4,%2 + xor %1,eltab_%3(%4) + %endmacro + + %macro lr_mov 4 + movzx %4,%2 + mov %1,eltab_%3(%4) + %endmacro + +%else + + %macro lr_xor 4 + movzx %4,%2 + movzx %4,etab_b(%4) + %if %3 != 0 + shl %4,8*%3 + %endif + xor %1,%4 + %endmacro + + %macro lr_mov 4 + movzx %4,%2 + movzx %1,etab_b(%4) + %if %3 != 0 + shl %1,8*%3 + %endif + %endmacro + +%endif + +%macro enc_round 0 + + add ebp,16 + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + rnd_fun nr_xor, nr_mov + + mov eax,ebp + mov ecx,esi + mov edx,edi + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + +%macro enc_last_round 0 + + add ebp,16 + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + rnd_fun lr_xor, lr_mov + + mov eax,ebp + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + + section .text align=32 + +; AES Encryption Subroutine + + do_name aes_encrypt + + sub esp,stk_spc + mov [esp+16],ebp + mov [esp+12],ebx + mov [esp+ 8],esi + mov [esp+ 4],edi + + mov esi,[esp+in_blk+stk_spc] ; input pointer + mov eax,[esi ] + mov ebx,[esi+ 4] + mov ecx,[esi+ 8] + mov edx,[esi+12] + + mov ebp,[esp+ctx+stk_spc] ; key pointer + movzx edi,byte [ebp+4*KS_LENGTH] + xor eax,[ebp ] + xor ebx,[ebp+ 4] + xor ecx,[ebp+ 8] + xor edx,[ebp+12] + +; determine the number of rounds + + cmp edi,10*16 + je .3 + cmp edi,12*16 + je .2 + cmp edi,14*16 + je .1 + mov eax,-1 + jmp .5 + +.1: enc_round + enc_round +.2: enc_round + enc_round +.3: enc_round + enc_round + enc_round + enc_round + enc_round + enc_round + enc_round + enc_round + enc_round + enc_last_round + + mov edx,[esp+out_blk+stk_spc] + mov [edx],eax + mov [edx+4],ebx + mov [edx+8],esi + mov [edx+12],edi + xor eax,eax + +.5: mov ebp,[esp+16] + mov ebx,[esp+12] + mov esi,[esp+ 8] + mov edi,[esp+ 4] + add esp,stk_spc + do_exit + +%endif + +%ifdef DECRYPTION + + extern t_in + +%define dtab_0(x) [t_in+4*x] +%define dtab_1(x) [t_in+1024+4*x] +%define dtab_2(x) [t_in+2048+4*x] +%define dtab_3(x) [t_in+3072+4*x] + +%ifdef LAST_ROUND_TABLES + + extern t_il + +%define dltab_0(x) [t_il+4*x] +%define dltab_1(x) [t_il+1024+4*x] +%define dltab_2(x) [t_il+2048+4*x] +%define dltab_3(x) [t_il+3072+4*x] + +%else + + extern _t_ibox + +%define dtab_x(x) byte [_t_ibox+x] + +%endif + +%macro irn_fun 2 + + rol eax,16 + %1 esi, cl, 0, ebp + %1 esi, bh, 1, ebp + %1 esi, al, 2, ebp + %1 edi, dl, 0, ebp + %1 edi, ch, 1, ebp + %1 edi, ah, 3, ebp + %2 ebp, bl, 0, ebp + shr eax,16 + and ebx,0xffff0000 + or ebx,eax + shr ecx,16 + %1 ebp, bh, 1, eax + %1 ebp, ch, 3, eax + %2 eax, cl, 2, ecx + %1 eax, bl, 0, ecx + %1 eax, dh, 1, ecx + shr ebx,16 + shr edx,16 + %1 esi, dh, 3, ecx + %1 ebp, dl, 2, ecx + %1 eax, bh, 3, ecx + %1 edi, bl, 2, ecx + +%endmacro + +; Basic MOV and XOR Operations for normal rounds + +%macro ni_xor 4 + movzx %4,%2 + xor %1,dtab_%3(%4) +%endmacro + +%macro ni_mov 4 + movzx %4,%2 + mov %1,dtab_%3(%4) +%endmacro + +; Basic MOV and XOR Operations for last round + +%ifdef LAST_ROUND_TABLES + +%macro li_xor 4 + movzx %4,%2 + xor %1,dltab_%3(%4) +%endmacro + +%macro li_mov 4 + movzx %4,%2 + mov %1,dltab_%3(%4) +%endmacro + +%else + + %macro li_xor 4 + movzx %4,%2 + movzx %4,dtab_x(%4) + %if %3 != 0 + shl %4,8*%3 + %endif + xor %1,%4 + %endmacro + + %macro li_mov 4 + movzx %4,%2 + movzx %1,dtab_x(%4) + %if %3 != 0 + shl %1,8*%3 + %endif + %endmacro + +%endif + +%macro dec_round 0 + +%ifdef AES_REV_DKS + add ebp,16 +%else + sub ebp,16 +%endif + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + irn_fun ni_xor, ni_mov + + mov ebx,ebp + mov ecx,esi + mov edx,edi + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + +%macro dec_last_round 0 + +%ifdef AES_REV_DKS + add ebp,16 +%else + sub ebp,16 +%endif + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + irn_fun li_xor, li_mov + + mov ebx,ebp + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + + section .text + +; AES Decryption Subroutine + + do_name aes_decrypt + + sub esp,stk_spc + mov [esp+16],ebp + mov [esp+12],ebx + mov [esp+ 8],esi + mov [esp+ 4],edi + +; input four columns and xor in first round key + + mov esi,[esp+in_blk+stk_spc] ; input pointer + mov eax,[esi ] + mov ebx,[esi+ 4] + mov ecx,[esi+ 8] + mov edx,[esi+12] + lea esi,[esi+16] + + mov ebp,[esp+ctx+stk_spc] ; key pointer + movzx edi,byte[ebp+4*KS_LENGTH] +%ifndef AES_REV_DKS ; if decryption key schedule is not reversed + lea ebp,[ebp+edi] ; we have to access it from the top down +%endif + xor eax,[ebp ] ; key schedule + xor ebx,[ebp+ 4] + xor ecx,[ebp+ 8] + xor edx,[ebp+12] + +; determine the number of rounds + + cmp edi,10*16 + je .3 + cmp edi,12*16 + je .2 + cmp edi,14*16 + je .1 + mov eax,-1 + jmp .5 + +.1: dec_round + dec_round +.2: dec_round + dec_round +.3: dec_round + dec_round + dec_round + dec_round + dec_round + dec_round + dec_round + dec_round + dec_round + dec_last_round + +; move final values to the output array. + + mov ebp,[esp+out_blk+stk_spc] + mov [ebp],eax + mov [ebp+4],ebx + mov [ebp+8],esi + mov [ebp+12],edi + xor eax,eax + +.5: mov ebp,[esp+16] + mov ebx,[esp+12] + mov esi,[esp+ 8] + mov edi,[esp+ 4] + add esp,stk_spc + do_exit + +%endif diff --git a/src/Crypto/Aescrypt.c b/src/Crypto/Aescrypt.c index c77ec675..46175981 100644 --- a/src/Crypto/Aescrypt.c +++ b/src/Crypto/Aescrypt.c @@ -1,311 +1,311 @@ -/* - --------------------------------------------------------------------------- - Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. - - LICENSE TERMS - - The free distribution and use of this software is allowed (with or without - changes) provided that: - - 1. source code distributions include the above copyright notice, this - list of conditions and the following disclaimer; - - 2. binary distributions include the above copyright notice, this list - of conditions and the following disclaimer in their documentation; - - 3. the name of the copyright holder is not used to endorse products - built using this software without specific written permission. - - DISCLAIMER - - This software is provided 'as is' with no explicit or implied warranties - in respect of its properties, including, but not limited to, correctness - and/or fitness for purpose. - --------------------------------------------------------------------------- - Issue Date: 20/12/2007 -*/ - -#include "Aesopt.h" -#include "Aestab.h" - -#if defined(__cplusplus) -extern "C" -{ -#endif - -#define si(y,x,k,c) (s(y,c) = word_in(x, c) ^ (k)[c]) -#define so(y,x,c) word_out(y, c, s(x,c)) - -#if defined(ARRAYS) -#define locals(y,x) x[4],y[4] -#else -#define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3 -#endif - -#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \ - s(y,2) = s(x,2); s(y,3) = s(x,3); -#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3) -#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3) -#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3) - -#if ( FUNCS_IN_C & ENCRYPTION_IN_C ) - -/* Visual C++ .Net v7.1 provides the fastest encryption code when using - Pentium optimiation with small code but this is poor for decryption - so we need to control this with the following VC++ pragmas -*/ - -#if defined( _MSC_VER ) && !defined( _WIN64 ) -#pragma optimize( "s", on ) -#endif - -/* Given the column (c) of the output state variable, the following - macros give the input state variables which are needed in its - computation for each row (r) of the state. All the alternative - macros give the same end values but expand into different ways - of calculating these values. In particular the complex macro - used for dynamically variable block sizes is designed to expand - to a compile time constant whenever possible but will expand to - conditional clauses on some branches (I am grateful to Frank - Yellin for this construction) -*/ - -#define fwd_var(x,r,c)\ - ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\ - : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\ - : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\ - : ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))) - -#if defined(FT4_SET) -#undef dec_fmvars -#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c)) -#elif defined(FT1_SET) -#undef dec_fmvars -#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(f,n),fwd_var,rf1,c)) -#else -#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_use(s,box),fwd_var,rf1,c))) -#endif - -#if defined(FL4_SET) -#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,l),fwd_var,rf1,c)) -#elif defined(FL1_SET) -#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(f,l),fwd_var,rf1,c)) -#else -#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_use(s,box),fwd_var,rf1,c)) -#endif - -AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1]) -{ uint_32t locals(b0, b1); - const uint_32t *kp; -#if defined( dec_fmvars ) - dec_fmvars; /* declare variables for fwd_mcol() if needed */ -#endif - -#if defined( AES_ERR_CHK ) - if( cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16 ) - return EXIT_FAILURE; -#endif - - kp = cx->ks; - state_in(b0, in, kp); - -#if (ENC_UNROLL == FULL) - - switch(cx->inf.b[0]) - { - case 14 * 16: - round(fwd_rnd, b1, b0, kp + 1 * N_COLS); - round(fwd_rnd, b0, b1, kp + 2 * N_COLS); - kp += 2 * N_COLS; - case 12 * 16: - round(fwd_rnd, b1, b0, kp + 1 * N_COLS); - round(fwd_rnd, b0, b1, kp + 2 * N_COLS); - kp += 2 * N_COLS; - case 10 * 16: - round(fwd_rnd, b1, b0, kp + 1 * N_COLS); - round(fwd_rnd, b0, b1, kp + 2 * N_COLS); - round(fwd_rnd, b1, b0, kp + 3 * N_COLS); - round(fwd_rnd, b0, b1, kp + 4 * N_COLS); - round(fwd_rnd, b1, b0, kp + 5 * N_COLS); - round(fwd_rnd, b0, b1, kp + 6 * N_COLS); - round(fwd_rnd, b1, b0, kp + 7 * N_COLS); - round(fwd_rnd, b0, b1, kp + 8 * N_COLS); - round(fwd_rnd, b1, b0, kp + 9 * N_COLS); - round(fwd_lrnd, b0, b1, kp +10 * N_COLS); - } - -#else - -#if (ENC_UNROLL == PARTIAL) - { uint_32t rnd; - for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd) - { - kp += N_COLS; - round(fwd_rnd, b1, b0, kp); - kp += N_COLS; - round(fwd_rnd, b0, b1, kp); - } - kp += N_COLS; - round(fwd_rnd, b1, b0, kp); -#else - { uint_32t rnd; - for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd) - { - kp += N_COLS; - round(fwd_rnd, b1, b0, kp); - l_copy(b0, b1); - } -#endif - kp += N_COLS; - round(fwd_lrnd, b0, b1, kp); - } -#endif - - state_out(out, b0); - -#if defined( AES_ERR_CHK ) - return EXIT_SUCCESS; -#endif -} - -#endif - -#if ( FUNCS_IN_C & DECRYPTION_IN_C) - -/* Visual C++ .Net v7.1 provides the fastest encryption code when using - Pentium optimiation with small code but this is poor for decryption - so we need to control this with the following VC++ pragmas -*/ - -#if defined( _MSC_VER ) && !defined( _WIN64 ) -#pragma optimize( "t", on ) -#endif - -/* Given the column (c) of the output state variable, the following - macros give the input state variables which are needed in its - computation for each row (r) of the state. All the alternative - macros give the same end values but expand into different ways - of calculating these values. In particular the complex macro - used for dynamically variable block sizes is designed to expand - to a compile time constant whenever possible but will expand to - conditional clauses on some branches (I am grateful to Frank - Yellin for this construction) -*/ - -#define inv_var(x,r,c)\ - ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\ - : r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))\ - : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\ - : ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))) - -#if defined(IT4_SET) -#undef dec_imvars -#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,n),inv_var,rf1,c)) -#elif defined(IT1_SET) -#undef dec_imvars -#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(i,n),inv_var,rf1,c)) -#else -#define inv_rnd(y,x,k,c) (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c))) -#endif - -#if defined(IL4_SET) -#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,l),inv_var,rf1,c)) -#elif defined(IL1_SET) -#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(i,l),inv_var,rf1,c)) -#else -#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c)) -#endif - -/* This code can work with the decryption key schedule in the */ -/* order that is used for encrytpion (where the 1st decryption */ -/* round key is at the high end ot the schedule) or with a key */ -/* schedule that has been reversed to put the 1st decryption */ -/* round key at the low end of the schedule in memory (when */ -/* AES_REV_DKS is defined) */ - -#ifdef AES_REV_DKS -#define key_ofs 0 -#define rnd_key(n) (kp + n * N_COLS) -#else -#define key_ofs 1 -#define rnd_key(n) (kp - n * N_COLS) -#endif - -AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]) -{ uint_32t locals(b0, b1); -#if defined( dec_imvars ) - dec_imvars; /* declare variables for inv_mcol() if needed */ -#endif - const uint_32t *kp; - -#if defined( AES_ERR_CHK ) - if( cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16 ) - return EXIT_FAILURE; -#endif - - kp = cx->ks + (key_ofs ? (cx->inf.b[0] >> 2) : 0); - state_in(b0, in, kp); - -#if (DEC_UNROLL == FULL) - - kp = cx->ks + (key_ofs ? 0 : (cx->inf.b[0] >> 2)); - switch(cx->inf.b[0]) - { - case 14 * 16: - round(inv_rnd, b1, b0, rnd_key(-13)); - round(inv_rnd, b0, b1, rnd_key(-12)); - case 12 * 16: - round(inv_rnd, b1, b0, rnd_key(-11)); - round(inv_rnd, b0, b1, rnd_key(-10)); - case 10 * 16: - round(inv_rnd, b1, b0, rnd_key(-9)); - round(inv_rnd, b0, b1, rnd_key(-8)); - round(inv_rnd, b1, b0, rnd_key(-7)); - round(inv_rnd, b0, b1, rnd_key(-6)); - round(inv_rnd, b1, b0, rnd_key(-5)); - round(inv_rnd, b0, b1, rnd_key(-4)); - round(inv_rnd, b1, b0, rnd_key(-3)); - round(inv_rnd, b0, b1, rnd_key(-2)); - round(inv_rnd, b1, b0, rnd_key(-1)); - round(inv_lrnd, b0, b1, rnd_key( 0)); - } - -#else - -#if (DEC_UNROLL == PARTIAL) - { uint_32t rnd; - for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd) - { - kp = rnd_key(1); - round(inv_rnd, b1, b0, kp); - kp = rnd_key(1); - round(inv_rnd, b0, b1, kp); - } - kp = rnd_key(1); - round(inv_rnd, b1, b0, kp); -#else - { uint_32t rnd; - for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd) - { - kp = rnd_key(1); - round(inv_rnd, b1, b0, kp); - l_copy(b0, b1); - } -#endif - kp = rnd_key(1); - round(inv_lrnd, b0, b1, kp); - } -#endif - - state_out(out, b0); - -#if defined( AES_ERR_CHK ) - return EXIT_SUCCESS; -#endif -} - -#endif - -#if defined(__cplusplus) -} -#endif +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 20/12/2007 +*/ + +#include "Aesopt.h" +#include "Aestab.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#define si(y,x,k,c) (s(y,c) = word_in(x, c) ^ (k)[c]) +#define so(y,x,c) word_out(y, c, s(x,c)) + +#if defined(ARRAYS) +#define locals(y,x) x[4],y[4] +#else +#define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3 +#endif + +#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \ + s(y,2) = s(x,2); s(y,3) = s(x,3); +#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3) +#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3) +#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3) + +#if ( FUNCS_IN_C & ENCRYPTION_IN_C ) + +/* Visual C++ .Net v7.1 provides the fastest encryption code when using + Pentium optimiation with small code but this is poor for decryption + so we need to control this with the following VC++ pragmas +*/ + +#if defined( _MSC_VER ) && !defined( _WIN64 ) +#pragma optimize( "s", on ) +#endif + +/* Given the column (c) of the output state variable, the following + macros give the input state variables which are needed in its + computation for each row (r) of the state. All the alternative + macros give the same end values but expand into different ways + of calculating these values. In particular the complex macro + used for dynamically variable block sizes is designed to expand + to a compile time constant whenever possible but will expand to + conditional clauses on some branches (I am grateful to Frank + Yellin for this construction) +*/ + +#define fwd_var(x,r,c)\ + ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\ + : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\ + : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\ + : ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))) + +#if defined(FT4_SET) +#undef dec_fmvars +#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c)) +#elif defined(FT1_SET) +#undef dec_fmvars +#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(f,n),fwd_var,rf1,c)) +#else +#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_use(s,box),fwd_var,rf1,c))) +#endif + +#if defined(FL4_SET) +#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,l),fwd_var,rf1,c)) +#elif defined(FL1_SET) +#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(f,l),fwd_var,rf1,c)) +#else +#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_use(s,box),fwd_var,rf1,c)) +#endif + +AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1]) +{ uint_32t locals(b0, b1); + const uint_32t *kp; +#if defined( dec_fmvars ) + dec_fmvars; /* declare variables for fwd_mcol() if needed */ +#endif + +#if defined( AES_ERR_CHK ) + if( cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16 ) + return EXIT_FAILURE; +#endif + + kp = cx->ks; + state_in(b0, in, kp); + +#if (ENC_UNROLL == FULL) + + switch(cx->inf.b[0]) + { + case 14 * 16: + round(fwd_rnd, b1, b0, kp + 1 * N_COLS); + round(fwd_rnd, b0, b1, kp + 2 * N_COLS); + kp += 2 * N_COLS; + case 12 * 16: + round(fwd_rnd, b1, b0, kp + 1 * N_COLS); + round(fwd_rnd, b0, b1, kp + 2 * N_COLS); + kp += 2 * N_COLS; + case 10 * 16: + round(fwd_rnd, b1, b0, kp + 1 * N_COLS); + round(fwd_rnd, b0, b1, kp + 2 * N_COLS); + round(fwd_rnd, b1, b0, kp + 3 * N_COLS); + round(fwd_rnd, b0, b1, kp + 4 * N_COLS); + round(fwd_rnd, b1, b0, kp + 5 * N_COLS); + round(fwd_rnd, b0, b1, kp + 6 * N_COLS); + round(fwd_rnd, b1, b0, kp + 7 * N_COLS); + round(fwd_rnd, b0, b1, kp + 8 * N_COLS); + round(fwd_rnd, b1, b0, kp + 9 * N_COLS); + round(fwd_lrnd, b0, b1, kp +10 * N_COLS); + } + +#else + +#if (ENC_UNROLL == PARTIAL) + { uint_32t rnd; + for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd) + { + kp += N_COLS; + round(fwd_rnd, b1, b0, kp); + kp += N_COLS; + round(fwd_rnd, b0, b1, kp); + } + kp += N_COLS; + round(fwd_rnd, b1, b0, kp); +#else + { uint_32t rnd; + for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd) + { + kp += N_COLS; + round(fwd_rnd, b1, b0, kp); + l_copy(b0, b1); + } +#endif + kp += N_COLS; + round(fwd_lrnd, b0, b1, kp); + } +#endif + + state_out(out, b0); + +#if defined( AES_ERR_CHK ) + return EXIT_SUCCESS; +#endif +} + +#endif + +#if ( FUNCS_IN_C & DECRYPTION_IN_C) + +/* Visual C++ .Net v7.1 provides the fastest encryption code when using + Pentium optimiation with small code but this is poor for decryption + so we need to control this with the following VC++ pragmas +*/ + +#if defined( _MSC_VER ) && !defined( _WIN64 ) +#pragma optimize( "t", on ) +#endif + +/* Given the column (c) of the output state variable, the following + macros give the input state variables which are needed in its + computation for each row (r) of the state. All the alternative + macros give the same end values but expand into different ways + of calculating these values. In particular the complex macro + used for dynamically variable block sizes is designed to expand + to a compile time constant whenever possible but will expand to + conditional clauses on some branches (I am grateful to Frank + Yellin for this construction) +*/ + +#define inv_var(x,r,c)\ + ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\ + : r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))\ + : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\ + : ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))) + +#if defined(IT4_SET) +#undef dec_imvars +#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,n),inv_var,rf1,c)) +#elif defined(IT1_SET) +#undef dec_imvars +#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(i,n),inv_var,rf1,c)) +#else +#define inv_rnd(y,x,k,c) (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c))) +#endif + +#if defined(IL4_SET) +#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,l),inv_var,rf1,c)) +#elif defined(IL1_SET) +#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(i,l),inv_var,rf1,c)) +#else +#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c)) +#endif + +/* This code can work with the decryption key schedule in the */ +/* order that is used for encrytpion (where the 1st decryption */ +/* round key is at the high end ot the schedule) or with a key */ +/* schedule that has been reversed to put the 1st decryption */ +/* round key at the low end of the schedule in memory (when */ +/* AES_REV_DKS is defined) */ + +#ifdef AES_REV_DKS +#define key_ofs 0 +#define rnd_key(n) (kp + n * N_COLS) +#else +#define key_ofs 1 +#define rnd_key(n) (kp - n * N_COLS) +#endif + +AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]) +{ uint_32t locals(b0, b1); +#if defined( dec_imvars ) + dec_imvars; /* declare variables for inv_mcol() if needed */ +#endif + const uint_32t *kp; + +#if defined( AES_ERR_CHK ) + if( cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16 ) + return EXIT_FAILURE; +#endif + + kp = cx->ks + (key_ofs ? (cx->inf.b[0] >> 2) : 0); + state_in(b0, in, kp); + +#if (DEC_UNROLL == FULL) + + kp = cx->ks + (key_ofs ? 0 : (cx->inf.b[0] >> 2)); + switch(cx->inf.b[0]) + { + case 14 * 16: + round(inv_rnd, b1, b0, rnd_key(-13)); + round(inv_rnd, b0, b1, rnd_key(-12)); + case 12 * 16: + round(inv_rnd, b1, b0, rnd_key(-11)); + round(inv_rnd, b0, b1, rnd_key(-10)); + case 10 * 16: + round(inv_rnd, b1, b0, rnd_key(-9)); + round(inv_rnd, b0, b1, rnd_key(-8)); + round(inv_rnd, b1, b0, rnd_key(-7)); + round(inv_rnd, b0, b1, rnd_key(-6)); + round(inv_rnd, b1, b0, rnd_key(-5)); + round(inv_rnd, b0, b1, rnd_key(-4)); + round(inv_rnd, b1, b0, rnd_key(-3)); + round(inv_rnd, b0, b1, rnd_key(-2)); + round(inv_rnd, b1, b0, rnd_key(-1)); + round(inv_lrnd, b0, b1, rnd_key( 0)); + } + +#else + +#if (DEC_UNROLL == PARTIAL) + { uint_32t rnd; + for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd) + { + kp = rnd_key(1); + round(inv_rnd, b1, b0, kp); + kp = rnd_key(1); + round(inv_rnd, b0, b1, kp); + } + kp = rnd_key(1); + round(inv_rnd, b1, b0, kp); +#else + { uint_32t rnd; + for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd) + { + kp = rnd_key(1); + round(inv_rnd, b1, b0, kp); + l_copy(b0, b1); + } +#endif + kp = rnd_key(1); + round(inv_lrnd, b0, b1, kp); + } +#endif + + state_out(out, b0); + +#if defined( AES_ERR_CHK ) + return EXIT_SUCCESS; +#endif +} + +#endif + +#if defined(__cplusplus) +} +#endif diff --git a/src/Crypto/Aeskey.c b/src/Crypto/Aeskey.c index 948b9238..c9ab0269 100644 --- a/src/Crypto/Aeskey.c +++ b/src/Crypto/Aeskey.c @@ -1,573 +1,573 @@ -/* - --------------------------------------------------------------------------- - Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. - - LICENSE TERMS - - The free distribution and use of this software is allowed (with or without - changes) provided that: - - 1. source code distributions include the above copyright notice, this - list of conditions and the following disclaimer; - - 2. binary distributions include the above copyright notice, this list - of conditions and the following disclaimer in their documentation; - - 3. the name of the copyright holder is not used to endorse products - built using this software without specific written permission. - - DISCLAIMER - - This software is provided 'as is' with no explicit or implied warranties - in respect of its properties, including, but not limited to, correctness - and/or fitness for purpose. - --------------------------------------------------------------------------- - Issue Date: 20/12/2007 -*/ - -#include "Aesopt.h" -#include "Aestab.h" - -#ifdef USE_VIA_ACE_IF_PRESENT -# include "aes_via_ace.h" -#endif - -#if defined(__cplusplus) -extern "C" -{ -#endif - -/* Initialise the key schedule from the user supplied key. The key - length can be specified in bytes, with legal values of 16, 24 - and 32, or in bits, with legal values of 128, 192 and 256. These - values correspond with Nk values of 4, 6 and 8 respectively. - - The following macros implement a single cycle in the key - schedule generation process. The number of cycles needed - for each cx->n_col and nk value is: - - nk = 4 5 6 7 8 - ------------------------------ - cx->n_col = 4 10 9 8 7 7 - cx->n_col = 5 14 11 10 9 9 - cx->n_col = 6 19 15 12 11 11 - cx->n_col = 7 21 19 16 13 14 - cx->n_col = 8 29 23 19 17 14 -*/ - -#if (FUNCS_IN_C & ENC_KEYING_IN_C) - -#if defined(AES_128) || defined(AES_VAR) - -#define ke4(k,i) \ -{ k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; \ - k[4*(i)+5] = ss[1] ^= ss[0]; \ - k[4*(i)+6] = ss[2] ^= ss[1]; \ - k[4*(i)+7] = ss[3] ^= ss[2]; \ -} - -AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]) -{ uint_32t ss[4]; - - cx->ks[0] = ss[0] = word_in(key, 0); - cx->ks[1] = ss[1] = word_in(key, 1); - cx->ks[2] = ss[2] = word_in(key, 2); - cx->ks[3] = ss[3] = word_in(key, 3); - -#if ENC_UNROLL == NONE - { uint_32t i; - for(i = 0; i < 9; ++i) - ke4(cx->ks, i); - } -#else - ke4(cx->ks, 0); ke4(cx->ks, 1); - ke4(cx->ks, 2); ke4(cx->ks, 3); - ke4(cx->ks, 4); ke4(cx->ks, 5); - ke4(cx->ks, 6); ke4(cx->ks, 7); - ke4(cx->ks, 8); -#endif - ke4(cx->ks, 9); - cx->inf.l = 0; - cx->inf.b[0] = 10 * 16; - -#ifdef USE_VIA_ACE_IF_PRESENT - if(VIA_ACE_AVAILABLE) - cx->inf.b[1] = 0xff; -#endif - -#if defined( AES_ERR_CHK ) - return EXIT_SUCCESS; -#endif -} - -#endif - -#if defined(AES_192) || defined(AES_VAR) - -#define kef6(k,i) \ -{ k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; \ - k[6*(i)+ 7] = ss[1] ^= ss[0]; \ - k[6*(i)+ 8] = ss[2] ^= ss[1]; \ - k[6*(i)+ 9] = ss[3] ^= ss[2]; \ -} - -#define ke6(k,i) \ -{ kef6(k,i); \ - k[6*(i)+10] = ss[4] ^= ss[3]; \ - k[6*(i)+11] = ss[5] ^= ss[4]; \ -} - -AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]) -{ uint_32t ss[6]; - - cx->ks[0] = ss[0] = word_in(key, 0); - cx->ks[1] = ss[1] = word_in(key, 1); - cx->ks[2] = ss[2] = word_in(key, 2); - cx->ks[3] = ss[3] = word_in(key, 3); - cx->ks[4] = ss[4] = word_in(key, 4); - cx->ks[5] = ss[5] = word_in(key, 5); - -#if ENC_UNROLL == NONE - { uint_32t i; - for(i = 0; i < 7; ++i) - ke6(cx->ks, i); - } -#else - ke6(cx->ks, 0); ke6(cx->ks, 1); - ke6(cx->ks, 2); ke6(cx->ks, 3); - ke6(cx->ks, 4); ke6(cx->ks, 5); - ke6(cx->ks, 6); -#endif - kef6(cx->ks, 7); - cx->inf.l = 0; - cx->inf.b[0] = 12 * 16; - -#ifdef USE_VIA_ACE_IF_PRESENT - if(VIA_ACE_AVAILABLE) - cx->inf.b[1] = 0xff; -#endif - -#if defined( AES_ERR_CHK ) - return EXIT_SUCCESS; -#endif -} - -#endif - -#if defined(AES_256) || defined(AES_VAR) - -#define kef8(k,i) \ -{ k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; \ - k[8*(i)+ 9] = ss[1] ^= ss[0]; \ - k[8*(i)+10] = ss[2] ^= ss[1]; \ - k[8*(i)+11] = ss[3] ^= ss[2]; \ -} - -#define ke8(k,i) \ -{ kef8(k,i); \ - k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \ - k[8*(i)+13] = ss[5] ^= ss[4]; \ - k[8*(i)+14] = ss[6] ^= ss[5]; \ - k[8*(i)+15] = ss[7] ^= ss[6]; \ -} - -AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]) -{ uint_32t ss[8]; - - cx->ks[0] = ss[0] = word_in(key, 0); - cx->ks[1] = ss[1] = word_in(key, 1); - cx->ks[2] = ss[2] = word_in(key, 2); - cx->ks[3] = ss[3] = word_in(key, 3); - cx->ks[4] = ss[4] = word_in(key, 4); - cx->ks[5] = ss[5] = word_in(key, 5); - cx->ks[6] = ss[6] = word_in(key, 6); - cx->ks[7] = ss[7] = word_in(key, 7); - -#if ENC_UNROLL == NONE - { uint_32t i; - for(i = 0; i < 6; ++i) - ke8(cx->ks, i); - } -#else - ke8(cx->ks, 0); ke8(cx->ks, 1); - ke8(cx->ks, 2); ke8(cx->ks, 3); - ke8(cx->ks, 4); ke8(cx->ks, 5); -#endif - kef8(cx->ks, 6); - cx->inf.l = 0; - cx->inf.b[0] = 14 * 16; - -#ifdef USE_VIA_ACE_IF_PRESENT - if(VIA_ACE_AVAILABLE) - cx->inf.b[1] = 0xff; -#endif - -#if defined( AES_ERR_CHK ) - return EXIT_SUCCESS; -#endif -} - -#endif - -#if defined(AES_VAR) - -AES_RETURN aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1]) -{ - switch(key_len) - { -#if defined( AES_ERR_CHK ) - case 16: case 128: return aes_encrypt_key128(key, cx); - case 24: case 192: return aes_encrypt_key192(key, cx); - case 32: case 256: return aes_encrypt_key256(key, cx); - default: return EXIT_FAILURE; -#else - case 16: case 128: aes_encrypt_key128(key, cx); return; - case 24: case 192: aes_encrypt_key192(key, cx); return; - case 32: case 256: aes_encrypt_key256(key, cx); return; -#endif - } -} - -#endif - -#endif - -#if (FUNCS_IN_C & DEC_KEYING_IN_C) - -/* this is used to store the decryption round keys */ -/* in forward or reverse order */ - -#ifdef AES_REV_DKS -#define v(n,i) ((n) - (i) + 2 * ((i) & 3)) -#else -#define v(n,i) (i) -#endif - -#if DEC_ROUND == NO_TABLES -#define ff(x) (x) -#else -#define ff(x) inv_mcol(x) -#if defined( dec_imvars ) -#define d_vars dec_imvars -#endif -#endif - -#if defined(AES_128) || defined(AES_VAR) - -#define k4e(k,i) \ -{ k[v(40,(4*(i))+4)] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; \ - k[v(40,(4*(i))+5)] = ss[1] ^= ss[0]; \ - k[v(40,(4*(i))+6)] = ss[2] ^= ss[1]; \ - k[v(40,(4*(i))+7)] = ss[3] ^= ss[2]; \ -} - -#if 1 - -#define kdf4(k,i) \ -{ ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \ - ss[1] = ss[1] ^ ss[3]; \ - ss[2] = ss[2] ^ ss[3]; \ - ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; \ - ss[i % 4] ^= ss[4]; \ - ss[4] ^= k[v(40,(4*(i)))]; k[v(40,(4*(i))+4)] = ff(ss[4]); \ - ss[4] ^= k[v(40,(4*(i))+1)]; k[v(40,(4*(i))+5)] = ff(ss[4]); \ - ss[4] ^= k[v(40,(4*(i))+2)]; k[v(40,(4*(i))+6)] = ff(ss[4]); \ - ss[4] ^= k[v(40,(4*(i))+3)]; k[v(40,(4*(i))+7)] = ff(ss[4]); \ -} - -#define kd4(k,i) \ -{ ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; \ - ss[i % 4] ^= ss[4]; ss[4] = ff(ss[4]); \ - k[v(40,(4*(i))+4)] = ss[4] ^= k[v(40,(4*(i)))]; \ - k[v(40,(4*(i))+5)] = ss[4] ^= k[v(40,(4*(i))+1)]; \ - k[v(40,(4*(i))+6)] = ss[4] ^= k[v(40,(4*(i))+2)]; \ - k[v(40,(4*(i))+7)] = ss[4] ^= k[v(40,(4*(i))+3)]; \ -} - -#define kdl4(k,i) \ -{ ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; \ - k[v(40,(4*(i))+4)] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \ - k[v(40,(4*(i))+5)] = ss[1] ^ ss[3]; \ - k[v(40,(4*(i))+6)] = ss[0]; \ - k[v(40,(4*(i))+7)] = ss[1]; \ -} - -#else - -#define kdf4(k,i) \ -{ ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[v(40,(4*(i))+ 4)] = ff(ss[0]); \ - ss[1] ^= ss[0]; k[v(40,(4*(i))+ 5)] = ff(ss[1]); \ - ss[2] ^= ss[1]; k[v(40,(4*(i))+ 6)] = ff(ss[2]); \ - ss[3] ^= ss[2]; k[v(40,(4*(i))+ 7)] = ff(ss[3]); \ -} - -#define kd4(k,i) \ -{ ss[4] = ls_box(ss[3],3) ^ t_use(r,c)[i]; \ - ss[0] ^= ss[4]; ss[4] = ff(ss[4]); k[v(40,(4*(i))+ 4)] = ss[4] ^= k[v(40,(4*(i)))]; \ - ss[1] ^= ss[0]; k[v(40,(4*(i))+ 5)] = ss[4] ^= k[v(40,(4*(i))+ 1)]; \ - ss[2] ^= ss[1]; k[v(40,(4*(i))+ 6)] = ss[4] ^= k[v(40,(4*(i))+ 2)]; \ - ss[3] ^= ss[2]; k[v(40,(4*(i))+ 7)] = ss[4] ^= k[v(40,(4*(i))+ 3)]; \ -} - -#define kdl4(k,i) \ -{ ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[v(40,(4*(i))+ 4)] = ss[0]; \ - ss[1] ^= ss[0]; k[v(40,(4*(i))+ 5)] = ss[1]; \ - ss[2] ^= ss[1]; k[v(40,(4*(i))+ 6)] = ss[2]; \ - ss[3] ^= ss[2]; k[v(40,(4*(i))+ 7)] = ss[3]; \ -} - -#endif - -AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]) -{ uint_32t ss[5]; -#if defined( d_vars ) - d_vars; -#endif - cx->ks[v(40,(0))] = ss[0] = word_in(key, 0); - cx->ks[v(40,(1))] = ss[1] = word_in(key, 1); - cx->ks[v(40,(2))] = ss[2] = word_in(key, 2); - cx->ks[v(40,(3))] = ss[3] = word_in(key, 3); - -#if DEC_UNROLL == NONE - { uint_32t i; - for(i = 0; i < 10; ++i) - k4e(cx->ks, i); -#if !(DEC_ROUND == NO_TABLES) - for(i = N_COLS; i < 10 * N_COLS; ++i) - cx->ks[i] = inv_mcol(cx->ks[i]); -#endif - } -#else - kdf4(cx->ks, 0); kd4(cx->ks, 1); - kd4(cx->ks, 2); kd4(cx->ks, 3); - kd4(cx->ks, 4); kd4(cx->ks, 5); - kd4(cx->ks, 6); kd4(cx->ks, 7); - kd4(cx->ks, 8); kdl4(cx->ks, 9); -#endif - cx->inf.l = 0; - cx->inf.b[0] = 10 * 16; - -#ifdef USE_VIA_ACE_IF_PRESENT - if(VIA_ACE_AVAILABLE) - cx->inf.b[1] = 0xff; -#endif - -#if defined( AES_ERR_CHK ) - return EXIT_SUCCESS; -#endif -} - -#endif - -#if defined(AES_192) || defined(AES_VAR) - -#define k6ef(k,i) \ -{ k[v(48,(6*(i))+ 6)] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; \ - k[v(48,(6*(i))+ 7)] = ss[1] ^= ss[0]; \ - k[v(48,(6*(i))+ 8)] = ss[2] ^= ss[1]; \ - k[v(48,(6*(i))+ 9)] = ss[3] ^= ss[2]; \ -} - -#define k6e(k,i) \ -{ k6ef(k,i); \ - k[v(48,(6*(i))+10)] = ss[4] ^= ss[3]; \ - k[v(48,(6*(i))+11)] = ss[5] ^= ss[4]; \ -} - -#define kdf6(k,i) \ -{ ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[v(48,(6*(i))+ 6)] = ff(ss[0]); \ - ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ff(ss[1]); \ - ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ff(ss[2]); \ - ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ff(ss[3]); \ - ss[4] ^= ss[3]; k[v(48,(6*(i))+10)] = ff(ss[4]); \ - ss[5] ^= ss[4]; k[v(48,(6*(i))+11)] = ff(ss[5]); \ -} - -#define kd6(k,i) \ -{ ss[6] = ls_box(ss[5],3) ^ t_use(r,c)[i]; \ - ss[0] ^= ss[6]; ss[6] = ff(ss[6]); k[v(48,(6*(i))+ 6)] = ss[6] ^= k[v(48,(6*(i)))]; \ - ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ss[6] ^= k[v(48,(6*(i))+ 1)]; \ - ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ss[6] ^= k[v(48,(6*(i))+ 2)]; \ - ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ss[6] ^= k[v(48,(6*(i))+ 3)]; \ - ss[4] ^= ss[3]; k[v(48,(6*(i))+10)] = ss[6] ^= k[v(48,(6*(i))+ 4)]; \ - ss[5] ^= ss[4]; k[v(48,(6*(i))+11)] = ss[6] ^= k[v(48,(6*(i))+ 5)]; \ -} - -#define kdl6(k,i) \ -{ ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[v(48,(6*(i))+ 6)] = ss[0]; \ - ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ss[1]; \ - ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ss[2]; \ - ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ss[3]; \ -} - -AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]) -{ uint_32t ss[7]; -#if defined( d_vars ) - d_vars; -#endif - cx->ks[v(48,(0))] = ss[0] = word_in(key, 0); - cx->ks[v(48,(1))] = ss[1] = word_in(key, 1); - cx->ks[v(48,(2))] = ss[2] = word_in(key, 2); - cx->ks[v(48,(3))] = ss[3] = word_in(key, 3); - -#if DEC_UNROLL == NONE - cx->ks[v(48,(4))] = ss[4] = word_in(key, 4); - cx->ks[v(48,(5))] = ss[5] = word_in(key, 5); - { uint_32t i; - - for(i = 0; i < 7; ++i) - k6e(cx->ks, i); - k6ef(cx->ks, 7); -#if !(DEC_ROUND == NO_TABLES) - for(i = N_COLS; i < 12 * N_COLS; ++i) - cx->ks[i] = inv_mcol(cx->ks[i]); -#endif - } -#else - cx->ks[v(48,(4))] = ff(ss[4] = word_in(key, 4)); - cx->ks[v(48,(5))] = ff(ss[5] = word_in(key, 5)); - kdf6(cx->ks, 0); kd6(cx->ks, 1); - kd6(cx->ks, 2); kd6(cx->ks, 3); - kd6(cx->ks, 4); kd6(cx->ks, 5); - kd6(cx->ks, 6); kdl6(cx->ks, 7); -#endif - cx->inf.l = 0; - cx->inf.b[0] = 12 * 16; - -#ifdef USE_VIA_ACE_IF_PRESENT - if(VIA_ACE_AVAILABLE) - cx->inf.b[1] = 0xff; -#endif - -#if defined( AES_ERR_CHK ) - return EXIT_SUCCESS; -#endif -} - -#endif - -#if defined(AES_256) || defined(AES_VAR) - -#define k8ef(k,i) \ -{ k[v(56,(8*(i))+ 8)] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; \ - k[v(56,(8*(i))+ 9)] = ss[1] ^= ss[0]; \ - k[v(56,(8*(i))+10)] = ss[2] ^= ss[1]; \ - k[v(56,(8*(i))+11)] = ss[3] ^= ss[2]; \ -} - -#define k8e(k,i) \ -{ k8ef(k,i); \ - k[v(56,(8*(i))+12)] = ss[4] ^= ls_box(ss[3],0); \ - k[v(56,(8*(i))+13)] = ss[5] ^= ss[4]; \ - k[v(56,(8*(i))+14)] = ss[6] ^= ss[5]; \ - k[v(56,(8*(i))+15)] = ss[7] ^= ss[6]; \ -} - -#define kdf8(k,i) \ -{ ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[v(56,(8*(i))+ 8)] = ff(ss[0]); \ - ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ff(ss[1]); \ - ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ff(ss[2]); \ - ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ff(ss[3]); \ - ss[4] ^= ls_box(ss[3],0); k[v(56,(8*(i))+12)] = ff(ss[4]); \ - ss[5] ^= ss[4]; k[v(56,(8*(i))+13)] = ff(ss[5]); \ - ss[6] ^= ss[5]; k[v(56,(8*(i))+14)] = ff(ss[6]); \ - ss[7] ^= ss[6]; k[v(56,(8*(i))+15)] = ff(ss[7]); \ -} - -#define kd8(k,i) \ -{ ss[8] = ls_box(ss[7],3) ^ t_use(r,c)[i]; \ - ss[0] ^= ss[8]; ss[8] = ff(ss[8]); k[v(56,(8*(i))+ 8)] = ss[8] ^= k[v(56,(8*(i)))]; \ - ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ss[8] ^= k[v(56,(8*(i))+ 1)]; \ - ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ss[8] ^= k[v(56,(8*(i))+ 2)]; \ - ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ss[8] ^= k[v(56,(8*(i))+ 3)]; \ - ss[8] = ls_box(ss[3],0); \ - ss[4] ^= ss[8]; ss[8] = ff(ss[8]); k[v(56,(8*(i))+12)] = ss[8] ^= k[v(56,(8*(i))+ 4)]; \ - ss[5] ^= ss[4]; k[v(56,(8*(i))+13)] = ss[8] ^= k[v(56,(8*(i))+ 5)]; \ - ss[6] ^= ss[5]; k[v(56,(8*(i))+14)] = ss[8] ^= k[v(56,(8*(i))+ 6)]; \ - ss[7] ^= ss[6]; k[v(56,(8*(i))+15)] = ss[8] ^= k[v(56,(8*(i))+ 7)]; \ -} - -#define kdl8(k,i) \ -{ ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[v(56,(8*(i))+ 8)] = ss[0]; \ - ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ss[1]; \ - ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ss[2]; \ - ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ss[3]; \ -} - -AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]) -{ uint_32t ss[9]; -#if defined( d_vars ) - d_vars; -#endif - cx->ks[v(56,(0))] = ss[0] = word_in(key, 0); - cx->ks[v(56,(1))] = ss[1] = word_in(key, 1); - cx->ks[v(56,(2))] = ss[2] = word_in(key, 2); - cx->ks[v(56,(3))] = ss[3] = word_in(key, 3); - -#if DEC_UNROLL == NONE - cx->ks[v(56,(4))] = ss[4] = word_in(key, 4); - cx->ks[v(56,(5))] = ss[5] = word_in(key, 5); - cx->ks[v(56,(6))] = ss[6] = word_in(key, 6); - cx->ks[v(56,(7))] = ss[7] = word_in(key, 7); - { uint_32t i; - - for(i = 0; i < 6; ++i) - k8e(cx->ks, i); - k8ef(cx->ks, 6); -#if !(DEC_ROUND == NO_TABLES) - for(i = N_COLS; i < 14 * N_COLS; ++i) - cx->ks[i] = inv_mcol(cx->ks[i]); - -#endif - } -#else - ss[4] = word_in(key, 4); cx->ks[v(56,(4))] = ff(ss[4]); - ss[5] = word_in(key, 5); cx->ks[v(56,(5))] = ff(ss[5]); - ss[6] = word_in(key, 6); cx->ks[v(56,(6))] = ff(ss[6]); - ss[7] = word_in(key, 7); cx->ks[v(56,(7))] = ff(ss[7]); - kdf8(cx->ks, 0); kd8(cx->ks, 1); - kd8(cx->ks, 2); kd8(cx->ks, 3); - kd8(cx->ks, 4); kd8(cx->ks, 5); - kdl8(cx->ks, 6); -#endif - cx->inf.l = 0; - cx->inf.b[0] = 14 * 16; - -#ifdef USE_VIA_ACE_IF_PRESENT - if(VIA_ACE_AVAILABLE) - cx->inf.b[1] = 0xff; -#endif - -#if defined( AES_ERR_CHK ) - return EXIT_SUCCESS; -#endif -} - -#endif - -#if defined(AES_VAR) - -AES_RETURN aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1]) -{ - switch(key_len) - { -#if defined( AES_ERR_CHK ) - case 16: case 128: return aes_decrypt_key128(key, cx); - case 24: case 192: return aes_decrypt_key192(key, cx); - case 32: case 256: return aes_decrypt_key256(key, cx); - default: return EXIT_FAILURE; -#else - case 16: case 128: aes_decrypt_key128(key, cx); return; - case 24: case 192: aes_decrypt_key192(key, cx); return; - case 32: case 256: aes_decrypt_key256(key, cx); return; -#endif - } -} - -#endif - -#endif - -#if defined(__cplusplus) -} -#endif +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 20/12/2007 +*/ + +#include "Aesopt.h" +#include "Aestab.h" + +#ifdef USE_VIA_ACE_IF_PRESENT +# include "aes_via_ace.h" +#endif + +#if defined(__cplusplus) +extern "C" +{ +#endif + +/* Initialise the key schedule from the user supplied key. The key + length can be specified in bytes, with legal values of 16, 24 + and 32, or in bits, with legal values of 128, 192 and 256. These + values correspond with Nk values of 4, 6 and 8 respectively. + + The following macros implement a single cycle in the key + schedule generation process. The number of cycles needed + for each cx->n_col and nk value is: + + nk = 4 5 6 7 8 + ------------------------------ + cx->n_col = 4 10 9 8 7 7 + cx->n_col = 5 14 11 10 9 9 + cx->n_col = 6 19 15 12 11 11 + cx->n_col = 7 21 19 16 13 14 + cx->n_col = 8 29 23 19 17 14 +*/ + +#if (FUNCS_IN_C & ENC_KEYING_IN_C) + +#if defined(AES_128) || defined(AES_VAR) + +#define ke4(k,i) \ +{ k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; \ + k[4*(i)+5] = ss[1] ^= ss[0]; \ + k[4*(i)+6] = ss[2] ^= ss[1]; \ + k[4*(i)+7] = ss[3] ^= ss[2]; \ +} + +AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]) +{ uint_32t ss[4]; + + cx->ks[0] = ss[0] = word_in(key, 0); + cx->ks[1] = ss[1] = word_in(key, 1); + cx->ks[2] = ss[2] = word_in(key, 2); + cx->ks[3] = ss[3] = word_in(key, 3); + +#if ENC_UNROLL == NONE + { uint_32t i; + for(i = 0; i < 9; ++i) + ke4(cx->ks, i); + } +#else + ke4(cx->ks, 0); ke4(cx->ks, 1); + ke4(cx->ks, 2); ke4(cx->ks, 3); + ke4(cx->ks, 4); ke4(cx->ks, 5); + ke4(cx->ks, 6); ke4(cx->ks, 7); + ke4(cx->ks, 8); +#endif + ke4(cx->ks, 9); + cx->inf.l = 0; + cx->inf.b[0] = 10 * 16; + +#ifdef USE_VIA_ACE_IF_PRESENT + if(VIA_ACE_AVAILABLE) + cx->inf.b[1] = 0xff; +#endif + +#if defined( AES_ERR_CHK ) + return EXIT_SUCCESS; +#endif +} + +#endif + +#if defined(AES_192) || defined(AES_VAR) + +#define kef6(k,i) \ +{ k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; \ + k[6*(i)+ 7] = ss[1] ^= ss[0]; \ + k[6*(i)+ 8] = ss[2] ^= ss[1]; \ + k[6*(i)+ 9] = ss[3] ^= ss[2]; \ +} + +#define ke6(k,i) \ +{ kef6(k,i); \ + k[6*(i)+10] = ss[4] ^= ss[3]; \ + k[6*(i)+11] = ss[5] ^= ss[4]; \ +} + +AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]) +{ uint_32t ss[6]; + + cx->ks[0] = ss[0] = word_in(key, 0); + cx->ks[1] = ss[1] = word_in(key, 1); + cx->ks[2] = ss[2] = word_in(key, 2); + cx->ks[3] = ss[3] = word_in(key, 3); + cx->ks[4] = ss[4] = word_in(key, 4); + cx->ks[5] = ss[5] = word_in(key, 5); + +#if ENC_UNROLL == NONE + { uint_32t i; + for(i = 0; i < 7; ++i) + ke6(cx->ks, i); + } +#else + ke6(cx->ks, 0); ke6(cx->ks, 1); + ke6(cx->ks, 2); ke6(cx->ks, 3); + ke6(cx->ks, 4); ke6(cx->ks, 5); + ke6(cx->ks, 6); +#endif + kef6(cx->ks, 7); + cx->inf.l = 0; + cx->inf.b[0] = 12 * 16; + +#ifdef USE_VIA_ACE_IF_PRESENT + if(VIA_ACE_AVAILABLE) + cx->inf.b[1] = 0xff; +#endif + +#if defined( AES_ERR_CHK ) + return EXIT_SUCCESS; +#endif +} + +#endif + +#if defined(AES_256) || defined(AES_VAR) + +#define kef8(k,i) \ +{ k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; \ + k[8*(i)+ 9] = ss[1] ^= ss[0]; \ + k[8*(i)+10] = ss[2] ^= ss[1]; \ + k[8*(i)+11] = ss[3] ^= ss[2]; \ +} + +#define ke8(k,i) \ +{ kef8(k,i); \ + k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \ + k[8*(i)+13] = ss[5] ^= ss[4]; \ + k[8*(i)+14] = ss[6] ^= ss[5]; \ + k[8*(i)+15] = ss[7] ^= ss[6]; \ +} + +AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]) +{ uint_32t ss[8]; + + cx->ks[0] = ss[0] = word_in(key, 0); + cx->ks[1] = ss[1] = word_in(key, 1); + cx->ks[2] = ss[2] = word_in(key, 2); + cx->ks[3] = ss[3] = word_in(key, 3); + cx->ks[4] = ss[4] = word_in(key, 4); + cx->ks[5] = ss[5] = word_in(key, 5); + cx->ks[6] = ss[6] = word_in(key, 6); + cx->ks[7] = ss[7] = word_in(key, 7); + +#if ENC_UNROLL == NONE + { uint_32t i; + for(i = 0; i < 6; ++i) + ke8(cx->ks, i); + } +#else + ke8(cx->ks, 0); ke8(cx->ks, 1); + ke8(cx->ks, 2); ke8(cx->ks, 3); + ke8(cx->ks, 4); ke8(cx->ks, 5); +#endif + kef8(cx->ks, 6); + cx->inf.l = 0; + cx->inf.b[0] = 14 * 16; + +#ifdef USE_VIA_ACE_IF_PRESENT + if(VIA_ACE_AVAILABLE) + cx->inf.b[1] = 0xff; +#endif + +#if defined( AES_ERR_CHK ) + return EXIT_SUCCESS; +#endif +} + +#endif + +#if defined(AES_VAR) + +AES_RETURN aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1]) +{ + switch(key_len) + { +#if defined( AES_ERR_CHK ) + case 16: case 128: return aes_encrypt_key128(key, cx); + case 24: case 192: return aes_encrypt_key192(key, cx); + case 32: case 256: return aes_encrypt_key256(key, cx); + default: return EXIT_FAILURE; +#else + case 16: case 128: aes_encrypt_key128(key, cx); return; + case 24: case 192: aes_encrypt_key192(key, cx); return; + case 32: case 256: aes_encrypt_key256(key, cx); return; +#endif + } +} + +#endif + +#endif + +#if (FUNCS_IN_C & DEC_KEYING_IN_C) + +/* this is used to store the decryption round keys */ +/* in forward or reverse order */ + +#ifdef AES_REV_DKS +#define v(n,i) ((n) - (i) + 2 * ((i) & 3)) +#else +#define v(n,i) (i) +#endif + +#if DEC_ROUND == NO_TABLES +#define ff(x) (x) +#else +#define ff(x) inv_mcol(x) +#if defined( dec_imvars ) +#define d_vars dec_imvars +#endif +#endif + +#if defined(AES_128) || defined(AES_VAR) + +#define k4e(k,i) \ +{ k[v(40,(4*(i))+4)] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; \ + k[v(40,(4*(i))+5)] = ss[1] ^= ss[0]; \ + k[v(40,(4*(i))+6)] = ss[2] ^= ss[1]; \ + k[v(40,(4*(i))+7)] = ss[3] ^= ss[2]; \ +} + +#if 1 + +#define kdf4(k,i) \ +{ ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \ + ss[1] = ss[1] ^ ss[3]; \ + ss[2] = ss[2] ^ ss[3]; \ + ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; \ + ss[i % 4] ^= ss[4]; \ + ss[4] ^= k[v(40,(4*(i)))]; k[v(40,(4*(i))+4)] = ff(ss[4]); \ + ss[4] ^= k[v(40,(4*(i))+1)]; k[v(40,(4*(i))+5)] = ff(ss[4]); \ + ss[4] ^= k[v(40,(4*(i))+2)]; k[v(40,(4*(i))+6)] = ff(ss[4]); \ + ss[4] ^= k[v(40,(4*(i))+3)]; k[v(40,(4*(i))+7)] = ff(ss[4]); \ +} + +#define kd4(k,i) \ +{ ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; \ + ss[i % 4] ^= ss[4]; ss[4] = ff(ss[4]); \ + k[v(40,(4*(i))+4)] = ss[4] ^= k[v(40,(4*(i)))]; \ + k[v(40,(4*(i))+5)] = ss[4] ^= k[v(40,(4*(i))+1)]; \ + k[v(40,(4*(i))+6)] = ss[4] ^= k[v(40,(4*(i))+2)]; \ + k[v(40,(4*(i))+7)] = ss[4] ^= k[v(40,(4*(i))+3)]; \ +} + +#define kdl4(k,i) \ +{ ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; \ + k[v(40,(4*(i))+4)] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \ + k[v(40,(4*(i))+5)] = ss[1] ^ ss[3]; \ + k[v(40,(4*(i))+6)] = ss[0]; \ + k[v(40,(4*(i))+7)] = ss[1]; \ +} + +#else + +#define kdf4(k,i) \ +{ ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[v(40,(4*(i))+ 4)] = ff(ss[0]); \ + ss[1] ^= ss[0]; k[v(40,(4*(i))+ 5)] = ff(ss[1]); \ + ss[2] ^= ss[1]; k[v(40,(4*(i))+ 6)] = ff(ss[2]); \ + ss[3] ^= ss[2]; k[v(40,(4*(i))+ 7)] = ff(ss[3]); \ +} + +#define kd4(k,i) \ +{ ss[4] = ls_box(ss[3],3) ^ t_use(r,c)[i]; \ + ss[0] ^= ss[4]; ss[4] = ff(ss[4]); k[v(40,(4*(i))+ 4)] = ss[4] ^= k[v(40,(4*(i)))]; \ + ss[1] ^= ss[0]; k[v(40,(4*(i))+ 5)] = ss[4] ^= k[v(40,(4*(i))+ 1)]; \ + ss[2] ^= ss[1]; k[v(40,(4*(i))+ 6)] = ss[4] ^= k[v(40,(4*(i))+ 2)]; \ + ss[3] ^= ss[2]; k[v(40,(4*(i))+ 7)] = ss[4] ^= k[v(40,(4*(i))+ 3)]; \ +} + +#define kdl4(k,i) \ +{ ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[v(40,(4*(i))+ 4)] = ss[0]; \ + ss[1] ^= ss[0]; k[v(40,(4*(i))+ 5)] = ss[1]; \ + ss[2] ^= ss[1]; k[v(40,(4*(i))+ 6)] = ss[2]; \ + ss[3] ^= ss[2]; k[v(40,(4*(i))+ 7)] = ss[3]; \ +} + +#endif + +AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]) +{ uint_32t ss[5]; +#if defined( d_vars ) + d_vars; +#endif + cx->ks[v(40,(0))] = ss[0] = word_in(key, 0); + cx->ks[v(40,(1))] = ss[1] = word_in(key, 1); + cx->ks[v(40,(2))] = ss[2] = word_in(key, 2); + cx->ks[v(40,(3))] = ss[3] = word_in(key, 3); + +#if DEC_UNROLL == NONE + { uint_32t i; + for(i = 0; i < 10; ++i) + k4e(cx->ks, i); +#if !(DEC_ROUND == NO_TABLES) + for(i = N_COLS; i < 10 * N_COLS; ++i) + cx->ks[i] = inv_mcol(cx->ks[i]); +#endif + } +#else + kdf4(cx->ks, 0); kd4(cx->ks, 1); + kd4(cx->ks, 2); kd4(cx->ks, 3); + kd4(cx->ks, 4); kd4(cx->ks, 5); + kd4(cx->ks, 6); kd4(cx->ks, 7); + kd4(cx->ks, 8); kdl4(cx->ks, 9); +#endif + cx->inf.l = 0; + cx->inf.b[0] = 10 * 16; + +#ifdef USE_VIA_ACE_IF_PRESENT + if(VIA_ACE_AVAILABLE) + cx->inf.b[1] = 0xff; +#endif + +#if defined( AES_ERR_CHK ) + return EXIT_SUCCESS; +#endif +} + +#endif + +#if defined(AES_192) || defined(AES_VAR) + +#define k6ef(k,i) \ +{ k[v(48,(6*(i))+ 6)] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; \ + k[v(48,(6*(i))+ 7)] = ss[1] ^= ss[0]; \ + k[v(48,(6*(i))+ 8)] = ss[2] ^= ss[1]; \ + k[v(48,(6*(i))+ 9)] = ss[3] ^= ss[2]; \ +} + +#define k6e(k,i) \ +{ k6ef(k,i); \ + k[v(48,(6*(i))+10)] = ss[4] ^= ss[3]; \ + k[v(48,(6*(i))+11)] = ss[5] ^= ss[4]; \ +} + +#define kdf6(k,i) \ +{ ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[v(48,(6*(i))+ 6)] = ff(ss[0]); \ + ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ff(ss[1]); \ + ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ff(ss[2]); \ + ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ff(ss[3]); \ + ss[4] ^= ss[3]; k[v(48,(6*(i))+10)] = ff(ss[4]); \ + ss[5] ^= ss[4]; k[v(48,(6*(i))+11)] = ff(ss[5]); \ +} + +#define kd6(k,i) \ +{ ss[6] = ls_box(ss[5],3) ^ t_use(r,c)[i]; \ + ss[0] ^= ss[6]; ss[6] = ff(ss[6]); k[v(48,(6*(i))+ 6)] = ss[6] ^= k[v(48,(6*(i)))]; \ + ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ss[6] ^= k[v(48,(6*(i))+ 1)]; \ + ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ss[6] ^= k[v(48,(6*(i))+ 2)]; \ + ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ss[6] ^= k[v(48,(6*(i))+ 3)]; \ + ss[4] ^= ss[3]; k[v(48,(6*(i))+10)] = ss[6] ^= k[v(48,(6*(i))+ 4)]; \ + ss[5] ^= ss[4]; k[v(48,(6*(i))+11)] = ss[6] ^= k[v(48,(6*(i))+ 5)]; \ +} + +#define kdl6(k,i) \ +{ ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[v(48,(6*(i))+ 6)] = ss[0]; \ + ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ss[1]; \ + ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ss[2]; \ + ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ss[3]; \ +} + +AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]) +{ uint_32t ss[7]; +#if defined( d_vars ) + d_vars; +#endif + cx->ks[v(48,(0))] = ss[0] = word_in(key, 0); + cx->ks[v(48,(1))] = ss[1] = word_in(key, 1); + cx->ks[v(48,(2))] = ss[2] = word_in(key, 2); + cx->ks[v(48,(3))] = ss[3] = word_in(key, 3); + +#if DEC_UNROLL == NONE + cx->ks[v(48,(4))] = ss[4] = word_in(key, 4); + cx->ks[v(48,(5))] = ss[5] = word_in(key, 5); + { uint_32t i; + + for(i = 0; i < 7; ++i) + k6e(cx->ks, i); + k6ef(cx->ks, 7); +#if !(DEC_ROUND == NO_TABLES) + for(i = N_COLS; i < 12 * N_COLS; ++i) + cx->ks[i] = inv_mcol(cx->ks[i]); +#endif + } +#else + cx->ks[v(48,(4))] = ff(ss[4] = word_in(key, 4)); + cx->ks[v(48,(5))] = ff(ss[5] = word_in(key, 5)); + kdf6(cx->ks, 0); kd6(cx->ks, 1); + kd6(cx->ks, 2); kd6(cx->ks, 3); + kd6(cx->ks, 4); kd6(cx->ks, 5); + kd6(cx->ks, 6); kdl6(cx->ks, 7); +#endif + cx->inf.l = 0; + cx->inf.b[0] = 12 * 16; + +#ifdef USE_VIA_ACE_IF_PRESENT + if(VIA_ACE_AVAILABLE) + cx->inf.b[1] = 0xff; +#endif + +#if defined( AES_ERR_CHK ) + return EXIT_SUCCESS; +#endif +} + +#endif + +#if defined(AES_256) || defined(AES_VAR) + +#define k8ef(k,i) \ +{ k[v(56,(8*(i))+ 8)] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; \ + k[v(56,(8*(i))+ 9)] = ss[1] ^= ss[0]; \ + k[v(56,(8*(i))+10)] = ss[2] ^= ss[1]; \ + k[v(56,(8*(i))+11)] = ss[3] ^= ss[2]; \ +} + +#define k8e(k,i) \ +{ k8ef(k,i); \ + k[v(56,(8*(i))+12)] = ss[4] ^= ls_box(ss[3],0); \ + k[v(56,(8*(i))+13)] = ss[5] ^= ss[4]; \ + k[v(56,(8*(i))+14)] = ss[6] ^= ss[5]; \ + k[v(56,(8*(i))+15)] = ss[7] ^= ss[6]; \ +} + +#define kdf8(k,i) \ +{ ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[v(56,(8*(i))+ 8)] = ff(ss[0]); \ + ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ff(ss[1]); \ + ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ff(ss[2]); \ + ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ff(ss[3]); \ + ss[4] ^= ls_box(ss[3],0); k[v(56,(8*(i))+12)] = ff(ss[4]); \ + ss[5] ^= ss[4]; k[v(56,(8*(i))+13)] = ff(ss[5]); \ + ss[6] ^= ss[5]; k[v(56,(8*(i))+14)] = ff(ss[6]); \ + ss[7] ^= ss[6]; k[v(56,(8*(i))+15)] = ff(ss[7]); \ +} + +#define kd8(k,i) \ +{ ss[8] = ls_box(ss[7],3) ^ t_use(r,c)[i]; \ + ss[0] ^= ss[8]; ss[8] = ff(ss[8]); k[v(56,(8*(i))+ 8)] = ss[8] ^= k[v(56,(8*(i)))]; \ + ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ss[8] ^= k[v(56,(8*(i))+ 1)]; \ + ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ss[8] ^= k[v(56,(8*(i))+ 2)]; \ + ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ss[8] ^= k[v(56,(8*(i))+ 3)]; \ + ss[8] = ls_box(ss[3],0); \ + ss[4] ^= ss[8]; ss[8] = ff(ss[8]); k[v(56,(8*(i))+12)] = ss[8] ^= k[v(56,(8*(i))+ 4)]; \ + ss[5] ^= ss[4]; k[v(56,(8*(i))+13)] = ss[8] ^= k[v(56,(8*(i))+ 5)]; \ + ss[6] ^= ss[5]; k[v(56,(8*(i))+14)] = ss[8] ^= k[v(56,(8*(i))+ 6)]; \ + ss[7] ^= ss[6]; k[v(56,(8*(i))+15)] = ss[8] ^= k[v(56,(8*(i))+ 7)]; \ +} + +#define kdl8(k,i) \ +{ ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[v(56,(8*(i))+ 8)] = ss[0]; \ + ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ss[1]; \ + ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ss[2]; \ + ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ss[3]; \ +} + +AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]) +{ uint_32t ss[9]; +#if defined( d_vars ) + d_vars; +#endif + cx->ks[v(56,(0))] = ss[0] = word_in(key, 0); + cx->ks[v(56,(1))] = ss[1] = word_in(key, 1); + cx->ks[v(56,(2))] = ss[2] = word_in(key, 2); + cx->ks[v(56,(3))] = ss[3] = word_in(key, 3); + +#if DEC_UNROLL == NONE + cx->ks[v(56,(4))] = ss[4] = word_in(key, 4); + cx->ks[v(56,(5))] = ss[5] = word_in(key, 5); + cx->ks[v(56,(6))] = ss[6] = word_in(key, 6); + cx->ks[v(56,(7))] = ss[7] = word_in(key, 7); + { uint_32t i; + + for(i = 0; i < 6; ++i) + k8e(cx->ks, i); + k8ef(cx->ks, 6); +#if !(DEC_ROUND == NO_TABLES) + for(i = N_COLS; i < 14 * N_COLS; ++i) + cx->ks[i] = inv_mcol(cx->ks[i]); + +#endif + } +#else + ss[4] = word_in(key, 4); cx->ks[v(56,(4))] = ff(ss[4]); + ss[5] = word_in(key, 5); cx->ks[v(56,(5))] = ff(ss[5]); + ss[6] = word_in(key, 6); cx->ks[v(56,(6))] = ff(ss[6]); + ss[7] = word_in(key, 7); cx->ks[v(56,(7))] = ff(ss[7]); + kdf8(cx->ks, 0); kd8(cx->ks, 1); + kd8(cx->ks, 2); kd8(cx->ks, 3); + kd8(cx->ks, 4); kd8(cx->ks, 5); + kdl8(cx->ks, 6); +#endif + cx->inf.l = 0; + cx->inf.b[0] = 14 * 16; + +#ifdef USE_VIA_ACE_IF_PRESENT + if(VIA_ACE_AVAILABLE) + cx->inf.b[1] = 0xff; +#endif + +#if defined( AES_ERR_CHK ) + return EXIT_SUCCESS; +#endif +} + +#endif + +#if defined(AES_VAR) + +AES_RETURN aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1]) +{ + switch(key_len) + { +#if defined( AES_ERR_CHK ) + case 16: case 128: return aes_decrypt_key128(key, cx); + case 24: case 192: return aes_decrypt_key192(key, cx); + case 32: case 256: return aes_decrypt_key256(key, cx); + default: return EXIT_FAILURE; +#else + case 16: case 128: aes_decrypt_key128(key, cx); return; + case 24: case 192: aes_decrypt_key192(key, cx); return; + case 32: case 256: aes_decrypt_key256(key, cx); return; +#endif + } +} + +#endif + +#endif + +#if defined(__cplusplus) +} +#endif diff --git a/src/Crypto/Aesopt.h b/src/Crypto/Aesopt.h index 1b793e43..cf7edbe2 100644 --- a/src/Crypto/Aesopt.h +++ b/src/Crypto/Aesopt.h @@ -1,734 +1,734 @@ -/* - --------------------------------------------------------------------------- - Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. - - LICENSE TERMS - - The free distribution and use of this software is allowed (with or without - changes) provided that: - - 1. source code distributions include the above copyright notice, this - list of conditions and the following disclaimer; - - 2. binary distributions include the above copyright notice, this list - of conditions and the following disclaimer in their documentation; - - 3. the name of the copyright holder is not used to endorse products - built using this software without specific written permission. - - DISCLAIMER - - This software is provided 'as is' with no explicit or implied warranties - in respect of its properties, including, but not limited to, correctness - and/or fitness for purpose. - --------------------------------------------------------------------------- - Issue Date: 20/12/2007 - - This file contains the compilation options for AES (Rijndael) and code - that is common across encryption, key scheduling and table generation. - - OPERATION - - These source code files implement the AES algorithm Rijndael designed by - Joan Daemen and Vincent Rijmen. This version is designed for the standard - block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24 - and 32 bytes). - - This version is designed for flexibility and speed using operations on - 32-bit words rather than operations on bytes. It can be compiled with - either big or little endian internal byte order but is faster when the - native byte order for the processor is used. - - THE CIPHER INTERFACE - - The cipher interface is implemented as an array of bytes in which lower - AES bit sequence indexes map to higher numeric significance within bytes. - - uint_8t (an unsigned 8-bit type) - uint_32t (an unsigned 32-bit type) - struct aes_encrypt_ctx (structure for the cipher encryption context) - struct aes_decrypt_ctx (structure for the cipher decryption context) - AES_RETURN the function return type - - C subroutine calls: - - AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]); - AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]); - AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]); - AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, - const aes_encrypt_ctx cx[1]); - - AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]); - AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]); - AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]); - AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, - const aes_decrypt_ctx cx[1]); - - IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that - you call aes_init() before AES is used so that the tables are initialised. - - C++ aes class subroutines: - - Class AESencrypt for encryption - - Construtors: - AESencrypt(void) - AESencrypt(const unsigned char *key) - 128 bit key - Members: - AES_RETURN key128(const unsigned char *key) - AES_RETURN key192(const unsigned char *key) - AES_RETURN key256(const unsigned char *key) - AES_RETURN encrypt(const unsigned char *in, unsigned char *out) const - - Class AESdecrypt for encryption - Construtors: - AESdecrypt(void) - AESdecrypt(const unsigned char *key) - 128 bit key - Members: - AES_RETURN key128(const unsigned char *key) - AES_RETURN key192(const unsigned char *key) - AES_RETURN key256(const unsigned char *key) - AES_RETURN decrypt(const unsigned char *in, unsigned char *out) const -*/ - -/* Adapted for TrueCrypt */ - -#if !defined( _AESOPT_H ) -#define _AESOPT_H - -#ifdef TC_WINDOWS_BOOT -#define ASM_X86_V2 -#endif - -#if defined( __cplusplus ) -#include "Aescpp.h" -#else -#include "Aes.h" -#endif - - -#include "Common/Endian.h" -#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ -#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ - -#if BYTE_ORDER == LITTLE_ENDIAN -# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN -#endif - -#if BYTE_ORDER == BIG_ENDIAN -# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN -#endif - - -/* CONFIGURATION - THE USE OF DEFINES - - Later in this section there are a number of defines that control the - operation of the code. In each section, the purpose of each define is - explained so that the relevant form can be included or excluded by - setting either 1's or 0's respectively on the branches of the related - #if clauses. The following local defines should not be changed. -*/ - -#define ENCRYPTION_IN_C 1 -#define DECRYPTION_IN_C 2 -#define ENC_KEYING_IN_C 4 -#define DEC_KEYING_IN_C 8 - -#define NO_TABLES 0 -#define ONE_TABLE 1 -#define FOUR_TABLES 4 -#define NONE 0 -#define PARTIAL 1 -#define FULL 2 - -/* --- START OF USER CONFIGURED OPTIONS --- */ - -/* 1. BYTE ORDER WITHIN 32 BIT WORDS - - The fundamental data processing units in Rijndael are 8-bit bytes. The - input, output and key input are all enumerated arrays of bytes in which - bytes are numbered starting at zero and increasing to one less than the - number of bytes in the array in question. This enumeration is only used - for naming bytes and does not imply any adjacency or order relationship - from one byte to another. When these inputs and outputs are considered - as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to - byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte. - In this implementation bits are numbered from 0 to 7 starting at the - numerically least significant end of each byte (bit n represents 2^n). - - However, Rijndael can be implemented more efficiently using 32-bit - words by packing bytes into words so that bytes 4*n to 4*n+3 are placed - into word[n]. While in principle these bytes can be assembled into words - in any positions, this implementation only supports the two formats in - which bytes in adjacent positions within words also have adjacent byte - numbers. This order is called big-endian if the lowest numbered bytes - in words have the highest numeric significance and little-endian if the - opposite applies. - - This code can work in either order irrespective of the order used by the - machine on which it runs. Normally the internal byte order will be set - to the order of the processor on which the code is to be run but this - define can be used to reverse this in special situations - - WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set. - This define will hence be redefined later (in section 4) if necessary -*/ - -#if 1 -#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER -#elif 0 -#define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN -#elif 0 -#define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN -#else -#error The algorithm byte order is not defined -#endif - -/* 2. VIA ACE SUPPORT - - Define this option if support for the VIA ACE is required. This uses - inline assembler instructions and is only implemented for the Microsoft, - Intel and GCC compilers. If VIA ACE is known to be present, then defining - ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption - code. If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if - it is detected (both present and enabled) but the normal AES code will - also be present. - - When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte - aligned; other input/output buffers do not need to be 16 byte aligned - but there are very large performance gains if this can be arranged. - VIA ACE also requires the decryption key schedule to be in reverse - order (which later checks below ensure). -*/ - -#if 0 && !defined( USE_VIA_ACE_IF_PRESENT ) -# define USE_VIA_ACE_IF_PRESENT -#endif - -#if 0 && !defined( ASSUME_VIA_ACE_PRESENT ) -# define ASSUME_VIA_ACE_PRESENT -# endif - -#if defined ( _WIN64 ) || defined( _WIN32_WCE ) || \ - defined( _MSC_VER ) && ( _MSC_VER <= 800 ) -# if defined( USE_VIA_ACE_IF_PRESENT ) -# undef USE_VIA_ACE_IF_PRESENT -# endif -# if defined( ASSUME_VIA_ACE_PRESENT ) -# undef ASSUME_VIA_ACE_PRESENT -# endif -#endif - -/* 3. ASSEMBLER SUPPORT - - This define (which can be on the command line) enables the use of the - assembler code routines for encryption, decryption and key scheduling - as follows: - - ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for - encryption and decryption and but with key scheduling in C - ASM_X86_V2 uses assembler (aes_x86_v2.asm) with compressed tables for - encryption, decryption and key scheduling - ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for - encryption and decryption and but with key scheduling in C - ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for - encryption and decryption and but with key scheduling in C - - Change one 'if 0' below to 'if 1' to select the version or define - as a compilation option. -*/ - -#if 0 && !defined( ASM_X86_V1C ) -# define ASM_X86_V1C -#elif 0 && !defined( ASM_X86_V2 ) -# define ASM_X86_V2 -#elif 0 && !defined( ASM_X86_V2C ) -# define ASM_X86_V2C -#elif 0 && !defined( ASM_AMD64_C ) -# define ASM_AMD64_C -#endif - -#if (defined ( ASM_X86_V1C ) || defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )) \ - && !defined( _M_IX86 ) || defined( ASM_AMD64_C ) && !defined( _M_X64 ) -//# error Assembler code is only available for x86 and AMD64 systems -#endif - -/* 4. FAST INPUT/OUTPUT OPERATIONS. - - On some machines it is possible to improve speed by transferring the - bytes in the input and output arrays to and from the internal 32-bit - variables by addressing these arrays as if they are arrays of 32-bit - words. On some machines this will always be possible but there may - be a large performance penalty if the byte arrays are not aligned on - the normal word boundaries. On other machines this technique will - lead to memory access errors when such 32-bit word accesses are not - properly aligned. The option SAFE_IO avoids such problems but will - often be slower on those machines that support misaligned access - (especially so if care is taken to align the input and output byte - arrays on 32-bit word boundaries). If SAFE_IO is not defined it is - assumed that access to byte arrays as if they are arrays of 32-bit - words will not cause problems when such accesses are misaligned. -*/ -#if 1 && !defined( _MSC_VER ) -#define SAFE_IO -#endif - -/* 5. LOOP UNROLLING - - The code for encryption and decrytpion cycles through a number of rounds - that can be implemented either in a loop or by expanding the code into a - long sequence of instructions, the latter producing a larger program but - one that will often be much faster. The latter is called loop unrolling. - There are also potential speed advantages in expanding two iterations in - a loop with half the number of iterations, which is called partial loop - unrolling. The following options allow partial or full loop unrolling - to be set independently for encryption and decryption -*/ -#if 1 -#define ENC_UNROLL FULL -#elif 0 -#define ENC_UNROLL PARTIAL -#else -#define ENC_UNROLL NONE -#endif - -#if 1 -#define DEC_UNROLL FULL -#elif 0 -#define DEC_UNROLL PARTIAL -#else -#define DEC_UNROLL NONE -#endif - -/* 6. FAST FINITE FIELD OPERATIONS - - If this section is included, tables are used to provide faster finite - field arithmetic (this has no effect if FIXED_TABLES is defined). -*/ -#if !defined (TC_WINDOWS_BOOT) -#define FF_TABLES -#endif - -/* 7. INTERNAL STATE VARIABLE FORMAT - - The internal state of Rijndael is stored in a number of local 32-bit - word varaibles which can be defined either as an array or as individual - names variables. Include this section if you want to store these local - varaibles in arrays. Otherwise individual local variables will be used. -*/ -#if 1 -#define ARRAYS -#endif - -/* 8. FIXED OR DYNAMIC TABLES - - When this section is included the tables used by the code are compiled - statically into the binary file. Otherwise the subroutine aes_init() - must be called to compute them before the code is first used. -*/ -#if !defined (TC_WINDOWS_BOOT) && !(defined( _MSC_VER ) && ( _MSC_VER <= 800 )) -#define FIXED_TABLES -#endif - -/* 9. TABLE ALIGNMENT - - On some sytsems speed will be improved by aligning the AES large lookup - tables on particular boundaries. This define should be set to a power of - two giving the desired alignment. It can be left undefined if alignment - is not needed. This option is specific to the Microsft VC++ compiler - - it seems to sometimes cause trouble for the VC++ version 6 compiler. -*/ - -#if 1 && defined( _MSC_VER ) && ( _MSC_VER >= 1300 ) -#define TABLE_ALIGN 32 -#endif - -/* 10. TABLE OPTIONS - - This cipher proceeds by repeating in a number of cycles known as 'rounds' - which are implemented by a round function which can optionally be speeded - up using tables. The basic tables are each 256 32-bit words, with either - one or four tables being required for each round function depending on - how much speed is required. The encryption and decryption round functions - are different and the last encryption and decrytpion round functions are - different again making four different round functions in all. - - This means that: - 1. Normal encryption and decryption rounds can each use either 0, 1 - or 4 tables and table spaces of 0, 1024 or 4096 bytes each. - 2. The last encryption and decryption rounds can also use either 0, 1 - or 4 tables and table spaces of 0, 1024 or 4096 bytes each. - - Include or exclude the appropriate definitions below to set the number - of tables used by this implementation. -*/ - -#if 1 /* set tables for the normal encryption round */ -#define ENC_ROUND FOUR_TABLES -#elif 0 -#define ENC_ROUND ONE_TABLE -#else -#define ENC_ROUND NO_TABLES -#endif - -#if 1 /* set tables for the last encryption round */ -#define LAST_ENC_ROUND FOUR_TABLES -#elif 0 -#define LAST_ENC_ROUND ONE_TABLE -#else -#define LAST_ENC_ROUND NO_TABLES -#endif - -#if 1 /* set tables for the normal decryption round */ -#define DEC_ROUND FOUR_TABLES -#elif 0 -#define DEC_ROUND ONE_TABLE -#else -#define DEC_ROUND NO_TABLES -#endif - -#if 1 /* set tables for the last decryption round */ -#define LAST_DEC_ROUND FOUR_TABLES -#elif 0 -#define LAST_DEC_ROUND ONE_TABLE -#else -#define LAST_DEC_ROUND NO_TABLES -#endif - -/* The decryption key schedule can be speeded up with tables in the same - way that the round functions can. Include or exclude the following - defines to set this requirement. -*/ -#if 1 -#define KEY_SCHED FOUR_TABLES -#elif 0 -#define KEY_SCHED ONE_TABLE -#else -#define KEY_SCHED NO_TABLES -#endif - -/* ---- END OF USER CONFIGURED OPTIONS ---- */ - -/* VIA ACE support is only available for VC++ and GCC */ - -#if !defined( _MSC_VER ) && !defined( __GNUC__ ) -# if defined( ASSUME_VIA_ACE_PRESENT ) -# undef ASSUME_VIA_ACE_PRESENT -# endif -# if defined( USE_VIA_ACE_IF_PRESENT ) -# undef USE_VIA_ACE_IF_PRESENT -# endif -#endif - -#if defined( ASSUME_VIA_ACE_PRESENT ) && !defined( USE_VIA_ACE_IF_PRESENT ) -#define USE_VIA_ACE_IF_PRESENT -#endif - -#if defined( USE_VIA_ACE_IF_PRESENT ) && !defined ( AES_REV_DKS ) -#define AES_REV_DKS -#endif - -/* Assembler support requires the use of platform byte order */ - -#if ( defined( ASM_X86_V1C ) || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) ) \ - && (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER) -#undef ALGORITHM_BYTE_ORDER -#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER -#endif - -/* In this implementation the columns of the state array are each held in - 32-bit words. The state array can be held in various ways: in an array - of words, in a number of individual word variables or in a number of - processor registers. The following define maps a variable name x and - a column number c to the way the state array variable is to be held. - The first define below maps the state into an array x[c] whereas the - second form maps the state into a number of individual variables x0, - x1, etc. Another form could map individual state colums to machine - register names. -*/ - -#if defined( ARRAYS ) -#define s(x,c) x[c] -#else -#define s(x,c) x##c -#endif - -/* This implementation provides subroutines for encryption, decryption - and for setting the three key lengths (separately) for encryption - and decryption. Since not all functions are needed, masks are set - up here to determine which will be implemented in C -*/ - -#if !defined( AES_ENCRYPT ) -# define EFUNCS_IN_C 0 -#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \ - || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) -# define EFUNCS_IN_C ENC_KEYING_IN_C -#elif !defined( ASM_X86_V2 ) -# define EFUNCS_IN_C ( ENCRYPTION_IN_C | ENC_KEYING_IN_C ) -#else -# define EFUNCS_IN_C 0 -#endif - -#if !defined( AES_DECRYPT ) -# define DFUNCS_IN_C 0 -#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \ - || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) -# define DFUNCS_IN_C DEC_KEYING_IN_C -#elif !defined( ASM_X86_V2 ) -# define DFUNCS_IN_C ( DECRYPTION_IN_C | DEC_KEYING_IN_C ) -#else -# define DFUNCS_IN_C 0 -#endif - -#define FUNCS_IN_C ( EFUNCS_IN_C | DFUNCS_IN_C ) - -/* END OF CONFIGURATION OPTIONS */ - -#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2)) - -/* Disable or report errors on some combinations of options */ - -#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES -#undef LAST_ENC_ROUND -#define LAST_ENC_ROUND NO_TABLES -#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES -#undef LAST_ENC_ROUND -#define LAST_ENC_ROUND ONE_TABLE -#endif - -#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE -#undef ENC_UNROLL -#define ENC_UNROLL NONE -#endif - -#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES -#undef LAST_DEC_ROUND -#define LAST_DEC_ROUND NO_TABLES -#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES -#undef LAST_DEC_ROUND -#define LAST_DEC_ROUND ONE_TABLE -#endif - -#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE -#undef DEC_UNROLL -#define DEC_UNROLL NONE -#endif - -#if defined( bswap32 ) -#define aes_sw32 bswap32 -#elif defined( bswap_32 ) -#define aes_sw32 bswap_32 -#else -#define brot(x,n) (((uint_32t)(x) << n) | ((uint_32t)(x) >> (32 - n))) -#define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00)) -#endif - -/* upr(x,n): rotates bytes within words by n positions, moving bytes to - higher index positions with wrap around into low positions - ups(x,n): moves bytes by n positions to higher index positions in - words but without wrap around - bval(x,n): extracts a byte from a word - - WARNING: The definitions given here are intended only for use with - unsigned variables and with shift counts that are compile - time constants -*/ - -#if ( ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN ) -#define upr(x,n) (((uint_32t)(x) << (8 * (n))) | ((uint_32t)(x) >> (32 - 8 * (n)))) -#define ups(x,n) ((uint_32t) (x) << (8 * (n))) -#define bval(x,n) ((uint_8t)((x) >> (8 * (n)))) -#define bytes2word(b0, b1, b2, b3) \ - (((uint_32t)(b3) << 24) | ((uint_32t)(b2) << 16) | ((uint_32t)(b1) << 8) | (b0)) -#endif - -#if ( ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN ) -#define upr(x,n) (((uint_32t)(x) >> (8 * (n))) | ((uint_32t)(x) << (32 - 8 * (n)))) -#define ups(x,n) ((uint_32t) (x) >> (8 * (n))) -#define bval(x,n) ((uint_8t)((x) >> (24 - 8 * (n)))) -#define bytes2word(b0, b1, b2, b3) \ - (((uint_32t)(b0) << 24) | ((uint_32t)(b1) << 16) | ((uint_32t)(b2) << 8) | (b3)) -#endif - -#if defined( SAFE_IO ) - -#define word_in(x,c) bytes2word(((const uint_8t*)(x)+4*c)[0], ((const uint_8t*)(x)+4*c)[1], \ - ((const uint_8t*)(x)+4*c)[2], ((const uint_8t*)(x)+4*c)[3]) -#define word_out(x,c,v) { ((uint_8t*)(x)+4*c)[0] = bval(v,0); ((uint_8t*)(x)+4*c)[1] = bval(v,1); \ - ((uint_8t*)(x)+4*c)[2] = bval(v,2); ((uint_8t*)(x)+4*c)[3] = bval(v,3); } - -#elif ( ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER ) - -#define word_in(x,c) (*((uint_32t*)(x)+(c))) -#define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = (v)) - -#else - -#define word_in(x,c) aes_sw32(*((uint_32t*)(x)+(c))) -#define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = aes_sw32(v)) - -#endif - -/* the finite field modular polynomial and elements */ - -#define WPOLY 0x011b -#define BPOLY 0x1b - -/* multiply four bytes in GF(2^8) by 'x' {02} in parallel */ - -#define m1 0x80808080 -#define m2 0x7f7f7f7f -#define gf_mulx(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY)) - -/* The following defines provide alternative definitions of gf_mulx that might - give improved performance if a fast 32-bit multiply is not available. Note - that a temporary variable u needs to be defined where gf_mulx is used. - -#define gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6)) -#define m4 (0x01010101 * BPOLY) -#define gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4) -*/ - -/* Work out which tables are needed for the different options */ - -#if defined( ASM_X86_V1C ) -#if defined( ENC_ROUND ) -#undef ENC_ROUND -#endif -#define ENC_ROUND FOUR_TABLES -#if defined( LAST_ENC_ROUND ) -#undef LAST_ENC_ROUND -#endif -#define LAST_ENC_ROUND FOUR_TABLES -#if defined( DEC_ROUND ) -#undef DEC_ROUND -#endif -#define DEC_ROUND FOUR_TABLES -#if defined( LAST_DEC_ROUND ) -#undef LAST_DEC_ROUND -#endif -#define LAST_DEC_ROUND FOUR_TABLES -#if defined( KEY_SCHED ) -#undef KEY_SCHED -#define KEY_SCHED FOUR_TABLES -#endif -#endif - -#if ( FUNCS_IN_C & ENCRYPTION_IN_C ) || defined( ASM_X86_V1C ) -#if ENC_ROUND == ONE_TABLE -#define FT1_SET -#elif ENC_ROUND == FOUR_TABLES -#define FT4_SET -#else -#define SBX_SET -#endif -#if LAST_ENC_ROUND == ONE_TABLE -#define FL1_SET -#elif LAST_ENC_ROUND == FOUR_TABLES -#define FL4_SET -#elif !defined( SBX_SET ) -#define SBX_SET -#endif -#endif - -#if ( FUNCS_IN_C & DECRYPTION_IN_C ) || defined( ASM_X86_V1C ) -#if DEC_ROUND == ONE_TABLE -#define IT1_SET -#elif DEC_ROUND == FOUR_TABLES -#define IT4_SET -#else -#define ISB_SET -#endif -#if LAST_DEC_ROUND == ONE_TABLE -#define IL1_SET -#elif LAST_DEC_ROUND == FOUR_TABLES -#define IL4_SET -#elif !defined(ISB_SET) -#define ISB_SET -#endif -#endif - -#if (FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C) -#if KEY_SCHED == ONE_TABLE -#define LS1_SET -#elif KEY_SCHED == FOUR_TABLES -#define LS4_SET -#elif !defined( SBX_SET ) -#define SBX_SET -#endif -#endif - -#if (FUNCS_IN_C & DEC_KEYING_IN_C) -#if KEY_SCHED == ONE_TABLE -#define IM1_SET -#elif KEY_SCHED == FOUR_TABLES -#define IM4_SET -#elif !defined( SBX_SET ) -#define SBX_SET -#endif -#endif - -/* generic definitions of Rijndael macros that use tables */ - -#define no_table(x,box,vf,rf,c) bytes2word( \ - box[bval(vf(x,0,c),rf(0,c))], \ - box[bval(vf(x,1,c),rf(1,c))], \ - box[bval(vf(x,2,c),rf(2,c))], \ - box[bval(vf(x,3,c),rf(3,c))]) - -#define one_table(x,op,tab,vf,rf,c) \ - ( tab[bval(vf(x,0,c),rf(0,c))] \ - ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \ - ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \ - ^ op(tab[bval(vf(x,3,c),rf(3,c))],3)) - -#define four_tables(x,tab,vf,rf,c) \ - ( tab[0][bval(vf(x,0,c),rf(0,c))] \ - ^ tab[1][bval(vf(x,1,c),rf(1,c))] \ - ^ tab[2][bval(vf(x,2,c),rf(2,c))] \ - ^ tab[3][bval(vf(x,3,c),rf(3,c))]) - -#define vf1(x,r,c) (x) -#define rf1(r,c) (r) -#define rf2(r,c) ((8+r-c)&3) - -/* perform forward and inverse column mix operation on four bytes in long word x in */ -/* parallel. NOTE: x must be a simple variable, NOT an expression in these macros. */ - -#if defined( FM4_SET ) /* not currently used */ -#define fwd_mcol(x) four_tables(x,t_use(f,m),vf1,rf1,0) -#elif defined( FM1_SET ) /* not currently used */ -#define fwd_mcol(x) one_table(x,upr,t_use(f,m),vf1,rf1,0) -#else -#define dec_fmvars uint_32t g2 -#define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1)) -#endif - -#if defined( IM4_SET ) -#define inv_mcol(x) four_tables(x,t_use(i,m),vf1,rf1,0) -#elif defined( IM1_SET ) -#define inv_mcol(x) one_table(x,upr,t_use(i,m),vf1,rf1,0) -#else -#define dec_imvars uint_32t g2, g4, g9 -#define inv_mcol(x) (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \ - (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1)) -#endif - -#if defined( FL4_SET ) -#define ls_box(x,c) four_tables(x,t_use(f,l),vf1,rf2,c) -#elif defined( LS4_SET ) -#define ls_box(x,c) four_tables(x,t_use(l,s),vf1,rf2,c) -#elif defined( FL1_SET ) -#define ls_box(x,c) one_table(x,upr,t_use(f,l),vf1,rf2,c) -#elif defined( LS1_SET ) -#define ls_box(x,c) one_table(x,upr,t_use(l,s),vf1,rf2,c) -#else -#define ls_box(x,c) no_table(x,t_use(s,box),vf1,rf2,c) -#endif - -#if defined( ASM_X86_V1C ) && defined( AES_DECRYPT ) && !defined( ISB_SET ) -#define ISB_SET -#endif - -#endif +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + + This file contains the compilation options for AES (Rijndael) and code + that is common across encryption, key scheduling and table generation. + + OPERATION + + These source code files implement the AES algorithm Rijndael designed by + Joan Daemen and Vincent Rijmen. This version is designed for the standard + block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24 + and 32 bytes). + + This version is designed for flexibility and speed using operations on + 32-bit words rather than operations on bytes. It can be compiled with + either big or little endian internal byte order but is faster when the + native byte order for the processor is used. + + THE CIPHER INTERFACE + + The cipher interface is implemented as an array of bytes in which lower + AES bit sequence indexes map to higher numeric significance within bytes. + + uint_8t (an unsigned 8-bit type) + uint_32t (an unsigned 32-bit type) + struct aes_encrypt_ctx (structure for the cipher encryption context) + struct aes_decrypt_ctx (structure for the cipher decryption context) + AES_RETURN the function return type + + C subroutine calls: + + AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]); + AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]); + AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]); + AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, + const aes_encrypt_ctx cx[1]); + + AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]); + AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]); + AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]); + AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, + const aes_decrypt_ctx cx[1]); + + IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that + you call aes_init() before AES is used so that the tables are initialised. + + C++ aes class subroutines: + + Class AESencrypt for encryption + + Construtors: + AESencrypt(void) + AESencrypt(const unsigned char *key) - 128 bit key + Members: + AES_RETURN key128(const unsigned char *key) + AES_RETURN key192(const unsigned char *key) + AES_RETURN key256(const unsigned char *key) + AES_RETURN encrypt(const unsigned char *in, unsigned char *out) const + + Class AESdecrypt for encryption + Construtors: + AESdecrypt(void) + AESdecrypt(const unsigned char *key) - 128 bit key + Members: + AES_RETURN key128(const unsigned char *key) + AES_RETURN key192(const unsigned char *key) + AES_RETURN key256(const unsigned char *key) + AES_RETURN decrypt(const unsigned char *in, unsigned char *out) const +*/ + +/* Adapted for TrueCrypt */ + +#if !defined( _AESOPT_H ) +#define _AESOPT_H + +#ifdef TC_WINDOWS_BOOT +#define ASM_X86_V2 +#endif + +#if defined( __cplusplus ) +#include "Aescpp.h" +#else +#include "Aes.h" +#endif + + +#include "Common/Endian.h" +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ + +#if BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#endif + + +/* CONFIGURATION - THE USE OF DEFINES + + Later in this section there are a number of defines that control the + operation of the code. In each section, the purpose of each define is + explained so that the relevant form can be included or excluded by + setting either 1's or 0's respectively on the branches of the related + #if clauses. The following local defines should not be changed. +*/ + +#define ENCRYPTION_IN_C 1 +#define DECRYPTION_IN_C 2 +#define ENC_KEYING_IN_C 4 +#define DEC_KEYING_IN_C 8 + +#define NO_TABLES 0 +#define ONE_TABLE 1 +#define FOUR_TABLES 4 +#define NONE 0 +#define PARTIAL 1 +#define FULL 2 + +/* --- START OF USER CONFIGURED OPTIONS --- */ + +/* 1. BYTE ORDER WITHIN 32 BIT WORDS + + The fundamental data processing units in Rijndael are 8-bit bytes. The + input, output and key input are all enumerated arrays of bytes in which + bytes are numbered starting at zero and increasing to one less than the + number of bytes in the array in question. This enumeration is only used + for naming bytes and does not imply any adjacency or order relationship + from one byte to another. When these inputs and outputs are considered + as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to + byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte. + In this implementation bits are numbered from 0 to 7 starting at the + numerically least significant end of each byte (bit n represents 2^n). + + However, Rijndael can be implemented more efficiently using 32-bit + words by packing bytes into words so that bytes 4*n to 4*n+3 are placed + into word[n]. While in principle these bytes can be assembled into words + in any positions, this implementation only supports the two formats in + which bytes in adjacent positions within words also have adjacent byte + numbers. This order is called big-endian if the lowest numbered bytes + in words have the highest numeric significance and little-endian if the + opposite applies. + + This code can work in either order irrespective of the order used by the + machine on which it runs. Normally the internal byte order will be set + to the order of the processor on which the code is to be run but this + define can be used to reverse this in special situations + + WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set. + This define will hence be redefined later (in section 4) if necessary +*/ + +#if 1 +#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER +#elif 0 +#define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN +#elif 0 +#define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN +#else +#error The algorithm byte order is not defined +#endif + +/* 2. VIA ACE SUPPORT + + Define this option if support for the VIA ACE is required. This uses + inline assembler instructions and is only implemented for the Microsoft, + Intel and GCC compilers. If VIA ACE is known to be present, then defining + ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption + code. If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if + it is detected (both present and enabled) but the normal AES code will + also be present. + + When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte + aligned; other input/output buffers do not need to be 16 byte aligned + but there are very large performance gains if this can be arranged. + VIA ACE also requires the decryption key schedule to be in reverse + order (which later checks below ensure). +*/ + +#if 0 && !defined( USE_VIA_ACE_IF_PRESENT ) +# define USE_VIA_ACE_IF_PRESENT +#endif + +#if 0 && !defined( ASSUME_VIA_ACE_PRESENT ) +# define ASSUME_VIA_ACE_PRESENT +# endif + +#if defined ( _WIN64 ) || defined( _WIN32_WCE ) || \ + defined( _MSC_VER ) && ( _MSC_VER <= 800 ) +# if defined( USE_VIA_ACE_IF_PRESENT ) +# undef USE_VIA_ACE_IF_PRESENT +# endif +# if defined( ASSUME_VIA_ACE_PRESENT ) +# undef ASSUME_VIA_ACE_PRESENT +# endif +#endif + +/* 3. ASSEMBLER SUPPORT + + This define (which can be on the command line) enables the use of the + assembler code routines for encryption, decryption and key scheduling + as follows: + + ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for + encryption and decryption and but with key scheduling in C + ASM_X86_V2 uses assembler (aes_x86_v2.asm) with compressed tables for + encryption, decryption and key scheduling + ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for + encryption and decryption and but with key scheduling in C + ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for + encryption and decryption and but with key scheduling in C + + Change one 'if 0' below to 'if 1' to select the version or define + as a compilation option. +*/ + +#if 0 && !defined( ASM_X86_V1C ) +# define ASM_X86_V1C +#elif 0 && !defined( ASM_X86_V2 ) +# define ASM_X86_V2 +#elif 0 && !defined( ASM_X86_V2C ) +# define ASM_X86_V2C +#elif 0 && !defined( ASM_AMD64_C ) +# define ASM_AMD64_C +#endif + +#if (defined ( ASM_X86_V1C ) || defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )) \ + && !defined( _M_IX86 ) || defined( ASM_AMD64_C ) && !defined( _M_X64 ) +//# error Assembler code is only available for x86 and AMD64 systems +#endif + +/* 4. FAST INPUT/OUTPUT OPERATIONS. + + On some machines it is possible to improve speed by transferring the + bytes in the input and output arrays to and from the internal 32-bit + variables by addressing these arrays as if they are arrays of 32-bit + words. On some machines this will always be possible but there may + be a large performance penalty if the byte arrays are not aligned on + the normal word boundaries. On other machines this technique will + lead to memory access errors when such 32-bit word accesses are not + properly aligned. The option SAFE_IO avoids such problems but will + often be slower on those machines that support misaligned access + (especially so if care is taken to align the input and output byte + arrays on 32-bit word boundaries). If SAFE_IO is not defined it is + assumed that access to byte arrays as if they are arrays of 32-bit + words will not cause problems when such accesses are misaligned. +*/ +#if 1 && !defined( _MSC_VER ) +#define SAFE_IO +#endif + +/* 5. LOOP UNROLLING + + The code for encryption and decrytpion cycles through a number of rounds + that can be implemented either in a loop or by expanding the code into a + long sequence of instructions, the latter producing a larger program but + one that will often be much faster. The latter is called loop unrolling. + There are also potential speed advantages in expanding two iterations in + a loop with half the number of iterations, which is called partial loop + unrolling. The following options allow partial or full loop unrolling + to be set independently for encryption and decryption +*/ +#if 1 +#define ENC_UNROLL FULL +#elif 0 +#define ENC_UNROLL PARTIAL +#else +#define ENC_UNROLL NONE +#endif + +#if 1 +#define DEC_UNROLL FULL +#elif 0 +#define DEC_UNROLL PARTIAL +#else +#define DEC_UNROLL NONE +#endif + +/* 6. FAST FINITE FIELD OPERATIONS + + If this section is included, tables are used to provide faster finite + field arithmetic (this has no effect if FIXED_TABLES is defined). +*/ +#if !defined (TC_WINDOWS_BOOT) +#define FF_TABLES +#endif + +/* 7. INTERNAL STATE VARIABLE FORMAT + + The internal state of Rijndael is stored in a number of local 32-bit + word varaibles which can be defined either as an array or as individual + names variables. Include this section if you want to store these local + varaibles in arrays. Otherwise individual local variables will be used. +*/ +#if 1 +#define ARRAYS +#endif + +/* 8. FIXED OR DYNAMIC TABLES + + When this section is included the tables used by the code are compiled + statically into the binary file. Otherwise the subroutine aes_init() + must be called to compute them before the code is first used. +*/ +#if !defined (TC_WINDOWS_BOOT) && !(defined( _MSC_VER ) && ( _MSC_VER <= 800 )) +#define FIXED_TABLES +#endif + +/* 9. TABLE ALIGNMENT + + On some sytsems speed will be improved by aligning the AES large lookup + tables on particular boundaries. This define should be set to a power of + two giving the desired alignment. It can be left undefined if alignment + is not needed. This option is specific to the Microsft VC++ compiler - + it seems to sometimes cause trouble for the VC++ version 6 compiler. +*/ + +#if 1 && defined( _MSC_VER ) && ( _MSC_VER >= 1300 ) +#define TABLE_ALIGN 32 +#endif + +/* 10. TABLE OPTIONS + + This cipher proceeds by repeating in a number of cycles known as 'rounds' + which are implemented by a round function which can optionally be speeded + up using tables. The basic tables are each 256 32-bit words, with either + one or four tables being required for each round function depending on + how much speed is required. The encryption and decryption round functions + are different and the last encryption and decrytpion round functions are + different again making four different round functions in all. + + This means that: + 1. Normal encryption and decryption rounds can each use either 0, 1 + or 4 tables and table spaces of 0, 1024 or 4096 bytes each. + 2. The last encryption and decryption rounds can also use either 0, 1 + or 4 tables and table spaces of 0, 1024 or 4096 bytes each. + + Include or exclude the appropriate definitions below to set the number + of tables used by this implementation. +*/ + +#if 1 /* set tables for the normal encryption round */ +#define ENC_ROUND FOUR_TABLES +#elif 0 +#define ENC_ROUND ONE_TABLE +#else +#define ENC_ROUND NO_TABLES +#endif + +#if 1 /* set tables for the last encryption round */ +#define LAST_ENC_ROUND FOUR_TABLES +#elif 0 +#define LAST_ENC_ROUND ONE_TABLE +#else +#define LAST_ENC_ROUND NO_TABLES +#endif + +#if 1 /* set tables for the normal decryption round */ +#define DEC_ROUND FOUR_TABLES +#elif 0 +#define DEC_ROUND ONE_TABLE +#else +#define DEC_ROUND NO_TABLES +#endif + +#if 1 /* set tables for the last decryption round */ +#define LAST_DEC_ROUND FOUR_TABLES +#elif 0 +#define LAST_DEC_ROUND ONE_TABLE +#else +#define LAST_DEC_ROUND NO_TABLES +#endif + +/* The decryption key schedule can be speeded up with tables in the same + way that the round functions can. Include or exclude the following + defines to set this requirement. +*/ +#if 1 +#define KEY_SCHED FOUR_TABLES +#elif 0 +#define KEY_SCHED ONE_TABLE +#else +#define KEY_SCHED NO_TABLES +#endif + +/* ---- END OF USER CONFIGURED OPTIONS ---- */ + +/* VIA ACE support is only available for VC++ and GCC */ + +#if !defined( _MSC_VER ) && !defined( __GNUC__ ) +# if defined( ASSUME_VIA_ACE_PRESENT ) +# undef ASSUME_VIA_ACE_PRESENT +# endif +# if defined( USE_VIA_ACE_IF_PRESENT ) +# undef USE_VIA_ACE_IF_PRESENT +# endif +#endif + +#if defined( ASSUME_VIA_ACE_PRESENT ) && !defined( USE_VIA_ACE_IF_PRESENT ) +#define USE_VIA_ACE_IF_PRESENT +#endif + +#if defined( USE_VIA_ACE_IF_PRESENT ) && !defined ( AES_REV_DKS ) +#define AES_REV_DKS +#endif + +/* Assembler support requires the use of platform byte order */ + +#if ( defined( ASM_X86_V1C ) || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) ) \ + && (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER) +#undef ALGORITHM_BYTE_ORDER +#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER +#endif + +/* In this implementation the columns of the state array are each held in + 32-bit words. The state array can be held in various ways: in an array + of words, in a number of individual word variables or in a number of + processor registers. The following define maps a variable name x and + a column number c to the way the state array variable is to be held. + The first define below maps the state into an array x[c] whereas the + second form maps the state into a number of individual variables x0, + x1, etc. Another form could map individual state colums to machine + register names. +*/ + +#if defined( ARRAYS ) +#define s(x,c) x[c] +#else +#define s(x,c) x##c +#endif + +/* This implementation provides subroutines for encryption, decryption + and for setting the three key lengths (separately) for encryption + and decryption. Since not all functions are needed, masks are set + up here to determine which will be implemented in C +*/ + +#if !defined( AES_ENCRYPT ) +# define EFUNCS_IN_C 0 +#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \ + || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) +# define EFUNCS_IN_C ENC_KEYING_IN_C +#elif !defined( ASM_X86_V2 ) +# define EFUNCS_IN_C ( ENCRYPTION_IN_C | ENC_KEYING_IN_C ) +#else +# define EFUNCS_IN_C 0 +#endif + +#if !defined( AES_DECRYPT ) +# define DFUNCS_IN_C 0 +#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \ + || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) +# define DFUNCS_IN_C DEC_KEYING_IN_C +#elif !defined( ASM_X86_V2 ) +# define DFUNCS_IN_C ( DECRYPTION_IN_C | DEC_KEYING_IN_C ) +#else +# define DFUNCS_IN_C 0 +#endif + +#define FUNCS_IN_C ( EFUNCS_IN_C | DFUNCS_IN_C ) + +/* END OF CONFIGURATION OPTIONS */ + +#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2)) + +/* Disable or report errors on some combinations of options */ + +#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES +#undef LAST_ENC_ROUND +#define LAST_ENC_ROUND NO_TABLES +#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES +#undef LAST_ENC_ROUND +#define LAST_ENC_ROUND ONE_TABLE +#endif + +#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE +#undef ENC_UNROLL +#define ENC_UNROLL NONE +#endif + +#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES +#undef LAST_DEC_ROUND +#define LAST_DEC_ROUND NO_TABLES +#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES +#undef LAST_DEC_ROUND +#define LAST_DEC_ROUND ONE_TABLE +#endif + +#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE +#undef DEC_UNROLL +#define DEC_UNROLL NONE +#endif + +#if defined( bswap32 ) +#define aes_sw32 bswap32 +#elif defined( bswap_32 ) +#define aes_sw32 bswap_32 +#else +#define brot(x,n) (((uint_32t)(x) << n) | ((uint_32t)(x) >> (32 - n))) +#define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00)) +#endif + +/* upr(x,n): rotates bytes within words by n positions, moving bytes to + higher index positions with wrap around into low positions + ups(x,n): moves bytes by n positions to higher index positions in + words but without wrap around + bval(x,n): extracts a byte from a word + + WARNING: The definitions given here are intended only for use with + unsigned variables and with shift counts that are compile + time constants +*/ + +#if ( ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN ) +#define upr(x,n) (((uint_32t)(x) << (8 * (n))) | ((uint_32t)(x) >> (32 - 8 * (n)))) +#define ups(x,n) ((uint_32t) (x) << (8 * (n))) +#define bval(x,n) ((uint_8t)((x) >> (8 * (n)))) +#define bytes2word(b0, b1, b2, b3) \ + (((uint_32t)(b3) << 24) | ((uint_32t)(b2) << 16) | ((uint_32t)(b1) << 8) | (b0)) +#endif + +#if ( ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN ) +#define upr(x,n) (((uint_32t)(x) >> (8 * (n))) | ((uint_32t)(x) << (32 - 8 * (n)))) +#define ups(x,n) ((uint_32t) (x) >> (8 * (n))) +#define bval(x,n) ((uint_8t)((x) >> (24 - 8 * (n)))) +#define bytes2word(b0, b1, b2, b3) \ + (((uint_32t)(b0) << 24) | ((uint_32t)(b1) << 16) | ((uint_32t)(b2) << 8) | (b3)) +#endif + +#if defined( SAFE_IO ) + +#define word_in(x,c) bytes2word(((const uint_8t*)(x)+4*c)[0], ((const uint_8t*)(x)+4*c)[1], \ + ((const uint_8t*)(x)+4*c)[2], ((const uint_8t*)(x)+4*c)[3]) +#define word_out(x,c,v) { ((uint_8t*)(x)+4*c)[0] = bval(v,0); ((uint_8t*)(x)+4*c)[1] = bval(v,1); \ + ((uint_8t*)(x)+4*c)[2] = bval(v,2); ((uint_8t*)(x)+4*c)[3] = bval(v,3); } + +#elif ( ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER ) + +#define word_in(x,c) (*((uint_32t*)(x)+(c))) +#define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = (v)) + +#else + +#define word_in(x,c) aes_sw32(*((uint_32t*)(x)+(c))) +#define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = aes_sw32(v)) + +#endif + +/* the finite field modular polynomial and elements */ + +#define WPOLY 0x011b +#define BPOLY 0x1b + +/* multiply four bytes in GF(2^8) by 'x' {02} in parallel */ + +#define m1 0x80808080 +#define m2 0x7f7f7f7f +#define gf_mulx(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY)) + +/* The following defines provide alternative definitions of gf_mulx that might + give improved performance if a fast 32-bit multiply is not available. Note + that a temporary variable u needs to be defined where gf_mulx is used. + +#define gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6)) +#define m4 (0x01010101 * BPOLY) +#define gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4) +*/ + +/* Work out which tables are needed for the different options */ + +#if defined( ASM_X86_V1C ) +#if defined( ENC_ROUND ) +#undef ENC_ROUND +#endif +#define ENC_ROUND FOUR_TABLES +#if defined( LAST_ENC_ROUND ) +#undef LAST_ENC_ROUND +#endif +#define LAST_ENC_ROUND FOUR_TABLES +#if defined( DEC_ROUND ) +#undef DEC_ROUND +#endif +#define DEC_ROUND FOUR_TABLES +#if defined( LAST_DEC_ROUND ) +#undef LAST_DEC_ROUND +#endif +#define LAST_DEC_ROUND FOUR_TABLES +#if defined( KEY_SCHED ) +#undef KEY_SCHED +#define KEY_SCHED FOUR_TABLES +#endif +#endif + +#if ( FUNCS_IN_C & ENCRYPTION_IN_C ) || defined( ASM_X86_V1C ) +#if ENC_ROUND == ONE_TABLE +#define FT1_SET +#elif ENC_ROUND == FOUR_TABLES +#define FT4_SET +#else +#define SBX_SET +#endif +#if LAST_ENC_ROUND == ONE_TABLE +#define FL1_SET +#elif LAST_ENC_ROUND == FOUR_TABLES +#define FL4_SET +#elif !defined( SBX_SET ) +#define SBX_SET +#endif +#endif + +#if ( FUNCS_IN_C & DECRYPTION_IN_C ) || defined( ASM_X86_V1C ) +#if DEC_ROUND == ONE_TABLE +#define IT1_SET +#elif DEC_ROUND == FOUR_TABLES +#define IT4_SET +#else +#define ISB_SET +#endif +#if LAST_DEC_ROUND == ONE_TABLE +#define IL1_SET +#elif LAST_DEC_ROUND == FOUR_TABLES +#define IL4_SET +#elif !defined(ISB_SET) +#define ISB_SET +#endif +#endif + +#if (FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C) +#if KEY_SCHED == ONE_TABLE +#define LS1_SET +#elif KEY_SCHED == FOUR_TABLES +#define LS4_SET +#elif !defined( SBX_SET ) +#define SBX_SET +#endif +#endif + +#if (FUNCS_IN_C & DEC_KEYING_IN_C) +#if KEY_SCHED == ONE_TABLE +#define IM1_SET +#elif KEY_SCHED == FOUR_TABLES +#define IM4_SET +#elif !defined( SBX_SET ) +#define SBX_SET +#endif +#endif + +/* generic definitions of Rijndael macros that use tables */ + +#define no_table(x,box,vf,rf,c) bytes2word( \ + box[bval(vf(x,0,c),rf(0,c))], \ + box[bval(vf(x,1,c),rf(1,c))], \ + box[bval(vf(x,2,c),rf(2,c))], \ + box[bval(vf(x,3,c),rf(3,c))]) + +#define one_table(x,op,tab,vf,rf,c) \ + ( tab[bval(vf(x,0,c),rf(0,c))] \ + ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \ + ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \ + ^ op(tab[bval(vf(x,3,c),rf(3,c))],3)) + +#define four_tables(x,tab,vf,rf,c) \ + ( tab[0][bval(vf(x,0,c),rf(0,c))] \ + ^ tab[1][bval(vf(x,1,c),rf(1,c))] \ + ^ tab[2][bval(vf(x,2,c),rf(2,c))] \ + ^ tab[3][bval(vf(x,3,c),rf(3,c))]) + +#define vf1(x,r,c) (x) +#define rf1(r,c) (r) +#define rf2(r,c) ((8+r-c)&3) + +/* perform forward and inverse column mix operation on four bytes in long word x in */ +/* parallel. NOTE: x must be a simple variable, NOT an expression in these macros. */ + +#if defined( FM4_SET ) /* not currently used */ +#define fwd_mcol(x) four_tables(x,t_use(f,m),vf1,rf1,0) +#elif defined( FM1_SET ) /* not currently used */ +#define fwd_mcol(x) one_table(x,upr,t_use(f,m),vf1,rf1,0) +#else +#define dec_fmvars uint_32t g2 +#define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1)) +#endif + +#if defined( IM4_SET ) +#define inv_mcol(x) four_tables(x,t_use(i,m),vf1,rf1,0) +#elif defined( IM1_SET ) +#define inv_mcol(x) one_table(x,upr,t_use(i,m),vf1,rf1,0) +#else +#define dec_imvars uint_32t g2, g4, g9 +#define inv_mcol(x) (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \ + (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1)) +#endif + +#if defined( FL4_SET ) +#define ls_box(x,c) four_tables(x,t_use(f,l),vf1,rf2,c) +#elif defined( LS4_SET ) +#define ls_box(x,c) four_tables(x,t_use(l,s),vf1,rf2,c) +#elif defined( FL1_SET ) +#define ls_box(x,c) one_table(x,upr,t_use(f,l),vf1,rf2,c) +#elif defined( LS1_SET ) +#define ls_box(x,c) one_table(x,upr,t_use(l,s),vf1,rf2,c) +#else +#define ls_box(x,c) no_table(x,t_use(s,box),vf1,rf2,c) +#endif + +#if defined( ASM_X86_V1C ) && defined( AES_DECRYPT ) && !defined( ISB_SET ) +#define ISB_SET +#endif + +#endif diff --git a/src/Crypto/Aestab.c b/src/Crypto/Aestab.c index 2fd53789..1effb6f6 100644 --- a/src/Crypto/Aestab.c +++ b/src/Crypto/Aestab.c @@ -1,428 +1,428 @@ -/* - --------------------------------------------------------------------------- - Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. - - LICENSE TERMS - - The free distribution and use of this software is allowed (with or without - changes) provided that: - - 1. source code distributions include the above copyright notice, this - list of conditions and the following disclaimer; - - 2. binary distributions include the above copyright notice, this list - of conditions and the following disclaimer in their documentation; - - 3. the name of the copyright holder is not used to endorse products - built using this software without specific written permission. - - DISCLAIMER - - This software is provided 'as is' with no explicit or implied warranties - in respect of its properties, including, but not limited to, correctness - and/or fitness for purpose. - --------------------------------------------------------------------------- - Issue Date: 20/12/2007 -*/ - -/* Adapted for TrueCrypt: - - Added run-time table generator for Aes_x86_v2.asm -*/ - -#define DO_TABLES - -#include "Aes.h" -#include "Aesopt.h" - -#if defined(FIXED_TABLES) - -#define sb_data(w) {\ - w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\ - w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\ - w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\ - w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\ - w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\ - w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\ - w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\ - w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\ - w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\ - w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\ - w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\ - w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\ - w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\ - w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\ - w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\ - w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\ - w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\ - w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\ - w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\ - w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\ - w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\ - w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\ - w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\ - w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\ - w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\ - w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\ - w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\ - w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\ - w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\ - w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\ - w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\ - w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) } - -#define isb_data(w) {\ - w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\ - w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\ - w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\ - w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\ - w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\ - w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\ - w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\ - w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\ - w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\ - w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\ - w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\ - w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\ - w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\ - w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\ - w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\ - w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\ - w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\ - w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\ - w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\ - w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\ - w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\ - w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\ - w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\ - w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\ - w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\ - w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\ - w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\ - w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\ - w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\ - w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\ - w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\ - w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d) } - -#define mm_data(w) {\ - w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\ - w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\ - w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\ - w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\ - w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\ - w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\ - w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\ - w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\ - w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\ - w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\ - w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\ - w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\ - w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\ - w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\ - w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\ - w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\ - w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\ - w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\ - w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\ - w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\ - w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\ - w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\ - w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\ - w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\ - w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\ - w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\ - w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\ - w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\ - w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\ - w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\ - w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\ - w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff) } - -#define rc_data(w) {\ - w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\ - w(0x1b), w(0x36) } - -#define h0(x) (x) - -#define w0(p) bytes2word(p, 0, 0, 0) -#define w1(p) bytes2word(0, p, 0, 0) -#define w2(p) bytes2word(0, 0, p, 0) -#define w3(p) bytes2word(0, 0, 0, p) - -#define u0(p) bytes2word(f2(p), p, p, f3(p)) -#define u1(p) bytes2word(f3(p), f2(p), p, p) -#define u2(p) bytes2word(p, f3(p), f2(p), p) -#define u3(p) bytes2word(p, p, f3(p), f2(p)) - -#define v0(p) bytes2word(fe(p), f9(p), fd(p), fb(p)) -#define v1(p) bytes2word(fb(p), fe(p), f9(p), fd(p)) -#define v2(p) bytes2word(fd(p), fb(p), fe(p), f9(p)) -#define v3(p) bytes2word(f9(p), fd(p), fb(p), fe(p)) - -#endif - -#if defined(FIXED_TABLES) || !defined(FF_TABLES) - -#define f2(x) ((x<<1) ^ (((x>>7) & 1) * WPOLY)) -#define f4(x) ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY)) -#define f8(x) ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) \ - ^ (((x>>5) & 4) * WPOLY)) -#define f3(x) (f2(x) ^ x) -#define f9(x) (f8(x) ^ x) -#define fb(x) (f8(x) ^ f2(x) ^ x) -#define fd(x) (f8(x) ^ f4(x) ^ x) -#define fe(x) (f8(x) ^ f4(x) ^ f2(x)) - -#else - -#define f2(x) ((x) ? pow[log[x] + 0x19] : 0) -#define f3(x) ((x) ? pow[log[x] + 0x01] : 0) -#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0) -#define fb(x) ((x) ? pow[log[x] + 0x68] : 0) -#define fd(x) ((x) ? pow[log[x] + 0xee] : 0) -#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0) -#define fi(x) ((x) ? pow[ 255 - log[x]] : 0) - -#endif - -#include "Aestab.h" - -#if defined(__cplusplus) -extern "C" -{ -#endif - -#if defined(FIXED_TABLES) - -/* implemented in case of wrong call for fixed tables */ - -AES_RETURN aes_init(void) -{ - return EXIT_SUCCESS; -} - -#else /* dynamic table generation */ - -#if !defined(FF_TABLES) - -/* Generate the tables for the dynamic table option - - It will generally be sensible to use tables to compute finite - field multiplies and inverses but where memory is scarse this - code might sometimes be better. But it only has effect during - initialisation so its pretty unimportant in overall terms. -*/ - -/* return 2 ^ (n - 1) where n is the bit number of the highest bit - set in x with x in the range 1 < x < 0x00000200. This form is - used so that locals within fi can be bytes rather than words -*/ - -static uint_8t hibit(const uint_32t x) -{ uint_8t r = (uint_8t)((x >> 1) | (x >> 2)); - - r |= (r >> 2); - r |= (r >> 4); - return (r + 1) >> 1; -} - -/* return the inverse of the finite field element x */ - -static uint_8t fi(const uint_8t x) -{ uint_8t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0; - - if(x < 2) return x; - - for(;;) - { - if(!n1) return v1; - - while(n2 >= n1) - { - n2 /= n1; p2 ^= p1 * n2; v2 ^= v1 * n2; n2 = hibit(p2); - } - - if(!n2) return v2; - - while(n1 >= n2) - { - n1 /= n2; p1 ^= p2 * n1; v1 ^= v2 * n1; n1 = hibit(p1); - } - } -} - -#endif - -/* The forward and inverse affine transformations used in the S-box */ - -#define fwd_affine(x) \ - (w = (uint_32t)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(uint_8t)(w^(w>>8))) - -#define inv_affine(x) \ - (w = (uint_32t)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(uint_8t)(w^(w>>8))) - -static int init = 0; - -#ifdef TC_WINDOWS_BOOT - -#pragma optimize ("l", on) -uint_8t aes_enc_tab[256][8]; -uint_8t aes_dec_tab[256][8]; - -#endif - -AES_RETURN aes_init(void) -{ uint_32t i, w; - -#ifdef TC_WINDOWS_BOOT - - if (init) - return EXIT_SUCCESS; - - for (i = 0; i < 256; ++i) - { - uint_8t x = fwd_affine(fi((uint_8t)i)); - aes_enc_tab[i][0] = 0; - aes_enc_tab[i][1] = x; - aes_enc_tab[i][2] = x; - aes_enc_tab[i][3] = f3(x); - aes_enc_tab[i][4] = f2(x); - aes_enc_tab[i][5] = x; - aes_enc_tab[i][6] = x; - aes_enc_tab[i][7] = f3(x); - - x = fi((uint_8t)inv_affine((uint_8t)i)); - aes_dec_tab[i][0] = fe(x); - aes_dec_tab[i][1] = f9(x); - aes_dec_tab[i][2] = fd(x); - aes_dec_tab[i][3] = fb(x); - aes_dec_tab[i][4] = fe(x); - aes_dec_tab[i][5] = f9(x); - aes_dec_tab[i][6] = fd(x); - aes_dec_tab[i][7] = x; - } - -#else // TC_WINDOWS_BOOT - -#if defined(FF_TABLES) - - uint_8t pow[512], log[256]; - - if(init) - return EXIT_SUCCESS; - /* log and power tables for GF(2^8) finite field with - WPOLY as modular polynomial - the simplest primitive - root is 0x03, used here to generate the tables - */ - - i = 0; w = 1; - do - { - pow[i] = (uint_8t)w; - pow[i + 255] = (uint_8t)w; - log[w] = (uint_8t)i++; - w ^= (w << 1) ^ (w & 0x80 ? WPOLY : 0); - } - while (w != 1); - -#else - if(init) - return EXIT_SUCCESS; -#endif - - for(i = 0, w = 1; i < RC_LENGTH; ++i) - { - t_set(r,c)[i] = bytes2word(w, 0, 0, 0); - w = f2(w); - } - - for(i = 0; i < 256; ++i) - { uint_8t b; - - b = fwd_affine(fi((uint_8t)i)); - w = bytes2word(f2(b), b, b, f3(b)); - -#if defined( SBX_SET ) - t_set(s,box)[i] = b; -#endif - -#if defined( FT1_SET ) /* tables for a normal encryption round */ - t_set(f,n)[i] = w; -#endif -#if defined( FT4_SET ) - t_set(f,n)[0][i] = w; - t_set(f,n)[1][i] = upr(w,1); - t_set(f,n)[2][i] = upr(w,2); - t_set(f,n)[3][i] = upr(w,3); -#endif - w = bytes2word(b, 0, 0, 0); - -#if defined( FL1_SET ) /* tables for last encryption round (may also */ - t_set(f,l)[i] = w; /* be used in the key schedule) */ -#endif -#if defined( FL4_SET ) - t_set(f,l)[0][i] = w; - t_set(f,l)[1][i] = upr(w,1); - t_set(f,l)[2][i] = upr(w,2); - t_set(f,l)[3][i] = upr(w,3); -#endif - -#if defined( LS1_SET ) /* table for key schedule if t_set(f,l) above is*/ - t_set(l,s)[i] = w; /* not of the required form */ -#endif -#if defined( LS4_SET ) - t_set(l,s)[0][i] = w; - t_set(l,s)[1][i] = upr(w,1); - t_set(l,s)[2][i] = upr(w,2); - t_set(l,s)[3][i] = upr(w,3); -#endif - - b = fi(inv_affine((uint_8t)i)); - w = bytes2word(fe(b), f9(b), fd(b), fb(b)); - -#if defined( IM1_SET ) /* tables for the inverse mix column operation */ - t_set(i,m)[b] = w; -#endif -#if defined( IM4_SET ) - t_set(i,m)[0][b] = w; - t_set(i,m)[1][b] = upr(w,1); - t_set(i,m)[2][b] = upr(w,2); - t_set(i,m)[3][b] = upr(w,3); -#endif - -#if defined( ISB_SET ) - t_set(i,box)[i] = b; -#endif -#if defined( IT1_SET ) /* tables for a normal decryption round */ - t_set(i,n)[i] = w; -#endif -#if defined( IT4_SET ) - t_set(i,n)[0][i] = w; - t_set(i,n)[1][i] = upr(w,1); - t_set(i,n)[2][i] = upr(w,2); - t_set(i,n)[3][i] = upr(w,3); -#endif - w = bytes2word(b, 0, 0, 0); -#if defined( IL1_SET ) /* tables for last decryption round */ - t_set(i,l)[i] = w; -#endif -#if defined( IL4_SET ) - t_set(i,l)[0][i] = w; - t_set(i,l)[1][i] = upr(w,1); - t_set(i,l)[2][i] = upr(w,2); - t_set(i,l)[3][i] = upr(w,3); -#endif - } - -#endif // TC_WINDOWS_BOOT - - init = 1; - return EXIT_SUCCESS; -} - -#endif - -#if defined(__cplusplus) -} -#endif - +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 20/12/2007 +*/ + +/* Adapted for TrueCrypt: + - Added run-time table generator for Aes_x86_v2.asm +*/ + +#define DO_TABLES + +#include "Aes.h" +#include "Aesopt.h" + +#if defined(FIXED_TABLES) + +#define sb_data(w) {\ + w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\ + w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\ + w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\ + w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\ + w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\ + w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\ + w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\ + w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\ + w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\ + w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\ + w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\ + w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\ + w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\ + w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\ + w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\ + w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\ + w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\ + w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\ + w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\ + w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\ + w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\ + w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\ + w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\ + w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\ + w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\ + w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\ + w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\ + w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\ + w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\ + w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\ + w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\ + w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) } + +#define isb_data(w) {\ + w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\ + w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\ + w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\ + w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\ + w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\ + w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\ + w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\ + w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\ + w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\ + w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\ + w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\ + w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\ + w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\ + w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\ + w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\ + w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\ + w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\ + w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\ + w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\ + w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\ + w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\ + w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\ + w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\ + w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\ + w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\ + w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\ + w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\ + w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\ + w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\ + w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\ + w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\ + w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d) } + +#define mm_data(w) {\ + w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\ + w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\ + w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\ + w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\ + w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\ + w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\ + w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\ + w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\ + w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\ + w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\ + w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\ + w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\ + w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\ + w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\ + w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\ + w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\ + w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\ + w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\ + w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\ + w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\ + w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\ + w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\ + w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\ + w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\ + w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\ + w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\ + w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\ + w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\ + w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\ + w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\ + w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\ + w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff) } + +#define rc_data(w) {\ + w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\ + w(0x1b), w(0x36) } + +#define h0(x) (x) + +#define w0(p) bytes2word(p, 0, 0, 0) +#define w1(p) bytes2word(0, p, 0, 0) +#define w2(p) bytes2word(0, 0, p, 0) +#define w3(p) bytes2word(0, 0, 0, p) + +#define u0(p) bytes2word(f2(p), p, p, f3(p)) +#define u1(p) bytes2word(f3(p), f2(p), p, p) +#define u2(p) bytes2word(p, f3(p), f2(p), p) +#define u3(p) bytes2word(p, p, f3(p), f2(p)) + +#define v0(p) bytes2word(fe(p), f9(p), fd(p), fb(p)) +#define v1(p) bytes2word(fb(p), fe(p), f9(p), fd(p)) +#define v2(p) bytes2word(fd(p), fb(p), fe(p), f9(p)) +#define v3(p) bytes2word(f9(p), fd(p), fb(p), fe(p)) + +#endif + +#if defined(FIXED_TABLES) || !defined(FF_TABLES) + +#define f2(x) ((x<<1) ^ (((x>>7) & 1) * WPOLY)) +#define f4(x) ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY)) +#define f8(x) ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) \ + ^ (((x>>5) & 4) * WPOLY)) +#define f3(x) (f2(x) ^ x) +#define f9(x) (f8(x) ^ x) +#define fb(x) (f8(x) ^ f2(x) ^ x) +#define fd(x) (f8(x) ^ f4(x) ^ x) +#define fe(x) (f8(x) ^ f4(x) ^ f2(x)) + +#else + +#define f2(x) ((x) ? pow[log[x] + 0x19] : 0) +#define f3(x) ((x) ? pow[log[x] + 0x01] : 0) +#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0) +#define fb(x) ((x) ? pow[log[x] + 0x68] : 0) +#define fd(x) ((x) ? pow[log[x] + 0xee] : 0) +#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0) +#define fi(x) ((x) ? pow[ 255 - log[x]] : 0) + +#endif + +#include "Aestab.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#if defined(FIXED_TABLES) + +/* implemented in case of wrong call for fixed tables */ + +AES_RETURN aes_init(void) +{ + return EXIT_SUCCESS; +} + +#else /* dynamic table generation */ + +#if !defined(FF_TABLES) + +/* Generate the tables for the dynamic table option + + It will generally be sensible to use tables to compute finite + field multiplies and inverses but where memory is scarse this + code might sometimes be better. But it only has effect during + initialisation so its pretty unimportant in overall terms. +*/ + +/* return 2 ^ (n - 1) where n is the bit number of the highest bit + set in x with x in the range 1 < x < 0x00000200. This form is + used so that locals within fi can be bytes rather than words +*/ + +static uint_8t hibit(const uint_32t x) +{ uint_8t r = (uint_8t)((x >> 1) | (x >> 2)); + + r |= (r >> 2); + r |= (r >> 4); + return (r + 1) >> 1; +} + +/* return the inverse of the finite field element x */ + +static uint_8t fi(const uint_8t x) +{ uint_8t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0; + + if(x < 2) return x; + + for(;;) + { + if(!n1) return v1; + + while(n2 >= n1) + { + n2 /= n1; p2 ^= p1 * n2; v2 ^= v1 * n2; n2 = hibit(p2); + } + + if(!n2) return v2; + + while(n1 >= n2) + { + n1 /= n2; p1 ^= p2 * n1; v1 ^= v2 * n1; n1 = hibit(p1); + } + } +} + +#endif + +/* The forward and inverse affine transformations used in the S-box */ + +#define fwd_affine(x) \ + (w = (uint_32t)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(uint_8t)(w^(w>>8))) + +#define inv_affine(x) \ + (w = (uint_32t)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(uint_8t)(w^(w>>8))) + +static int init = 0; + +#ifdef TC_WINDOWS_BOOT + +#pragma optimize ("l", on) +uint_8t aes_enc_tab[256][8]; +uint_8t aes_dec_tab[256][8]; + +#endif + +AES_RETURN aes_init(void) +{ uint_32t i, w; + +#ifdef TC_WINDOWS_BOOT + + if (init) + return EXIT_SUCCESS; + + for (i = 0; i < 256; ++i) + { + uint_8t x = fwd_affine(fi((uint_8t)i)); + aes_enc_tab[i][0] = 0; + aes_enc_tab[i][1] = x; + aes_enc_tab[i][2] = x; + aes_enc_tab[i][3] = f3(x); + aes_enc_tab[i][4] = f2(x); + aes_enc_tab[i][5] = x; + aes_enc_tab[i][6] = x; + aes_enc_tab[i][7] = f3(x); + + x = fi((uint_8t)inv_affine((uint_8t)i)); + aes_dec_tab[i][0] = fe(x); + aes_dec_tab[i][1] = f9(x); + aes_dec_tab[i][2] = fd(x); + aes_dec_tab[i][3] = fb(x); + aes_dec_tab[i][4] = fe(x); + aes_dec_tab[i][5] = f9(x); + aes_dec_tab[i][6] = fd(x); + aes_dec_tab[i][7] = x; + } + +#else // TC_WINDOWS_BOOT + +#if defined(FF_TABLES) + + uint_8t pow[512], log[256]; + + if(init) + return EXIT_SUCCESS; + /* log and power tables for GF(2^8) finite field with + WPOLY as modular polynomial - the simplest primitive + root is 0x03, used here to generate the tables + */ + + i = 0; w = 1; + do + { + pow[i] = (uint_8t)w; + pow[i + 255] = (uint_8t)w; + log[w] = (uint_8t)i++; + w ^= (w << 1) ^ (w & 0x80 ? WPOLY : 0); + } + while (w != 1); + +#else + if(init) + return EXIT_SUCCESS; +#endif + + for(i = 0, w = 1; i < RC_LENGTH; ++i) + { + t_set(r,c)[i] = bytes2word(w, 0, 0, 0); + w = f2(w); + } + + for(i = 0; i < 256; ++i) + { uint_8t b; + + b = fwd_affine(fi((uint_8t)i)); + w = bytes2word(f2(b), b, b, f3(b)); + +#if defined( SBX_SET ) + t_set(s,box)[i] = b; +#endif + +#if defined( FT1_SET ) /* tables for a normal encryption round */ + t_set(f,n)[i] = w; +#endif +#if defined( FT4_SET ) + t_set(f,n)[0][i] = w; + t_set(f,n)[1][i] = upr(w,1); + t_set(f,n)[2][i] = upr(w,2); + t_set(f,n)[3][i] = upr(w,3); +#endif + w = bytes2word(b, 0, 0, 0); + +#if defined( FL1_SET ) /* tables for last encryption round (may also */ + t_set(f,l)[i] = w; /* be used in the key schedule) */ +#endif +#if defined( FL4_SET ) + t_set(f,l)[0][i] = w; + t_set(f,l)[1][i] = upr(w,1); + t_set(f,l)[2][i] = upr(w,2); + t_set(f,l)[3][i] = upr(w,3); +#endif + +#if defined( LS1_SET ) /* table for key schedule if t_set(f,l) above is*/ + t_set(l,s)[i] = w; /* not of the required form */ +#endif +#if defined( LS4_SET ) + t_set(l,s)[0][i] = w; + t_set(l,s)[1][i] = upr(w,1); + t_set(l,s)[2][i] = upr(w,2); + t_set(l,s)[3][i] = upr(w,3); +#endif + + b = fi(inv_affine((uint_8t)i)); + w = bytes2word(fe(b), f9(b), fd(b), fb(b)); + +#if defined( IM1_SET ) /* tables for the inverse mix column operation */ + t_set(i,m)[b] = w; +#endif +#if defined( IM4_SET ) + t_set(i,m)[0][b] = w; + t_set(i,m)[1][b] = upr(w,1); + t_set(i,m)[2][b] = upr(w,2); + t_set(i,m)[3][b] = upr(w,3); +#endif + +#if defined( ISB_SET ) + t_set(i,box)[i] = b; +#endif +#if defined( IT1_SET ) /* tables for a normal decryption round */ + t_set(i,n)[i] = w; +#endif +#if defined( IT4_SET ) + t_set(i,n)[0][i] = w; + t_set(i,n)[1][i] = upr(w,1); + t_set(i,n)[2][i] = upr(w,2); + t_set(i,n)[3][i] = upr(w,3); +#endif + w = bytes2word(b, 0, 0, 0); +#if defined( IL1_SET ) /* tables for last decryption round */ + t_set(i,l)[i] = w; +#endif +#if defined( IL4_SET ) + t_set(i,l)[0][i] = w; + t_set(i,l)[1][i] = upr(w,1); + t_set(i,l)[2][i] = upr(w,2); + t_set(i,l)[3][i] = upr(w,3); +#endif + } + +#endif // TC_WINDOWS_BOOT + + init = 1; + return EXIT_SUCCESS; +} + +#endif + +#if defined(__cplusplus) +} +#endif + diff --git a/src/Crypto/Aestab.h b/src/Crypto/Aestab.h index 2ad1b034..e52e0057 100644 --- a/src/Crypto/Aestab.h +++ b/src/Crypto/Aestab.h @@ -1,174 +1,174 @@ -/* - --------------------------------------------------------------------------- - Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. - - LICENSE TERMS - - The free distribution and use of this software is allowed (with or without - changes) provided that: - - 1. source code distributions include the above copyright notice, this - list of conditions and the following disclaimer; - - 2. binary distributions include the above copyright notice, this list - of conditions and the following disclaimer in their documentation; - - 3. the name of the copyright holder is not used to endorse products - built using this software without specific written permission. - - DISCLAIMER - - This software is provided 'as is' with no explicit or implied warranties - in respect of its properties, including, but not limited to, correctness - and/or fitness for purpose. - --------------------------------------------------------------------------- - Issue Date: 20/12/2007 - - This file contains the code for declaring the tables needed to implement - AES. The file aesopt.h is assumed to be included before this header file. - If there are no global variables, the definitions here can be used to put - the AES tables in a structure so that a pointer can then be added to the - AES context to pass them to the AES routines that need them. If this - facility is used, the calling program has to ensure that this pointer is - managed appropriately. In particular, the value of the t_dec(in,it) item - in the table structure must be set to zero in order to ensure that the - tables are initialised. In practice the three code sequences in aeskey.c - that control the calls to aes_init() and the aes_init() routine itself will - have to be changed for a specific implementation. If global variables are - available it will generally be preferable to use them with the precomputed - FIXED_TABLES option that uses static global tables. - - The following defines can be used to control the way the tables - are defined, initialised and used in embedded environments that - require special features for these purposes - - the 't_dec' construction is used to declare fixed table arrays - the 't_set' construction is used to set fixed table values - the 't_use' construction is used to access fixed table values - - 256 byte tables: - - t_xxx(s,box) => forward S box - t_xxx(i,box) => inverse S box - - 256 32-bit word OR 4 x 256 32-bit word tables: - - t_xxx(f,n) => forward normal round - t_xxx(f,l) => forward last round - t_xxx(i,n) => inverse normal round - t_xxx(i,l) => inverse last round - t_xxx(l,s) => key schedule table - t_xxx(i,m) => key schedule table - - Other variables and tables: - - t_xxx(r,c) => the rcon table -*/ - -#if !defined( _AESTAB_H ) -#define _AESTAB_H - -#define t_dec(m,n) t_##m##n -#define t_set(m,n) t_##m##n -#define t_use(m,n) t_##m##n - -#if defined(FIXED_TABLES) -# if !defined( __GNUC__ ) && (defined( __MSDOS__ ) || defined( __WIN16__ )) -/* make tables far data to avoid using too much DGROUP space (PG) */ -# define CONST const far -# else -# define CONST const -# endif -#else -# define CONST -#endif - -#if defined(__cplusplus) -# define EXTERN extern "C" -#elif defined(DO_TABLES) -# define EXTERN -#else -# define EXTERN extern -#endif - -#if defined(_MSC_VER) && defined(TABLE_ALIGN) -#define ALIGN __declspec(align(TABLE_ALIGN)) -#else -#define ALIGN -#endif - -#if defined( __WATCOMC__ ) && ( __WATCOMC__ >= 1100 ) -# define XP_DIR __cdecl -#else -# define XP_DIR -#endif - -#if defined(DO_TABLES) && defined(FIXED_TABLES) -#define d_1(t,n,b,e) EXTERN ALIGN CONST XP_DIR t n[256] = b(e) -#define d_4(t,n,b,e,f,g,h) EXTERN ALIGN CONST XP_DIR t n[4][256] = { b(e), b(f), b(g), b(h) } -EXTERN ALIGN CONST uint_32t t_dec(r,c)[RC_LENGTH] = rc_data(w0); -#else -#define d_1(t,n,b,e) EXTERN ALIGN CONST XP_DIR t n[256] -#define d_4(t,n,b,e,f,g,h) EXTERN ALIGN CONST XP_DIR t n[4][256] -EXTERN ALIGN CONST uint_32t t_dec(r,c)[RC_LENGTH]; -#endif - -#if defined( SBX_SET ) - d_1(uint_8t, t_dec(s,box), sb_data, h0); -#endif -#if defined( ISB_SET ) - d_1(uint_8t, t_dec(i,box), isb_data, h0); -#endif - -#if defined( FT1_SET ) - d_1(uint_32t, t_dec(f,n), sb_data, u0); -#endif -#if defined( FT4_SET ) - d_4(uint_32t, t_dec(f,n), sb_data, u0, u1, u2, u3); -#endif - -#if defined( FL1_SET ) - d_1(uint_32t, t_dec(f,l), sb_data, w0); -#endif -#if defined( FL4_SET ) - d_4(uint_32t, t_dec(f,l), sb_data, w0, w1, w2, w3); -#endif - -#if defined( IT1_SET ) - d_1(uint_32t, t_dec(i,n), isb_data, v0); -#endif -#if defined( IT4_SET ) - d_4(uint_32t, t_dec(i,n), isb_data, v0, v1, v2, v3); -#endif - -#if defined( IL1_SET ) - d_1(uint_32t, t_dec(i,l), isb_data, w0); -#endif -#if defined( IL4_SET ) - d_4(uint_32t, t_dec(i,l), isb_data, w0, w1, w2, w3); -#endif - -#if defined( LS1_SET ) -#if defined( FL1_SET ) -#undef LS1_SET -#else - d_1(uint_32t, t_dec(l,s), sb_data, w0); -#endif -#endif - -#if defined( LS4_SET ) -#if defined( FL4_SET ) -#undef LS4_SET -#else - d_4(uint_32t, t_dec(l,s), sb_data, w0, w1, w2, w3); -#endif -#endif - -#if defined( IM1_SET ) - d_1(uint_32t, t_dec(i,m), mm_data, v0); -#endif -#if defined( IM4_SET ) - d_4(uint_32t, t_dec(i,m), mm_data, v0, v1, v2, v3); -#endif - -#endif +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + + This file contains the code for declaring the tables needed to implement + AES. The file aesopt.h is assumed to be included before this header file. + If there are no global variables, the definitions here can be used to put + the AES tables in a structure so that a pointer can then be added to the + AES context to pass them to the AES routines that need them. If this + facility is used, the calling program has to ensure that this pointer is + managed appropriately. In particular, the value of the t_dec(in,it) item + in the table structure must be set to zero in order to ensure that the + tables are initialised. In practice the three code sequences in aeskey.c + that control the calls to aes_init() and the aes_init() routine itself will + have to be changed for a specific implementation. If global variables are + available it will generally be preferable to use them with the precomputed + FIXED_TABLES option that uses static global tables. + + The following defines can be used to control the way the tables + are defined, initialised and used in embedded environments that + require special features for these purposes + + the 't_dec' construction is used to declare fixed table arrays + the 't_set' construction is used to set fixed table values + the 't_use' construction is used to access fixed table values + + 256 byte tables: + + t_xxx(s,box) => forward S box + t_xxx(i,box) => inverse S box + + 256 32-bit word OR 4 x 256 32-bit word tables: + + t_xxx(f,n) => forward normal round + t_xxx(f,l) => forward last round + t_xxx(i,n) => inverse normal round + t_xxx(i,l) => inverse last round + t_xxx(l,s) => key schedule table + t_xxx(i,m) => key schedule table + + Other variables and tables: + + t_xxx(r,c) => the rcon table +*/ + +#if !defined( _AESTAB_H ) +#define _AESTAB_H + +#define t_dec(m,n) t_##m##n +#define t_set(m,n) t_##m##n +#define t_use(m,n) t_##m##n + +#if defined(FIXED_TABLES) +# if !defined( __GNUC__ ) && (defined( __MSDOS__ ) || defined( __WIN16__ )) +/* make tables far data to avoid using too much DGROUP space (PG) */ +# define CONST const far +# else +# define CONST const +# endif +#else +# define CONST +#endif + +#if defined(__cplusplus) +# define EXTERN extern "C" +#elif defined(DO_TABLES) +# define EXTERN +#else +# define EXTERN extern +#endif + +#if defined(_MSC_VER) && defined(TABLE_ALIGN) +#define ALIGN __declspec(align(TABLE_ALIGN)) +#else +#define ALIGN +#endif + +#if defined( __WATCOMC__ ) && ( __WATCOMC__ >= 1100 ) +# define XP_DIR __cdecl +#else +# define XP_DIR +#endif + +#if defined(DO_TABLES) && defined(FIXED_TABLES) +#define d_1(t,n,b,e) EXTERN ALIGN CONST XP_DIR t n[256] = b(e) +#define d_4(t,n,b,e,f,g,h) EXTERN ALIGN CONST XP_DIR t n[4][256] = { b(e), b(f), b(g), b(h) } +EXTERN ALIGN CONST uint_32t t_dec(r,c)[RC_LENGTH] = rc_data(w0); +#else +#define d_1(t,n,b,e) EXTERN ALIGN CONST XP_DIR t n[256] +#define d_4(t,n,b,e,f,g,h) EXTERN ALIGN CONST XP_DIR t n[4][256] +EXTERN ALIGN CONST uint_32t t_dec(r,c)[RC_LENGTH]; +#endif + +#if defined( SBX_SET ) + d_1(uint_8t, t_dec(s,box), sb_data, h0); +#endif +#if defined( ISB_SET ) + d_1(uint_8t, t_dec(i,box), isb_data, h0); +#endif + +#if defined( FT1_SET ) + d_1(uint_32t, t_dec(f,n), sb_data, u0); +#endif +#if defined( FT4_SET ) + d_4(uint_32t, t_dec(f,n), sb_data, u0, u1, u2, u3); +#endif + +#if defined( FL1_SET ) + d_1(uint_32t, t_dec(f,l), sb_data, w0); +#endif +#if defined( FL4_SET ) + d_4(uint_32t, t_dec(f,l), sb_data, w0, w1, w2, w3); +#endif + +#if defined( IT1_SET ) + d_1(uint_32t, t_dec(i,n), isb_data, v0); +#endif +#if defined( IT4_SET ) + d_4(uint_32t, t_dec(i,n), isb_data, v0, v1, v2, v3); +#endif + +#if defined( IL1_SET ) + d_1(uint_32t, t_dec(i,l), isb_data, w0); +#endif +#if defined( IL4_SET ) + d_4(uint_32t, t_dec(i,l), isb_data, w0, w1, w2, w3); +#endif + +#if defined( LS1_SET ) +#if defined( FL1_SET ) +#undef LS1_SET +#else + d_1(uint_32t, t_dec(l,s), sb_data, w0); +#endif +#endif + +#if defined( LS4_SET ) +#if defined( FL4_SET ) +#undef LS4_SET +#else + d_4(uint_32t, t_dec(l,s), sb_data, w0, w1, w2, w3); +#endif +#endif + +#if defined( IM1_SET ) + d_1(uint_32t, t_dec(i,m), mm_data, v0); +#endif +#if defined( IM4_SET ) + d_4(uint_32t, t_dec(i,m), mm_data, v0, v1, v2, v3); +#endif + +#endif diff --git a/src/Crypto/Crypto.vcproj b/src/Crypto/Crypto.vcproj index 24b012c5..50f67a11 100644 --- a/src/Crypto/Crypto.vcproj +++ b/src/Crypto/Crypto.vcproj @@ -1,517 +1,517 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/Crypto/Makefile b/src/Crypto/Makefile index 53b9a3d6..5acbbd24 100644 --- a/src/Crypto/Makefile +++ b/src/Crypto/Makefile @@ -1 +1 @@ -!INCLUDE $(NTMAKEENV)\makefile.def +!INCLUDE $(NTMAKEENV)\makefile.def diff --git a/src/Crypto/Makefile.inc b/src/Crypto/Makefile.inc index 51c4f46d..955f2a76 100644 --- a/src/Crypto/Makefile.inc +++ b/src/Crypto/Makefile.inc @@ -1,15 +1,15 @@ -TC_ASFLAGS = -Xvc -Ox - -!if "$(TC_ARCH)" == "x86" -TC_ASFLAGS = $(TC_ASFLAGS) -f win32 --prefix _ -D MS_STDCALL -D DLL_EXPORT -!else -TC_ASFLAGS = $(TC_ASFLAGS) -f win64 -!endif - -TC_ASM_ERR_LOG = ..\Driver\build_errors_asm.log - -"$(OBJ_PATH)\$(O)\Aes_$(TC_ARCH).obj": Aes_$(TC_ARCH).asm - nasm.exe $(TC_ASFLAGS) -o "$@" -l "$(OBJ_PATH)\$(O)\Aes_$(TC_ARCH).lst" Aes_$(TC_ARCH).asm 2>$(TC_ASM_ERR_LOG) - -"$(OBJ_PATH)\$(O)\Aes_hw_cpu.obj": Aes_hw_cpu.asm - nasm.exe $(TC_ASFLAGS) -o "$@" -l "$(OBJ_PATH)\$(O)\Aes_hw_cpu.lst" Aes_hw_cpu.asm 2>$(TC_ASM_ERR_LOG) +TC_ASFLAGS = -Xvc -Ox + +!if "$(TC_ARCH)" == "x86" +TC_ASFLAGS = $(TC_ASFLAGS) -f win32 --prefix _ -D MS_STDCALL -D DLL_EXPORT +!else +TC_ASFLAGS = $(TC_ASFLAGS) -f win64 +!endif + +TC_ASM_ERR_LOG = ..\Driver\build_errors_asm.log + +"$(OBJ_PATH)\$(O)\Aes_$(TC_ARCH).obj": Aes_$(TC_ARCH).asm + nasm.exe $(TC_ASFLAGS) -o "$@" -l "$(OBJ_PATH)\$(O)\Aes_$(TC_ARCH).lst" Aes_$(TC_ARCH).asm 2>$(TC_ASM_ERR_LOG) + +"$(OBJ_PATH)\$(O)\Aes_hw_cpu.obj": Aes_hw_cpu.asm + nasm.exe $(TC_ASFLAGS) -o "$@" -l "$(OBJ_PATH)\$(O)\Aes_hw_cpu.lst" Aes_hw_cpu.asm 2>$(TC_ASM_ERR_LOG) diff --git a/src/Crypto/Rmd160.c b/src/Crypto/Rmd160.c index f94f5e08..75a34c3e 100644 --- a/src/Crypto/Rmd160.c +++ b/src/Crypto/Rmd160.c @@ -1,498 +1,498 @@ -// RIPEMD-160 written and placed in the public domain by Wei Dai - -/* - * This code implements the MD4 message-digest algorithm. - * The algorithm is due to Ron Rivest. This code was - * written by Colin Plumb in 1993, no copyright is claimed. - * This code is in the public domain; do with it what you wish. - */ - -/* Adapted for TrueCrypt */ -/* Adapted for VeraCrypt */ - -#include -#include "Common/Tcdefs.h" -#include "Common/Endian.h" -#include "Rmd160.h" - -#define F(x, y, z) (x ^ y ^ z) -#define G(x, y, z) (z ^ (x & (y^z))) -#define H(x, y, z) (z ^ (x | ~y)) -#define I(x, y, z) (y ^ (z & (x^y))) -#define J(x, y, z) (x ^ (y | ~z)) - -#define PUT_64BIT_LE(cp, value) do { \ - (cp)[7] = (byte) ((value) >> 56); \ - (cp)[6] = (byte) ((value) >> 48); \ - (cp)[5] = (byte) ((value) >> 40); \ - (cp)[4] = (byte) ((value) >> 32); \ - (cp)[3] = (byte) ((value) >> 24); \ - (cp)[2] = (byte) ((value) >> 16); \ - (cp)[1] = (byte) ((value) >> 8); \ - (cp)[0] = (byte) (value); } while (0) - -#define PUT_32BIT_LE(cp, value) do { \ - (cp)[3] = (byte) ((value) >> 24); \ - (cp)[2] = (byte) ((value) >> 16); \ - (cp)[1] = (byte) ((value) >> 8); \ - (cp)[0] = (byte) (value); } while (0) - -#ifndef TC_MINIMIZE_CODE_SIZE - -static byte PADDING[64] = { - 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -#else - -static byte PADDING[64]; - -#endif - -void RMD160Init (RMD160_CTX *ctx) -{ - ctx->count = 0; - ctx->state[0] = 0x67452301; - ctx->state[1] = 0xefcdab89; - ctx->state[2] = 0x98badcfe; - ctx->state[3] = 0x10325476; - ctx->state[4] = 0xc3d2e1f0; - PADDING[0] = 0x80; -} - -/* -* Update context to reflect the concatenation of another buffer full -* of bytes. -*/ -void RMD160Update (RMD160_CTX *ctx, const unsigned char *input, unsigned __int32 lenArg) -{ -#ifndef TC_WINDOWS_BOOT - uint64 len = lenArg; -#else - uint32 len = lenArg; -#endif - unsigned int have, need; - - /* Check how many bytes we already have and how many more we need. */ - have = (unsigned int) ((ctx->count) & (RIPEMD160_BLOCK_LENGTH - 1)); - need = RIPEMD160_BLOCK_LENGTH - have; - - /* Update bitcount */ - ctx->count += len; - - if (len >= need) { - if (have != 0) { - memcpy (ctx->buffer + have, input, (size_t) need); - RMD160Transform ((uint32 *) ctx->state, (const uint32 *) ctx->buffer); - input += need; - len -= need; - have = 0; - } - - /* Process data in RIPEMD160_BLOCK_LENGTH-byte chunks. */ - while (len >= RIPEMD160_BLOCK_LENGTH) { - RMD160Transform ((uint32 *) ctx->state, (const uint32 *) input); - input += RIPEMD160_BLOCK_LENGTH; - len -= RIPEMD160_BLOCK_LENGTH; - } - } - - /* Handle any remaining bytes of data. */ - if (len != 0) - memcpy (ctx->buffer + have, input, (size_t) len); -} - -/* -* Pad pad to 64-byte boundary with the bit pattern -* 1 0* (64-bit count of bits processed, MSB-first) -*/ -static void RMD160Pad(RMD160_CTX *ctx) -{ - byte count[8]; - uint32 padlen; - - /* Convert count to 8 bytes in little endian order. */ - -#ifndef TC_WINDOWS_BOOT - uint64 bitcount = ctx->count << 3; - PUT_64BIT_LE(count, bitcount); -#else - *(uint32 *) (count + 4) = 0; - *(uint32 *) (count + 0) = ctx->count << 3; -#endif - - /* Pad out to 56 mod 64. */ - padlen = RIPEMD160_BLOCK_LENGTH - - (uint32)((ctx->count) & (RIPEMD160_BLOCK_LENGTH - 1)); - if (padlen < 1 + 8) - padlen += RIPEMD160_BLOCK_LENGTH; - RMD160Update(ctx, PADDING, padlen - 8); /* padlen - 8 <= 64 */ - RMD160Update(ctx, count, 8); -} - -/* -* Final wrapup--call RMD160Pad, fill in digest and zero out ctx. -*/ -void RMD160Final(unsigned char *digest, RMD160_CTX *ctx) -{ - int i; - - RMD160Pad(ctx); - if (digest) { - for (i = 0; i < 5; i++) - PUT_32BIT_LE(digest + i * 4, ctx->state[i]); -#ifndef TC_WINDOWS_BOOT - burn (ctx, sizeof(*ctx)); -#endif - } -} - - -#ifndef TC_MINIMIZE_CODE_SIZE - -#define word32 unsigned __int32 - -#define k0 0 -#define k1 0x5a827999UL -#define k2 0x6ed9eba1UL -#define k3 0x8f1bbcdcUL -#define k4 0xa953fd4eUL -#define k5 0x50a28be6UL -#define k6 0x5c4dd124UL -#define k7 0x6d703ef3UL -#define k8 0x7a6d76e9UL -#define k9 0 - -static word32 rotlFixed (word32 x, unsigned int y) -{ - return (word32)((x<>(sizeof(word32)*8-y))); -} - -#define Subround(f, a, b, c, d, e, x, s, k) \ - a += f(b, c, d) + x + k;\ - a = rotlFixed((word32)a, s) + e;\ - c = rotlFixed((word32)c, 10U) - -void RMD160Transform (unsigned __int32 *digest, const unsigned __int32 *data) -{ -#if BYTE_ORDER == LITTLE_ENDIAN - const word32 *X = data; -#else - word32 X[16]; - int i; -#endif - - word32 a1, b1, c1, d1, e1, a2, b2, c2, d2, e2; - a1 = a2 = digest[0]; - b1 = b2 = digest[1]; - c1 = c2 = digest[2]; - d1 = d2 = digest[3]; - e1 = e2 = digest[4]; - -#if BYTE_ORDER == BIG_ENDIAN - for (i = 0; i < 16; i++) - { - X[i] = LE32 (data[i]); - } -#endif - - Subround(F, a1, b1, c1, d1, e1, X[ 0], 11, k0); - Subround(F, e1, a1, b1, c1, d1, X[ 1], 14, k0); - Subround(F, d1, e1, a1, b1, c1, X[ 2], 15, k0); - Subround(F, c1, d1, e1, a1, b1, X[ 3], 12, k0); - Subround(F, b1, c1, d1, e1, a1, X[ 4], 5, k0); - Subround(F, a1, b1, c1, d1, e1, X[ 5], 8, k0); - Subround(F, e1, a1, b1, c1, d1, X[ 6], 7, k0); - Subround(F, d1, e1, a1, b1, c1, X[ 7], 9, k0); - Subround(F, c1, d1, e1, a1, b1, X[ 8], 11, k0); - Subround(F, b1, c1, d1, e1, a1, X[ 9], 13, k0); - Subround(F, a1, b1, c1, d1, e1, X[10], 14, k0); - Subround(F, e1, a1, b1, c1, d1, X[11], 15, k0); - Subround(F, d1, e1, a1, b1, c1, X[12], 6, k0); - Subround(F, c1, d1, e1, a1, b1, X[13], 7, k0); - Subround(F, b1, c1, d1, e1, a1, X[14], 9, k0); - Subround(F, a1, b1, c1, d1, e1, X[15], 8, k0); - - Subround(G, e1, a1, b1, c1, d1, X[ 7], 7, k1); - Subround(G, d1, e1, a1, b1, c1, X[ 4], 6, k1); - Subround(G, c1, d1, e1, a1, b1, X[13], 8, k1); - Subround(G, b1, c1, d1, e1, a1, X[ 1], 13, k1); - Subround(G, a1, b1, c1, d1, e1, X[10], 11, k1); - Subround(G, e1, a1, b1, c1, d1, X[ 6], 9, k1); - Subround(G, d1, e1, a1, b1, c1, X[15], 7, k1); - Subround(G, c1, d1, e1, a1, b1, X[ 3], 15, k1); - Subround(G, b1, c1, d1, e1, a1, X[12], 7, k1); - Subround(G, a1, b1, c1, d1, e1, X[ 0], 12, k1); - Subround(G, e1, a1, b1, c1, d1, X[ 9], 15, k1); - Subround(G, d1, e1, a1, b1, c1, X[ 5], 9, k1); - Subround(G, c1, d1, e1, a1, b1, X[ 2], 11, k1); - Subround(G, b1, c1, d1, e1, a1, X[14], 7, k1); - Subround(G, a1, b1, c1, d1, e1, X[11], 13, k1); - Subround(G, e1, a1, b1, c1, d1, X[ 8], 12, k1); - - Subround(H, d1, e1, a1, b1, c1, X[ 3], 11, k2); - Subround(H, c1, d1, e1, a1, b1, X[10], 13, k2); - Subround(H, b1, c1, d1, e1, a1, X[14], 6, k2); - Subround(H, a1, b1, c1, d1, e1, X[ 4], 7, k2); - Subround(H, e1, a1, b1, c1, d1, X[ 9], 14, k2); - Subround(H, d1, e1, a1, b1, c1, X[15], 9, k2); - Subround(H, c1, d1, e1, a1, b1, X[ 8], 13, k2); - Subround(H, b1, c1, d1, e1, a1, X[ 1], 15, k2); - Subround(H, a1, b1, c1, d1, e1, X[ 2], 14, k2); - Subround(H, e1, a1, b1, c1, d1, X[ 7], 8, k2); - Subround(H, d1, e1, a1, b1, c1, X[ 0], 13, k2); - Subround(H, c1, d1, e1, a1, b1, X[ 6], 6, k2); - Subround(H, b1, c1, d1, e1, a1, X[13], 5, k2); - Subround(H, a1, b1, c1, d1, e1, X[11], 12, k2); - Subround(H, e1, a1, b1, c1, d1, X[ 5], 7, k2); - Subround(H, d1, e1, a1, b1, c1, X[12], 5, k2); - - Subround(I, c1, d1, e1, a1, b1, X[ 1], 11, k3); - Subround(I, b1, c1, d1, e1, a1, X[ 9], 12, k3); - Subround(I, a1, b1, c1, d1, e1, X[11], 14, k3); - Subround(I, e1, a1, b1, c1, d1, X[10], 15, k3); - Subround(I, d1, e1, a1, b1, c1, X[ 0], 14, k3); - Subround(I, c1, d1, e1, a1, b1, X[ 8], 15, k3); - Subround(I, b1, c1, d1, e1, a1, X[12], 9, k3); - Subround(I, a1, b1, c1, d1, e1, X[ 4], 8, k3); - Subround(I, e1, a1, b1, c1, d1, X[13], 9, k3); - Subround(I, d1, e1, a1, b1, c1, X[ 3], 14, k3); - Subround(I, c1, d1, e1, a1, b1, X[ 7], 5, k3); - Subround(I, b1, c1, d1, e1, a1, X[15], 6, k3); - Subround(I, a1, b1, c1, d1, e1, X[14], 8, k3); - Subround(I, e1, a1, b1, c1, d1, X[ 5], 6, k3); - Subround(I, d1, e1, a1, b1, c1, X[ 6], 5, k3); - Subround(I, c1, d1, e1, a1, b1, X[ 2], 12, k3); - - Subround(J, b1, c1, d1, e1, a1, X[ 4], 9, k4); - Subround(J, a1, b1, c1, d1, e1, X[ 0], 15, k4); - Subround(J, e1, a1, b1, c1, d1, X[ 5], 5, k4); - Subround(J, d1, e1, a1, b1, c1, X[ 9], 11, k4); - Subround(J, c1, d1, e1, a1, b1, X[ 7], 6, k4); - Subround(J, b1, c1, d1, e1, a1, X[12], 8, k4); - Subround(J, a1, b1, c1, d1, e1, X[ 2], 13, k4); - Subround(J, e1, a1, b1, c1, d1, X[10], 12, k4); - Subround(J, d1, e1, a1, b1, c1, X[14], 5, k4); - Subround(J, c1, d1, e1, a1, b1, X[ 1], 12, k4); - Subround(J, b1, c1, d1, e1, a1, X[ 3], 13, k4); - Subround(J, a1, b1, c1, d1, e1, X[ 8], 14, k4); - Subround(J, e1, a1, b1, c1, d1, X[11], 11, k4); - Subround(J, d1, e1, a1, b1, c1, X[ 6], 8, k4); - Subround(J, c1, d1, e1, a1, b1, X[15], 5, k4); - Subround(J, b1, c1, d1, e1, a1, X[13], 6, k4); - - Subround(J, a2, b2, c2, d2, e2, X[ 5], 8, k5); - Subround(J, e2, a2, b2, c2, d2, X[14], 9, k5); - Subround(J, d2, e2, a2, b2, c2, X[ 7], 9, k5); - Subround(J, c2, d2, e2, a2, b2, X[ 0], 11, k5); - Subround(J, b2, c2, d2, e2, a2, X[ 9], 13, k5); - Subround(J, a2, b2, c2, d2, e2, X[ 2], 15, k5); - Subround(J, e2, a2, b2, c2, d2, X[11], 15, k5); - Subround(J, d2, e2, a2, b2, c2, X[ 4], 5, k5); - Subround(J, c2, d2, e2, a2, b2, X[13], 7, k5); - Subround(J, b2, c2, d2, e2, a2, X[ 6], 7, k5); - Subround(J, a2, b2, c2, d2, e2, X[15], 8, k5); - Subround(J, e2, a2, b2, c2, d2, X[ 8], 11, k5); - Subround(J, d2, e2, a2, b2, c2, X[ 1], 14, k5); - Subround(J, c2, d2, e2, a2, b2, X[10], 14, k5); - Subround(J, b2, c2, d2, e2, a2, X[ 3], 12, k5); - Subround(J, a2, b2, c2, d2, e2, X[12], 6, k5); - - Subround(I, e2, a2, b2, c2, d2, X[ 6], 9, k6); - Subround(I, d2, e2, a2, b2, c2, X[11], 13, k6); - Subround(I, c2, d2, e2, a2, b2, X[ 3], 15, k6); - Subround(I, b2, c2, d2, e2, a2, X[ 7], 7, k6); - Subround(I, a2, b2, c2, d2, e2, X[ 0], 12, k6); - Subround(I, e2, a2, b2, c2, d2, X[13], 8, k6); - Subround(I, d2, e2, a2, b2, c2, X[ 5], 9, k6); - Subround(I, c2, d2, e2, a2, b2, X[10], 11, k6); - Subround(I, b2, c2, d2, e2, a2, X[14], 7, k6); - Subround(I, a2, b2, c2, d2, e2, X[15], 7, k6); - Subround(I, e2, a2, b2, c2, d2, X[ 8], 12, k6); - Subround(I, d2, e2, a2, b2, c2, X[12], 7, k6); - Subround(I, c2, d2, e2, a2, b2, X[ 4], 6, k6); - Subround(I, b2, c2, d2, e2, a2, X[ 9], 15, k6); - Subround(I, a2, b2, c2, d2, e2, X[ 1], 13, k6); - Subround(I, e2, a2, b2, c2, d2, X[ 2], 11, k6); - - Subround(H, d2, e2, a2, b2, c2, X[15], 9, k7); - Subround(H, c2, d2, e2, a2, b2, X[ 5], 7, k7); - Subround(H, b2, c2, d2, e2, a2, X[ 1], 15, k7); - Subround(H, a2, b2, c2, d2, e2, X[ 3], 11, k7); - Subround(H, e2, a2, b2, c2, d2, X[ 7], 8, k7); - Subround(H, d2, e2, a2, b2, c2, X[14], 6, k7); - Subround(H, c2, d2, e2, a2, b2, X[ 6], 6, k7); - Subround(H, b2, c2, d2, e2, a2, X[ 9], 14, k7); - Subround(H, a2, b2, c2, d2, e2, X[11], 12, k7); - Subround(H, e2, a2, b2, c2, d2, X[ 8], 13, k7); - Subround(H, d2, e2, a2, b2, c2, X[12], 5, k7); - Subround(H, c2, d2, e2, a2, b2, X[ 2], 14, k7); - Subround(H, b2, c2, d2, e2, a2, X[10], 13, k7); - Subround(H, a2, b2, c2, d2, e2, X[ 0], 13, k7); - Subround(H, e2, a2, b2, c2, d2, X[ 4], 7, k7); - Subround(H, d2, e2, a2, b2, c2, X[13], 5, k7); - - Subround(G, c2, d2, e2, a2, b2, X[ 8], 15, k8); - Subround(G, b2, c2, d2, e2, a2, X[ 6], 5, k8); - Subround(G, a2, b2, c2, d2, e2, X[ 4], 8, k8); - Subround(G, e2, a2, b2, c2, d2, X[ 1], 11, k8); - Subround(G, d2, e2, a2, b2, c2, X[ 3], 14, k8); - Subround(G, c2, d2, e2, a2, b2, X[11], 14, k8); - Subround(G, b2, c2, d2, e2, a2, X[15], 6, k8); - Subround(G, a2, b2, c2, d2, e2, X[ 0], 14, k8); - Subround(G, e2, a2, b2, c2, d2, X[ 5], 6, k8); - Subround(G, d2, e2, a2, b2, c2, X[12], 9, k8); - Subround(G, c2, d2, e2, a2, b2, X[ 2], 12, k8); - Subround(G, b2, c2, d2, e2, a2, X[13], 9, k8); - Subround(G, a2, b2, c2, d2, e2, X[ 9], 12, k8); - Subround(G, e2, a2, b2, c2, d2, X[ 7], 5, k8); - Subround(G, d2, e2, a2, b2, c2, X[10], 15, k8); - Subround(G, c2, d2, e2, a2, b2, X[14], 8, k8); - - Subround(F, b2, c2, d2, e2, a2, X[12], 8, k9); - Subround(F, a2, b2, c2, d2, e2, X[15], 5, k9); - Subround(F, e2, a2, b2, c2, d2, X[10], 12, k9); - Subround(F, d2, e2, a2, b2, c2, X[ 4], 9, k9); - Subround(F, c2, d2, e2, a2, b2, X[ 1], 12, k9); - Subround(F, b2, c2, d2, e2, a2, X[ 5], 5, k9); - Subround(F, a2, b2, c2, d2, e2, X[ 8], 14, k9); - Subround(F, e2, a2, b2, c2, d2, X[ 7], 6, k9); - Subround(F, d2, e2, a2, b2, c2, X[ 6], 8, k9); - Subround(F, c2, d2, e2, a2, b2, X[ 2], 13, k9); - Subround(F, b2, c2, d2, e2, a2, X[13], 6, k9); - Subround(F, a2, b2, c2, d2, e2, X[14], 5, k9); - Subround(F, e2, a2, b2, c2, d2, X[ 0], 15, k9); - Subround(F, d2, e2, a2, b2, c2, X[ 3], 13, k9); - Subround(F, c2, d2, e2, a2, b2, X[ 9], 11, k9); - Subround(F, b2, c2, d2, e2, a2, X[11], 11, k9); - - c1 = digest[1] + c1 + d2; - digest[1] = digest[2] + d1 + e2; - digest[2] = digest[3] + e1 + a2; - digest[3] = digest[4] + a1 + b2; - digest[4] = digest[0] + b1 + c2; - digest[0] = c1; -} - -#else // TC_MINIMIZE_CODE_SIZE - -/* - Derived from source code of TrueCrypt 7.1a, which is - Copyright (c) 2008-2012 TrueCrypt Developers Association and which is governed - by the TrueCrypt License 3.0. - - Modifications and additions to the original source code (contained in this file) - and all other portions of this file are Copyright (c) 2013-2016 IDRIX - and are governed by the Apache License 2.0 the full text of which is - contained in the file License.txt included in VeraCrypt binary and source - code distribution packages. -*/ - -#pragma optimize ("tl", on) - -typedef unsigned __int32 uint32; -typedef unsigned __int8 byte; - -#include -#pragma intrinsic (_lrotl) - -static const byte OrderTab[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 7, 4, 13, 1, 10, 6, 15, 3, 12, 0, 9, 5, 2, 14, 11, 8, - 3, 10, 14, 4, 9, 15, 8, 1, 2, 7, 0, 6, 13, 11, 5, 12, - 1, 9, 11, 10, 0, 8, 12, 4, 13, 3, 7, 15, 14, 5, 6, 2, - 4, 0, 5, 9, 7, 12, 2, 10, 14, 1, 3, 8, 11, 6, 15, 13, - 5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12, - 6, 11, 3, 7, 0, 13, 5, 10, 14, 15, 8, 12, 4, 9, 1, 2, - 15, 5, 1, 3, 7, 14, 6, 9, 11, 8, 12, 2, 10, 0, 4, 13, - 8, 6, 4, 1, 3, 11, 15, 0, 5, 12, 2, 13, 9, 7, 10, 14, - 12, 15, 10, 4, 1, 5, 8, 7, 6, 2, 13, 14, 0, 3, 9, 11 -}; - -static const byte RolTab[] = { - 11, 14, 15, 12, 5, 8, 7, 9, 11, 13, 14, 15, 6, 7, 9, 8, - 7, 6, 8, 13, 11, 9, 7, 15, 7, 12, 15, 9, 11, 7, 13, 12, - 11, 13, 6, 7, 14, 9, 13, 15, 14, 8, 13, 6, 5, 12, 7, 5, - 11, 12, 14, 15, 14, 15, 9, 8, 9, 14, 5, 6, 8, 6, 5, 12, - 9, 15, 5, 11, 6, 8, 13, 12, 5, 12, 13, 14, 11, 8, 5, 6, - 8, 9, 9, 11, 13, 15, 15, 5, 7, 7, 8, 11, 14, 14, 12, 6, - 9, 13, 15, 7, 12, 8, 9, 11, 7, 7, 12, 7, 6, 15, 13, 11, - 9, 7, 15, 11, 8, 6, 6, 14, 12, 13, 5, 14, 13, 13, 7, 5, - 15, 5, 8, 11, 14, 14, 6, 14, 6, 9, 12, 9, 12, 5, 15, 8, - 8, 5, 12, 9, 12, 5, 14, 6, 8, 13, 6, 5, 15, 13, 11, 11 -}; - -static const uint32 KTab[] = { - 0x00000000UL, - 0x5A827999UL, - 0x6ED9EBA1UL, - 0x8F1BBCDCUL, - 0xA953FD4EUL, - 0x50A28BE6UL, - 0x5C4DD124UL, - 0x6D703EF3UL, - 0x7A6D76E9UL, - 0x00000000UL -}; - - -void RMD160Transform (unsigned __int32 *state, const unsigned __int32 *data) -{ - uint32 a, b, c, d, e; - uint32 a2, b2, c2, d2, e2; - byte pos; - uint32 tmp; - - a = state[0]; - b = state[1]; - c = state[2]; - d = state[3]; - e = state[4]; - - for (pos = 0; pos < 160; ++pos) - { - tmp = a + data[OrderTab[pos]] + KTab[pos >> 4]; - - switch (pos >> 4) - { - case 0: case 9: tmp += F (b, c, d); break; - case 1: case 8: tmp += G (b, c, d); break; - case 2: case 7: tmp += H (b, c, d); break; - case 3: case 6: tmp += I (b, c, d); break; - case 4: case 5: tmp += J (b, c, d); break; - } - - tmp = _lrotl (tmp, RolTab[pos]) + e; - a = e; - e = d; - d = _lrotl (c, 10); - c = b; - b = tmp; - - if (pos == 79) - { - a2 = a; - b2 = b; - c2 = c; - d2 = d; - e2 = e; - - a = state[0]; - b = state[1]; - c = state[2]; - d = state[3]; - e = state[4]; - } - } - - tmp = state[1] + c2 + d; - state[1] = state[2] + d2 + e; - state[2] = state[3] + e2 + a; - state[3] = state[4] + a2 + b; - state[4] = state[0] + b2 + c; - state[0] = tmp; -} - -#endif // TC_MINIMIZE_CODE_SIZE +// RIPEMD-160 written and placed in the public domain by Wei Dai + +/* + * This code implements the MD4 message-digest algorithm. + * The algorithm is due to Ron Rivest. This code was + * written by Colin Plumb in 1993, no copyright is claimed. + * This code is in the public domain; do with it what you wish. + */ + +/* Adapted for TrueCrypt */ +/* Adapted for VeraCrypt */ + +#include +#include "Common/Tcdefs.h" +#include "Common/Endian.h" +#include "Rmd160.h" + +#define F(x, y, z) (x ^ y ^ z) +#define G(x, y, z) (z ^ (x & (y^z))) +#define H(x, y, z) (z ^ (x | ~y)) +#define I(x, y, z) (y ^ (z & (x^y))) +#define J(x, y, z) (x ^ (y | ~z)) + +#define PUT_64BIT_LE(cp, value) do { \ + (cp)[7] = (byte) ((value) >> 56); \ + (cp)[6] = (byte) ((value) >> 48); \ + (cp)[5] = (byte) ((value) >> 40); \ + (cp)[4] = (byte) ((value) >> 32); \ + (cp)[3] = (byte) ((value) >> 24); \ + (cp)[2] = (byte) ((value) >> 16); \ + (cp)[1] = (byte) ((value) >> 8); \ + (cp)[0] = (byte) (value); } while (0) + +#define PUT_32BIT_LE(cp, value) do { \ + (cp)[3] = (byte) ((value) >> 24); \ + (cp)[2] = (byte) ((value) >> 16); \ + (cp)[1] = (byte) ((value) >> 8); \ + (cp)[0] = (byte) (value); } while (0) + +#ifndef TC_MINIMIZE_CODE_SIZE + +static byte PADDING[64] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +#else + +static byte PADDING[64]; + +#endif + +void RMD160Init (RMD160_CTX *ctx) +{ + ctx->count = 0; + ctx->state[0] = 0x67452301; + ctx->state[1] = 0xefcdab89; + ctx->state[2] = 0x98badcfe; + ctx->state[3] = 0x10325476; + ctx->state[4] = 0xc3d2e1f0; + PADDING[0] = 0x80; +} + +/* +* Update context to reflect the concatenation of another buffer full +* of bytes. +*/ +void RMD160Update (RMD160_CTX *ctx, const unsigned char *input, unsigned __int32 lenArg) +{ +#ifndef TC_WINDOWS_BOOT + uint64 len = lenArg; +#else + uint32 len = lenArg; +#endif + unsigned int have, need; + + /* Check how many bytes we already have and how many more we need. */ + have = (unsigned int) ((ctx->count) & (RIPEMD160_BLOCK_LENGTH - 1)); + need = RIPEMD160_BLOCK_LENGTH - have; + + /* Update bitcount */ + ctx->count += len; + + if (len >= need) { + if (have != 0) { + memcpy (ctx->buffer + have, input, (size_t) need); + RMD160Transform ((uint32 *) ctx->state, (const uint32 *) ctx->buffer); + input += need; + len -= need; + have = 0; + } + + /* Process data in RIPEMD160_BLOCK_LENGTH-byte chunks. */ + while (len >= RIPEMD160_BLOCK_LENGTH) { + RMD160Transform ((uint32 *) ctx->state, (const uint32 *) input); + input += RIPEMD160_BLOCK_LENGTH; + len -= RIPEMD160_BLOCK_LENGTH; + } + } + + /* Handle any remaining bytes of data. */ + if (len != 0) + memcpy (ctx->buffer + have, input, (size_t) len); +} + +/* +* Pad pad to 64-byte boundary with the bit pattern +* 1 0* (64-bit count of bits processed, MSB-first) +*/ +static void RMD160Pad(RMD160_CTX *ctx) +{ + byte count[8]; + uint32 padlen; + + /* Convert count to 8 bytes in little endian order. */ + +#ifndef TC_WINDOWS_BOOT + uint64 bitcount = ctx->count << 3; + PUT_64BIT_LE(count, bitcount); +#else + *(uint32 *) (count + 4) = 0; + *(uint32 *) (count + 0) = ctx->count << 3; +#endif + + /* Pad out to 56 mod 64. */ + padlen = RIPEMD160_BLOCK_LENGTH - + (uint32)((ctx->count) & (RIPEMD160_BLOCK_LENGTH - 1)); + if (padlen < 1 + 8) + padlen += RIPEMD160_BLOCK_LENGTH; + RMD160Update(ctx, PADDING, padlen - 8); /* padlen - 8 <= 64 */ + RMD160Update(ctx, count, 8); +} + +/* +* Final wrapup--call RMD160Pad, fill in digest and zero out ctx. +*/ +void RMD160Final(unsigned char *digest, RMD160_CTX *ctx) +{ + int i; + + RMD160Pad(ctx); + if (digest) { + for (i = 0; i < 5; i++) + PUT_32BIT_LE(digest + i * 4, ctx->state[i]); +#ifndef TC_WINDOWS_BOOT + burn (ctx, sizeof(*ctx)); +#endif + } +} + + +#ifndef TC_MINIMIZE_CODE_SIZE + +#define word32 unsigned __int32 + +#define k0 0 +#define k1 0x5a827999UL +#define k2 0x6ed9eba1UL +#define k3 0x8f1bbcdcUL +#define k4 0xa953fd4eUL +#define k5 0x50a28be6UL +#define k6 0x5c4dd124UL +#define k7 0x6d703ef3UL +#define k8 0x7a6d76e9UL +#define k9 0 + +static word32 rotlFixed (word32 x, unsigned int y) +{ + return (word32)((x<>(sizeof(word32)*8-y))); +} + +#define Subround(f, a, b, c, d, e, x, s, k) \ + a += f(b, c, d) + x + k;\ + a = rotlFixed((word32)a, s) + e;\ + c = rotlFixed((word32)c, 10U) + +void RMD160Transform (unsigned __int32 *digest, const unsigned __int32 *data) +{ +#if BYTE_ORDER == LITTLE_ENDIAN + const word32 *X = data; +#else + word32 X[16]; + int i; +#endif + + word32 a1, b1, c1, d1, e1, a2, b2, c2, d2, e2; + a1 = a2 = digest[0]; + b1 = b2 = digest[1]; + c1 = c2 = digest[2]; + d1 = d2 = digest[3]; + e1 = e2 = digest[4]; + +#if BYTE_ORDER == BIG_ENDIAN + for (i = 0; i < 16; i++) + { + X[i] = LE32 (data[i]); + } +#endif + + Subround(F, a1, b1, c1, d1, e1, X[ 0], 11, k0); + Subround(F, e1, a1, b1, c1, d1, X[ 1], 14, k0); + Subround(F, d1, e1, a1, b1, c1, X[ 2], 15, k0); + Subround(F, c1, d1, e1, a1, b1, X[ 3], 12, k0); + Subround(F, b1, c1, d1, e1, a1, X[ 4], 5, k0); + Subround(F, a1, b1, c1, d1, e1, X[ 5], 8, k0); + Subround(F, e1, a1, b1, c1, d1, X[ 6], 7, k0); + Subround(F, d1, e1, a1, b1, c1, X[ 7], 9, k0); + Subround(F, c1, d1, e1, a1, b1, X[ 8], 11, k0); + Subround(F, b1, c1, d1, e1, a1, X[ 9], 13, k0); + Subround(F, a1, b1, c1, d1, e1, X[10], 14, k0); + Subround(F, e1, a1, b1, c1, d1, X[11], 15, k0); + Subround(F, d1, e1, a1, b1, c1, X[12], 6, k0); + Subround(F, c1, d1, e1, a1, b1, X[13], 7, k0); + Subround(F, b1, c1, d1, e1, a1, X[14], 9, k0); + Subround(F, a1, b1, c1, d1, e1, X[15], 8, k0); + + Subround(G, e1, a1, b1, c1, d1, X[ 7], 7, k1); + Subround(G, d1, e1, a1, b1, c1, X[ 4], 6, k1); + Subround(G, c1, d1, e1, a1, b1, X[13], 8, k1); + Subround(G, b1, c1, d1, e1, a1, X[ 1], 13, k1); + Subround(G, a1, b1, c1, d1, e1, X[10], 11, k1); + Subround(G, e1, a1, b1, c1, d1, X[ 6], 9, k1); + Subround(G, d1, e1, a1, b1, c1, X[15], 7, k1); + Subround(G, c1, d1, e1, a1, b1, X[ 3], 15, k1); + Subround(G, b1, c1, d1, e1, a1, X[12], 7, k1); + Subround(G, a1, b1, c1, d1, e1, X[ 0], 12, k1); + Subround(G, e1, a1, b1, c1, d1, X[ 9], 15, k1); + Subround(G, d1, e1, a1, b1, c1, X[ 5], 9, k1); + Subround(G, c1, d1, e1, a1, b1, X[ 2], 11, k1); + Subround(G, b1, c1, d1, e1, a1, X[14], 7, k1); + Subround(G, a1, b1, c1, d1, e1, X[11], 13, k1); + Subround(G, e1, a1, b1, c1, d1, X[ 8], 12, k1); + + Subround(H, d1, e1, a1, b1, c1, X[ 3], 11, k2); + Subround(H, c1, d1, e1, a1, b1, X[10], 13, k2); + Subround(H, b1, c1, d1, e1, a1, X[14], 6, k2); + Subround(H, a1, b1, c1, d1, e1, X[ 4], 7, k2); + Subround(H, e1, a1, b1, c1, d1, X[ 9], 14, k2); + Subround(H, d1, e1, a1, b1, c1, X[15], 9, k2); + Subround(H, c1, d1, e1, a1, b1, X[ 8], 13, k2); + Subround(H, b1, c1, d1, e1, a1, X[ 1], 15, k2); + Subround(H, a1, b1, c1, d1, e1, X[ 2], 14, k2); + Subround(H, e1, a1, b1, c1, d1, X[ 7], 8, k2); + Subround(H, d1, e1, a1, b1, c1, X[ 0], 13, k2); + Subround(H, c1, d1, e1, a1, b1, X[ 6], 6, k2); + Subround(H, b1, c1, d1, e1, a1, X[13], 5, k2); + Subround(H, a1, b1, c1, d1, e1, X[11], 12, k2); + Subround(H, e1, a1, b1, c1, d1, X[ 5], 7, k2); + Subround(H, d1, e1, a1, b1, c1, X[12], 5, k2); + + Subround(I, c1, d1, e1, a1, b1, X[ 1], 11, k3); + Subround(I, b1, c1, d1, e1, a1, X[ 9], 12, k3); + Subround(I, a1, b1, c1, d1, e1, X[11], 14, k3); + Subround(I, e1, a1, b1, c1, d1, X[10], 15, k3); + Subround(I, d1, e1, a1, b1, c1, X[ 0], 14, k3); + Subround(I, c1, d1, e1, a1, b1, X[ 8], 15, k3); + Subround(I, b1, c1, d1, e1, a1, X[12], 9, k3); + Subround(I, a1, b1, c1, d1, e1, X[ 4], 8, k3); + Subround(I, e1, a1, b1, c1, d1, X[13], 9, k3); + Subround(I, d1, e1, a1, b1, c1, X[ 3], 14, k3); + Subround(I, c1, d1, e1, a1, b1, X[ 7], 5, k3); + Subround(I, b1, c1, d1, e1, a1, X[15], 6, k3); + Subround(I, a1, b1, c1, d1, e1, X[14], 8, k3); + Subround(I, e1, a1, b1, c1, d1, X[ 5], 6, k3); + Subround(I, d1, e1, a1, b1, c1, X[ 6], 5, k3); + Subround(I, c1, d1, e1, a1, b1, X[ 2], 12, k3); + + Subround(J, b1, c1, d1, e1, a1, X[ 4], 9, k4); + Subround(J, a1, b1, c1, d1, e1, X[ 0], 15, k4); + Subround(J, e1, a1, b1, c1, d1, X[ 5], 5, k4); + Subround(J, d1, e1, a1, b1, c1, X[ 9], 11, k4); + Subround(J, c1, d1, e1, a1, b1, X[ 7], 6, k4); + Subround(J, b1, c1, d1, e1, a1, X[12], 8, k4); + Subround(J, a1, b1, c1, d1, e1, X[ 2], 13, k4); + Subround(J, e1, a1, b1, c1, d1, X[10], 12, k4); + Subround(J, d1, e1, a1, b1, c1, X[14], 5, k4); + Subround(J, c1, d1, e1, a1, b1, X[ 1], 12, k4); + Subround(J, b1, c1, d1, e1, a1, X[ 3], 13, k4); + Subround(J, a1, b1, c1, d1, e1, X[ 8], 14, k4); + Subround(J, e1, a1, b1, c1, d1, X[11], 11, k4); + Subround(J, d1, e1, a1, b1, c1, X[ 6], 8, k4); + Subround(J, c1, d1, e1, a1, b1, X[15], 5, k4); + Subround(J, b1, c1, d1, e1, a1, X[13], 6, k4); + + Subround(J, a2, b2, c2, d2, e2, X[ 5], 8, k5); + Subround(J, e2, a2, b2, c2, d2, X[14], 9, k5); + Subround(J, d2, e2, a2, b2, c2, X[ 7], 9, k5); + Subround(J, c2, d2, e2, a2, b2, X[ 0], 11, k5); + Subround(J, b2, c2, d2, e2, a2, X[ 9], 13, k5); + Subround(J, a2, b2, c2, d2, e2, X[ 2], 15, k5); + Subround(J, e2, a2, b2, c2, d2, X[11], 15, k5); + Subround(J, d2, e2, a2, b2, c2, X[ 4], 5, k5); + Subround(J, c2, d2, e2, a2, b2, X[13], 7, k5); + Subround(J, b2, c2, d2, e2, a2, X[ 6], 7, k5); + Subround(J, a2, b2, c2, d2, e2, X[15], 8, k5); + Subround(J, e2, a2, b2, c2, d2, X[ 8], 11, k5); + Subround(J, d2, e2, a2, b2, c2, X[ 1], 14, k5); + Subround(J, c2, d2, e2, a2, b2, X[10], 14, k5); + Subround(J, b2, c2, d2, e2, a2, X[ 3], 12, k5); + Subround(J, a2, b2, c2, d2, e2, X[12], 6, k5); + + Subround(I, e2, a2, b2, c2, d2, X[ 6], 9, k6); + Subround(I, d2, e2, a2, b2, c2, X[11], 13, k6); + Subround(I, c2, d2, e2, a2, b2, X[ 3], 15, k6); + Subround(I, b2, c2, d2, e2, a2, X[ 7], 7, k6); + Subround(I, a2, b2, c2, d2, e2, X[ 0], 12, k6); + Subround(I, e2, a2, b2, c2, d2, X[13], 8, k6); + Subround(I, d2, e2, a2, b2, c2, X[ 5], 9, k6); + Subround(I, c2, d2, e2, a2, b2, X[10], 11, k6); + Subround(I, b2, c2, d2, e2, a2, X[14], 7, k6); + Subround(I, a2, b2, c2, d2, e2, X[15], 7, k6); + Subround(I, e2, a2, b2, c2, d2, X[ 8], 12, k6); + Subround(I, d2, e2, a2, b2, c2, X[12], 7, k6); + Subround(I, c2, d2, e2, a2, b2, X[ 4], 6, k6); + Subround(I, b2, c2, d2, e2, a2, X[ 9], 15, k6); + Subround(I, a2, b2, c2, d2, e2, X[ 1], 13, k6); + Subround(I, e2, a2, b2, c2, d2, X[ 2], 11, k6); + + Subround(H, d2, e2, a2, b2, c2, X[15], 9, k7); + Subround(H, c2, d2, e2, a2, b2, X[ 5], 7, k7); + Subround(H, b2, c2, d2, e2, a2, X[ 1], 15, k7); + Subround(H, a2, b2, c2, d2, e2, X[ 3], 11, k7); + Subround(H, e2, a2, b2, c2, d2, X[ 7], 8, k7); + Subround(H, d2, e2, a2, b2, c2, X[14], 6, k7); + Subround(H, c2, d2, e2, a2, b2, X[ 6], 6, k7); + Subround(H, b2, c2, d2, e2, a2, X[ 9], 14, k7); + Subround(H, a2, b2, c2, d2, e2, X[11], 12, k7); + Subround(H, e2, a2, b2, c2, d2, X[ 8], 13, k7); + Subround(H, d2, e2, a2, b2, c2, X[12], 5, k7); + Subround(H, c2, d2, e2, a2, b2, X[ 2], 14, k7); + Subround(H, b2, c2, d2, e2, a2, X[10], 13, k7); + Subround(H, a2, b2, c2, d2, e2, X[ 0], 13, k7); + Subround(H, e2, a2, b2, c2, d2, X[ 4], 7, k7); + Subround(H, d2, e2, a2, b2, c2, X[13], 5, k7); + + Subround(G, c2, d2, e2, a2, b2, X[ 8], 15, k8); + Subround(G, b2, c2, d2, e2, a2, X[ 6], 5, k8); + Subround(G, a2, b2, c2, d2, e2, X[ 4], 8, k8); + Subround(G, e2, a2, b2, c2, d2, X[ 1], 11, k8); + Subround(G, d2, e2, a2, b2, c2, X[ 3], 14, k8); + Subround(G, c2, d2, e2, a2, b2, X[11], 14, k8); + Subround(G, b2, c2, d2, e2, a2, X[15], 6, k8); + Subround(G, a2, b2, c2, d2, e2, X[ 0], 14, k8); + Subround(G, e2, a2, b2, c2, d2, X[ 5], 6, k8); + Subround(G, d2, e2, a2, b2, c2, X[12], 9, k8); + Subround(G, c2, d2, e2, a2, b2, X[ 2], 12, k8); + Subround(G, b2, c2, d2, e2, a2, X[13], 9, k8); + Subround(G, a2, b2, c2, d2, e2, X[ 9], 12, k8); + Subround(G, e2, a2, b2, c2, d2, X[ 7], 5, k8); + Subround(G, d2, e2, a2, b2, c2, X[10], 15, k8); + Subround(G, c2, d2, e2, a2, b2, X[14], 8, k8); + + Subround(F, b2, c2, d2, e2, a2, X[12], 8, k9); + Subround(F, a2, b2, c2, d2, e2, X[15], 5, k9); + Subround(F, e2, a2, b2, c2, d2, X[10], 12, k9); + Subround(F, d2, e2, a2, b2, c2, X[ 4], 9, k9); + Subround(F, c2, d2, e2, a2, b2, X[ 1], 12, k9); + Subround(F, b2, c2, d2, e2, a2, X[ 5], 5, k9); + Subround(F, a2, b2, c2, d2, e2, X[ 8], 14, k9); + Subround(F, e2, a2, b2, c2, d2, X[ 7], 6, k9); + Subround(F, d2, e2, a2, b2, c2, X[ 6], 8, k9); + Subround(F, c2, d2, e2, a2, b2, X[ 2], 13, k9); + Subround(F, b2, c2, d2, e2, a2, X[13], 6, k9); + Subround(F, a2, b2, c2, d2, e2, X[14], 5, k9); + Subround(F, e2, a2, b2, c2, d2, X[ 0], 15, k9); + Subround(F, d2, e2, a2, b2, c2, X[ 3], 13, k9); + Subround(F, c2, d2, e2, a2, b2, X[ 9], 11, k9); + Subround(F, b2, c2, d2, e2, a2, X[11], 11, k9); + + c1 = digest[1] + c1 + d2; + digest[1] = digest[2] + d1 + e2; + digest[2] = digest[3] + e1 + a2; + digest[3] = digest[4] + a1 + b2; + digest[4] = digest[0] + b1 + c2; + digest[0] = c1; +} + +#else // TC_MINIMIZE_CODE_SIZE + +/* + Derived from source code of TrueCrypt 7.1a, which is + Copyright (c) 2008-2012 TrueCrypt Developers Association and which is governed + by the TrueCrypt License 3.0. + + Modifications and additions to the original source code (contained in this file) + and all other portions of this file are Copyright (c) 2013-2016 IDRIX + and are governed by the Apache License 2.0 the full text of which is + contained in the file License.txt included in VeraCrypt binary and source + code distribution packages. +*/ + +#pragma optimize ("tl", on) + +typedef unsigned __int32 uint32; +typedef unsigned __int8 byte; + +#include +#pragma intrinsic (_lrotl) + +static const byte OrderTab[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 7, 4, 13, 1, 10, 6, 15, 3, 12, 0, 9, 5, 2, 14, 11, 8, + 3, 10, 14, 4, 9, 15, 8, 1, 2, 7, 0, 6, 13, 11, 5, 12, + 1, 9, 11, 10, 0, 8, 12, 4, 13, 3, 7, 15, 14, 5, 6, 2, + 4, 0, 5, 9, 7, 12, 2, 10, 14, 1, 3, 8, 11, 6, 15, 13, + 5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12, + 6, 11, 3, 7, 0, 13, 5, 10, 14, 15, 8, 12, 4, 9, 1, 2, + 15, 5, 1, 3, 7, 14, 6, 9, 11, 8, 12, 2, 10, 0, 4, 13, + 8, 6, 4, 1, 3, 11, 15, 0, 5, 12, 2, 13, 9, 7, 10, 14, + 12, 15, 10, 4, 1, 5, 8, 7, 6, 2, 13, 14, 0, 3, 9, 11 +}; + +static const byte RolTab[] = { + 11, 14, 15, 12, 5, 8, 7, 9, 11, 13, 14, 15, 6, 7, 9, 8, + 7, 6, 8, 13, 11, 9, 7, 15, 7, 12, 15, 9, 11, 7, 13, 12, + 11, 13, 6, 7, 14, 9, 13, 15, 14, 8, 13, 6, 5, 12, 7, 5, + 11, 12, 14, 15, 14, 15, 9, 8, 9, 14, 5, 6, 8, 6, 5, 12, + 9, 15, 5, 11, 6, 8, 13, 12, 5, 12, 13, 14, 11, 8, 5, 6, + 8, 9, 9, 11, 13, 15, 15, 5, 7, 7, 8, 11, 14, 14, 12, 6, + 9, 13, 15, 7, 12, 8, 9, 11, 7, 7, 12, 7, 6, 15, 13, 11, + 9, 7, 15, 11, 8, 6, 6, 14, 12, 13, 5, 14, 13, 13, 7, 5, + 15, 5, 8, 11, 14, 14, 6, 14, 6, 9, 12, 9, 12, 5, 15, 8, + 8, 5, 12, 9, 12, 5, 14, 6, 8, 13, 6, 5, 15, 13, 11, 11 +}; + +static const uint32 KTab[] = { + 0x00000000UL, + 0x5A827999UL, + 0x6ED9EBA1UL, + 0x8F1BBCDCUL, + 0xA953FD4EUL, + 0x50A28BE6UL, + 0x5C4DD124UL, + 0x6D703EF3UL, + 0x7A6D76E9UL, + 0x00000000UL +}; + + +void RMD160Transform (unsigned __int32 *state, const unsigned __int32 *data) +{ + uint32 a, b, c, d, e; + uint32 a2, b2, c2, d2, e2; + byte pos; + uint32 tmp; + + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + + for (pos = 0; pos < 160; ++pos) + { + tmp = a + data[OrderTab[pos]] + KTab[pos >> 4]; + + switch (pos >> 4) + { + case 0: case 9: tmp += F (b, c, d); break; + case 1: case 8: tmp += G (b, c, d); break; + case 2: case 7: tmp += H (b, c, d); break; + case 3: case 6: tmp += I (b, c, d); break; + case 4: case 5: tmp += J (b, c, d); break; + } + + tmp = _lrotl (tmp, RolTab[pos]) + e; + a = e; + e = d; + d = _lrotl (c, 10); + c = b; + b = tmp; + + if (pos == 79) + { + a2 = a; + b2 = b; + c2 = c; + d2 = d; + e2 = e; + + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + } + } + + tmp = state[1] + c2 + d; + state[1] = state[2] + d2 + e; + state[2] = state[3] + e2 + a; + state[3] = state[4] + a2 + b; + state[4] = state[0] + b2 + c; + state[0] = tmp; +} + +#endif // TC_MINIMIZE_CODE_SIZE diff --git a/src/Crypto/Rmd160.h b/src/Crypto/Rmd160.h index 4dfa38f1..81b5d6f0 100644 --- a/src/Crypto/Rmd160.h +++ b/src/Crypto/Rmd160.h @@ -1,33 +1,33 @@ -#ifndef TC_HEADER_Crypto_Ripemd160 -#define TC_HEADER_Crypto_Ripemd160 - -#include "Common/Tcdefs.h" - -#if defined(__cplusplus) -extern "C" -{ -#endif - -#define RIPEMD160_BLOCK_LENGTH 64 - -typedef struct RMD160Context -{ - unsigned __int32 state[5]; -#ifndef TC_WINDOWS_BOOT - uint64 count; -#else - uint32 count; -#endif - unsigned char buffer[RIPEMD160_BLOCK_LENGTH]; -} RMD160_CTX; - -void RMD160Init (RMD160_CTX *ctx); -void RMD160Transform (unsigned __int32 *state, const unsigned __int32 *data); -void RMD160Update (RMD160_CTX *ctx, const unsigned char *input, unsigned __int32 len); -void RMD160Final (unsigned char *digest, RMD160_CTX *ctx); - -#if defined(__cplusplus) -} -#endif - -#endif // TC_HEADER_Crypto_Ripemd160 +#ifndef TC_HEADER_Crypto_Ripemd160 +#define TC_HEADER_Crypto_Ripemd160 + +#include "Common/Tcdefs.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#define RIPEMD160_BLOCK_LENGTH 64 + +typedef struct RMD160Context +{ + unsigned __int32 state[5]; +#ifndef TC_WINDOWS_BOOT + uint64 count; +#else + uint32 count; +#endif + unsigned char buffer[RIPEMD160_BLOCK_LENGTH]; +} RMD160_CTX; + +void RMD160Init (RMD160_CTX *ctx); +void RMD160Transform (unsigned __int32 *state, const unsigned __int32 *data); +void RMD160Update (RMD160_CTX *ctx, const unsigned char *input, unsigned __int32 len); +void RMD160Final (unsigned char *digest, RMD160_CTX *ctx); + +#if defined(__cplusplus) +} +#endif + +#endif // TC_HEADER_Crypto_Ripemd160 diff --git a/src/Crypto/Serpent.c b/src/Crypto/Serpent.c index 87d710c4..a8c528de 100644 --- a/src/Crypto/Serpent.c +++ b/src/Crypto/Serpent.c @@ -1,938 +1,938 @@ -// serpent.cpp - written and placed in the public domain by Wei Dai - -/* Adapted for TrueCrypt */ -/* Adapted for VeraCrypt */ - -#ifdef TC_WINDOWS_BOOT -#pragma optimize ("t", on) -#endif - -#include "Serpent.h" -#include "Common/Endian.h" - -#include - -#if defined(_WIN32) && !defined(_DEBUG) -#include -#define rotlFixed _rotl -#define rotrFixed _rotr -#else -#define rotlFixed(x,n) (((x) << (n)) | ((x) >> (32 - (n)))) -#define rotrFixed(x,n) (((x) >> (n)) | ((x) << (32 - (n)))) -#endif - -// linear transformation -#define LT(i,a,b,c,d,e) {\ - a = rotlFixed(a, 13); \ - c = rotlFixed(c, 3); \ - d = rotlFixed(d ^ c ^ (a << 3), 7); \ - b = rotlFixed(b ^ a ^ c, 1); \ - a = rotlFixed(a ^ b ^ d, 5); \ - c = rotlFixed(c ^ d ^ (b << 7), 22);} - -// inverse linear transformation -#define ILT(i,a,b,c,d,e) {\ - c = rotrFixed(c, 22); \ - a = rotrFixed(a, 5); \ - c ^= d ^ (b << 7); \ - a ^= b ^ d; \ - b = rotrFixed(b, 1); \ - d = rotrFixed(d, 7) ^ c ^ (a << 3); \ - b ^= a ^ c; \ - c = rotrFixed(c, 3); \ - a = rotrFixed(a, 13);} - -// order of output from S-box functions -#define beforeS0(f) f(0,a,b,c,d,e) -#define afterS0(f) f(1,b,e,c,a,d) -#define afterS1(f) f(2,c,b,a,e,d) -#define afterS2(f) f(3,a,e,b,d,c) -#define afterS3(f) f(4,e,b,d,c,a) -#define afterS4(f) f(5,b,a,e,c,d) -#define afterS5(f) f(6,a,c,b,e,d) -#define afterS6(f) f(7,a,c,d,b,e) -#define afterS7(f) f(8,d,e,b,a,c) - -// order of output from inverse S-box functions -#define beforeI7(f) f(8,a,b,c,d,e) -#define afterI7(f) f(7,d,a,b,e,c) -#define afterI6(f) f(6,a,b,c,e,d) -#define afterI5(f) f(5,b,d,e,c,a) -#define afterI4(f) f(4,b,c,e,a,d) -#define afterI3(f) f(3,a,b,e,c,d) -#define afterI2(f) f(2,b,d,e,c,a) -#define afterI1(f) f(1,a,b,c,e,d) -#define afterI0(f) f(0,a,d,b,e,c) - -// The instruction sequences for the S-box functions -// come from Dag Arne Osvik's paper "Speeding up Serpent". - -#define S0(i, r0, r1, r2, r3, r4) \ - { \ - r3 ^= r0; \ - r4 = r1; \ - r1 &= r3; \ - r4 ^= r2; \ - r1 ^= r0; \ - r0 |= r3; \ - r0 ^= r4; \ - r4 ^= r3; \ - r3 ^= r2; \ - r2 |= r1; \ - r2 ^= r4; \ - r4 = ~r4; \ - r4 |= r1; \ - r1 ^= r3; \ - r1 ^= r4; \ - r3 |= r0; \ - r1 ^= r3; \ - r4 ^= r3; \ - } - -#define I0(i, r0, r1, r2, r3, r4) \ - { \ - r2 = ~r2; \ - r4 = r1; \ - r1 |= r0; \ - r4 = ~r4; \ - r1 ^= r2; \ - r2 |= r4; \ - r1 ^= r3; \ - r0 ^= r4; \ - r2 ^= r0; \ - r0 &= r3; \ - r4 ^= r0; \ - r0 |= r1; \ - r0 ^= r2; \ - r3 ^= r4; \ - r2 ^= r1; \ - r3 ^= r0; \ - r3 ^= r1; \ - r2 &= r3; \ - r4 ^= r2; \ - } - -#define S1(i, r0, r1, r2, r3, r4) \ - { \ - r0 = ~r0; \ - r2 = ~r2; \ - r4 = r0; \ - r0 &= r1; \ - r2 ^= r0; \ - r0 |= r3; \ - r3 ^= r2; \ - r1 ^= r0; \ - r0 ^= r4; \ - r4 |= r1; \ - r1 ^= r3; \ - r2 |= r0; \ - r2 &= r4; \ - r0 ^= r1; \ - r1 &= r2; \ - r1 ^= r0; \ - r0 &= r2; \ - r0 ^= r4; \ - } - -#define I1(i, r0, r1, r2, r3, r4) \ - { \ - r4 = r1; \ - r1 ^= r3; \ - r3 &= r1; \ - r4 ^= r2; \ - r3 ^= r0; \ - r0 |= r1; \ - r2 ^= r3; \ - r0 ^= r4; \ - r0 |= r2; \ - r1 ^= r3; \ - r0 ^= r1; \ - r1 |= r3; \ - r1 ^= r0; \ - r4 = ~r4; \ - r4 ^= r1; \ - r1 |= r0; \ - r1 ^= r0; \ - r1 |= r4; \ - r3 ^= r1; \ - } - -#define S2(i, r0, r1, r2, r3, r4) \ - { \ - r4 = r0; \ - r0 &= r2; \ - r0 ^= r3; \ - r2 ^= r1; \ - r2 ^= r0; \ - r3 |= r4; \ - r3 ^= r1; \ - r4 ^= r2; \ - r1 = r3; \ - r3 |= r4; \ - r3 ^= r0; \ - r0 &= r1; \ - r4 ^= r0; \ - r1 ^= r3; \ - r1 ^= r4; \ - r4 = ~r4; \ - } - -#define I2(i, r0, r1, r2, r3, r4) \ - { \ - r2 ^= r3; \ - r3 ^= r0; \ - r4 = r3; \ - r3 &= r2; \ - r3 ^= r1; \ - r1 |= r2; \ - r1 ^= r4; \ - r4 &= r3; \ - r2 ^= r3; \ - r4 &= r0; \ - r4 ^= r2; \ - r2 &= r1; \ - r2 |= r0; \ - r3 = ~r3; \ - r2 ^= r3; \ - r0 ^= r3; \ - r0 &= r1; \ - r3 ^= r4; \ - r3 ^= r0; \ - } - -#define S3(i, r0, r1, r2, r3, r4) \ - { \ - r4 = r0; \ - r0 |= r3; \ - r3 ^= r1; \ - r1 &= r4; \ - r4 ^= r2; \ - r2 ^= r3; \ - r3 &= r0; \ - r4 |= r1; \ - r3 ^= r4; \ - r0 ^= r1; \ - r4 &= r0; \ - r1 ^= r3; \ - r4 ^= r2; \ - r1 |= r0; \ - r1 ^= r2; \ - r0 ^= r3; \ - r2 = r1; \ - r1 |= r3; \ - r1 ^= r0; \ - } - -#define I3(i, r0, r1, r2, r3, r4) \ - { \ - r4 = r2; \ - r2 ^= r1; \ - r1 &= r2; \ - r1 ^= r0; \ - r0 &= r4; \ - r4 ^= r3; \ - r3 |= r1; \ - r3 ^= r2; \ - r0 ^= r4; \ - r2 ^= r0; \ - r0 |= r3; \ - r0 ^= r1; \ - r4 ^= r2; \ - r2 &= r3; \ - r1 |= r3; \ - r1 ^= r2; \ - r4 ^= r0; \ - r2 ^= r4; \ - } - -#define S4(i, r0, r1, r2, r3, r4) \ - { \ - r1 ^= r3; \ - r3 = ~r3; \ - r2 ^= r3; \ - r3 ^= r0; \ - r4 = r1; \ - r1 &= r3; \ - r1 ^= r2; \ - r4 ^= r3; \ - r0 ^= r4; \ - r2 &= r4; \ - r2 ^= r0; \ - r0 &= r1; \ - r3 ^= r0; \ - r4 |= r1; \ - r4 ^= r0; \ - r0 |= r3; \ - r0 ^= r2; \ - r2 &= r3; \ - r0 = ~r0; \ - r4 ^= r2; \ - } - -#define I4(i, r0, r1, r2, r3, r4) \ - { \ - r4 = r2; \ - r2 &= r3; \ - r2 ^= r1; \ - r1 |= r3; \ - r1 &= r0; \ - r4 ^= r2; \ - r4 ^= r1; \ - r1 &= r2; \ - r0 = ~r0; \ - r3 ^= r4; \ - r1 ^= r3; \ - r3 &= r0; \ - r3 ^= r2; \ - r0 ^= r1; \ - r2 &= r0; \ - r3 ^= r0; \ - r2 ^= r4; \ - r2 |= r3; \ - r3 ^= r0; \ - r2 ^= r1; \ - } - -#define S5(i, r0, r1, r2, r3, r4) \ - { \ - r0 ^= r1; \ - r1 ^= r3; \ - r3 = ~r3; \ - r4 = r1; \ - r1 &= r0; \ - r2 ^= r3; \ - r1 ^= r2; \ - r2 |= r4; \ - r4 ^= r3; \ - r3 &= r1; \ - r3 ^= r0; \ - r4 ^= r1; \ - r4 ^= r2; \ - r2 ^= r0; \ - r0 &= r3; \ - r2 = ~r2; \ - r0 ^= r4; \ - r4 |= r3; \ - r2 ^= r4; \ - } - -#define I5(i, r0, r1, r2, r3, r4) \ - { \ - r1 = ~r1; \ - r4 = r3; \ - r2 ^= r1; \ - r3 |= r0; \ - r3 ^= r2; \ - r2 |= r1; \ - r2 &= r0; \ - r4 ^= r3; \ - r2 ^= r4; \ - r4 |= r0; \ - r4 ^= r1; \ - r1 &= r2; \ - r1 ^= r3; \ - r4 ^= r2; \ - r3 &= r4; \ - r4 ^= r1; \ - r3 ^= r0; \ - r3 ^= r4; \ - r4 = ~r4; \ - } - -#define S6(i, r0, r1, r2, r3, r4) \ - { \ - r2 = ~r2; \ - r4 = r3; \ - r3 &= r0; \ - r0 ^= r4; \ - r3 ^= r2; \ - r2 |= r4; \ - r1 ^= r3; \ - r2 ^= r0; \ - r0 |= r1; \ - r2 ^= r1; \ - r4 ^= r0; \ - r0 |= r3; \ - r0 ^= r2; \ - r4 ^= r3; \ - r4 ^= r0; \ - r3 = ~r3; \ - r2 &= r4; \ - r2 ^= r3; \ - } - -#define I6(i, r0, r1, r2, r3, r4) \ - { \ - r0 ^= r2; \ - r4 = r2; \ - r2 &= r0; \ - r4 ^= r3; \ - r2 = ~r2; \ - r3 ^= r1; \ - r2 ^= r3; \ - r4 |= r0; \ - r0 ^= r2; \ - r3 ^= r4; \ - r4 ^= r1; \ - r1 &= r3; \ - r1 ^= r0; \ - r0 ^= r3; \ - r0 |= r2; \ - r3 ^= r1; \ - r4 ^= r0; \ - } - -#define S7(i, r0, r1, r2, r3, r4) \ - { \ - r4 = r2; \ - r2 &= r1; \ - r2 ^= r3; \ - r3 &= r1; \ - r4 ^= r2; \ - r2 ^= r1; \ - r1 ^= r0; \ - r0 |= r4; \ - r0 ^= r2; \ - r3 ^= r1; \ - r2 ^= r3; \ - r3 &= r0; \ - r3 ^= r4; \ - r4 ^= r2; \ - r2 &= r0; \ - r4 = ~r4; \ - r2 ^= r4; \ - r4 &= r0; \ - r1 ^= r3; \ - r4 ^= r1; \ - } - -#define I7(i, r0, r1, r2, r3, r4) \ - { \ - r4 = r2; \ - r2 ^= r0; \ - r0 &= r3; \ - r2 = ~r2; \ - r4 |= r3; \ - r3 ^= r1; \ - r1 |= r0; \ - r0 ^= r2; \ - r2 &= r4; \ - r1 ^= r2; \ - r2 ^= r0; \ - r0 |= r2; \ - r3 &= r4; \ - r0 ^= r3; \ - r4 ^= r1; \ - r3 ^= r4; \ - r4 |= r0; \ - r3 ^= r2; \ - r4 ^= r2; \ - } - -// key xor -#define KX(r, a, b, c, d, e) {\ - a ^= k[4 * r + 0]; \ - b ^= k[4 * r + 1]; \ - c ^= k[4 * r + 2]; \ - d ^= k[4 * r + 3];} - - -#ifdef TC_MINIMIZE_CODE_SIZE - -static void S0f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) -{ - *r3 ^= *r0; - *r4 = *r1; - *r1 &= *r3; - *r4 ^= *r2; - *r1 ^= *r0; - *r0 |= *r3; - *r0 ^= *r4; - *r4 ^= *r3; - *r3 ^= *r2; - *r2 |= *r1; - *r2 ^= *r4; - *r4 = ~*r4; - *r4 |= *r1; - *r1 ^= *r3; - *r1 ^= *r4; - *r3 |= *r0; - *r1 ^= *r3; - *r4 ^= *r3; -} - -static void S1f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) -{ - *r0 = ~*r0; - *r2 = ~*r2; - *r4 = *r0; - *r0 &= *r1; - *r2 ^= *r0; - *r0 |= *r3; - *r3 ^= *r2; - *r1 ^= *r0; - *r0 ^= *r4; - *r4 |= *r1; - *r1 ^= *r3; - *r2 |= *r0; - *r2 &= *r4; - *r0 ^= *r1; - *r1 &= *r2; - *r1 ^= *r0; - *r0 &= *r2; - *r0 ^= *r4; -} - -static void S2f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) -{ - *r4 = *r0; - *r0 &= *r2; - *r0 ^= *r3; - *r2 ^= *r1; - *r2 ^= *r0; - *r3 |= *r4; - *r3 ^= *r1; - *r4 ^= *r2; - *r1 = *r3; - *r3 |= *r4; - *r3 ^= *r0; - *r0 &= *r1; - *r4 ^= *r0; - *r1 ^= *r3; - *r1 ^= *r4; - *r4 = ~*r4; -} - -static void S3f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) -{ - *r4 = *r0; - *r0 |= *r3; - *r3 ^= *r1; - *r1 &= *r4; - *r4 ^= *r2; - *r2 ^= *r3; - *r3 &= *r0; - *r4 |= *r1; - *r3 ^= *r4; - *r0 ^= *r1; - *r4 &= *r0; - *r1 ^= *r3; - *r4 ^= *r2; - *r1 |= *r0; - *r1 ^= *r2; - *r0 ^= *r3; - *r2 = *r1; - *r1 |= *r3; - *r1 ^= *r0; -} - -static void S4f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) -{ - *r1 ^= *r3; - *r3 = ~*r3; - *r2 ^= *r3; - *r3 ^= *r0; - *r4 = *r1; - *r1 &= *r3; - *r1 ^= *r2; - *r4 ^= *r3; - *r0 ^= *r4; - *r2 &= *r4; - *r2 ^= *r0; - *r0 &= *r1; - *r3 ^= *r0; - *r4 |= *r1; - *r4 ^= *r0; - *r0 |= *r3; - *r0 ^= *r2; - *r2 &= *r3; - *r0 = ~*r0; - *r4 ^= *r2; -} - -static void S5f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) -{ - *r0 ^= *r1; - *r1 ^= *r3; - *r3 = ~*r3; - *r4 = *r1; - *r1 &= *r0; - *r2 ^= *r3; - *r1 ^= *r2; - *r2 |= *r4; - *r4 ^= *r3; - *r3 &= *r1; - *r3 ^= *r0; - *r4 ^= *r1; - *r4 ^= *r2; - *r2 ^= *r0; - *r0 &= *r3; - *r2 = ~*r2; - *r0 ^= *r4; - *r4 |= *r3; - *r2 ^= *r4; -} - -static void S6f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) -{ - *r2 = ~*r2; - *r4 = *r3; - *r3 &= *r0; - *r0 ^= *r4; - *r3 ^= *r2; - *r2 |= *r4; - *r1 ^= *r3; - *r2 ^= *r0; - *r0 |= *r1; - *r2 ^= *r1; - *r4 ^= *r0; - *r0 |= *r3; - *r0 ^= *r2; - *r4 ^= *r3; - *r4 ^= *r0; - *r3 = ~*r3; - *r2 &= *r4; - *r2 ^= *r3; -} - -static void S7f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) -{ - *r4 = *r2; - *r2 &= *r1; - *r2 ^= *r3; - *r3 &= *r1; - *r4 ^= *r2; - *r2 ^= *r1; - *r1 ^= *r0; - *r0 |= *r4; - *r0 ^= *r2; - *r3 ^= *r1; - *r2 ^= *r3; - *r3 &= *r0; - *r3 ^= *r4; - *r4 ^= *r2; - *r2 &= *r0; - *r4 = ~*r4; - *r2 ^= *r4; - *r4 &= *r0; - *r1 ^= *r3; - *r4 ^= *r1; -} - -static void KXf (const unsigned __int32 *k, unsigned int r, unsigned __int32 *a, unsigned __int32 *b, unsigned __int32 *c, unsigned __int32 *d) -{ - *a ^= k[r]; - *b ^= k[r + 1]; - *c ^= k[r + 2]; - *d ^= k[r + 3]; -} - -#endif // TC_MINIMIZE_CODE_SIZE - -#ifndef TC_MINIMIZE_CODE_SIZE - -void serpent_set_key(const unsigned __int8 userKey[],unsigned __int8 *ks) -{ - unsigned __int32 a,b,c,d,e; - unsigned __int32 *k = (unsigned __int32 *)ks; - unsigned __int32 t; - int i; - - for (i = 0; i < 8; i++) - k[i] = LE32(((unsigned __int32*)userKey)[i]); - - k += 8; - t = k[-1]; - for (i = 0; i < 132; ++i) - k[i] = t = rotlFixed(k[i-8] ^ k[i-5] ^ k[i-3] ^ t ^ 0x9e3779b9 ^ i, 11); - k -= 20; - -#define LK(r, a, b, c, d, e) {\ - a = k[(8-r)*4 + 0]; \ - b = k[(8-r)*4 + 1]; \ - c = k[(8-r)*4 + 2]; \ - d = k[(8-r)*4 + 3];} - -#define SK(r, a, b, c, d, e) {\ - k[(8-r)*4 + 4] = a; \ - k[(8-r)*4 + 5] = b; \ - k[(8-r)*4 + 6] = c; \ - k[(8-r)*4 + 7] = d;} \ - - for (i=0; i<4; i++) - { - afterS2(LK); afterS2(S3); afterS3(SK); - afterS1(LK); afterS1(S2); afterS2(SK); - afterS0(LK); afterS0(S1); afterS1(SK); - beforeS0(LK); beforeS0(S0); afterS0(SK); - k += 8*4; - afterS6(LK); afterS6(S7); afterS7(SK); - afterS5(LK); afterS5(S6); afterS6(SK); - afterS4(LK); afterS4(S5); afterS5(SK); - afterS3(LK); afterS3(S4); afterS4(SK); - } - afterS2(LK); afterS2(S3); afterS3(SK); -} - -#else // TC_MINIMIZE_CODE_SIZE - -static void LKf (unsigned __int32 *k, unsigned int r, unsigned __int32 *a, unsigned __int32 *b, unsigned __int32 *c, unsigned __int32 *d) -{ - *a = k[r]; - *b = k[r + 1]; - *c = k[r + 2]; - *d = k[r + 3]; -} - -static void SKf (unsigned __int32 *k, unsigned int r, unsigned __int32 *a, unsigned __int32 *b, unsigned __int32 *c, unsigned __int32 *d) -{ - k[r + 4] = *a; - k[r + 5] = *b; - k[r + 6] = *c; - k[r + 7] = *d; -} - -void serpent_set_key(const unsigned __int8 userKey[], unsigned __int8 *ks) -{ - unsigned __int32 a,b,c,d,e; - unsigned __int32 *k = (unsigned __int32 *)ks; - unsigned __int32 t; - int i; - - for (i = 0; i < 8; i++) - k[i] = LE32(((unsigned __int32*)userKey)[i]); - - k += 8; - t = k[-1]; - for (i = 0; i < 132; ++i) - k[i] = t = rotlFixed(k[i-8] ^ k[i-5] ^ k[i-3] ^ t ^ 0x9e3779b9 ^ i, 11); - k -= 20; - - for (i=0; i<4; i++) - { - LKf (k, 20, &a, &e, &b, &d); S3f (&a, &e, &b, &d, &c); SKf (k, 16, &e, &b, &d, &c); - LKf (k, 24, &c, &b, &a, &e); S2f (&c, &b, &a, &e, &d); SKf (k, 20, &a, &e, &b, &d); - LKf (k, 28, &b, &e, &c, &a); S1f (&b, &e, &c, &a, &d); SKf (k, 24, &c, &b, &a, &e); - LKf (k, 32, &a, &b, &c, &d); S0f (&a, &b, &c, &d, &e); SKf (k, 28, &b, &e, &c, &a); - k += 8*4; - LKf (k, 4, &a, &c, &d, &b); S7f (&a, &c, &d, &b, &e); SKf (k, 0, &d, &e, &b, &a); - LKf (k, 8, &a, &c, &b, &e); S6f (&a, &c, &b, &e, &d); SKf (k, 4, &a, &c, &d, &b); - LKf (k, 12, &b, &a, &e, &c); S5f (&b, &a, &e, &c, &d); SKf (k, 8, &a, &c, &b, &e); - LKf (k, 16, &e, &b, &d, &c); S4f (&e, &b, &d, &c, &a); SKf (k, 12, &b, &a, &e, &c); - } - LKf (k, 20, &a, &e, &b, &d); S3f (&a, &e, &b, &d, &c); SKf (k, 16, &e, &b, &d, &c); -} - -#endif // TC_MINIMIZE_CODE_SIZE - - -#ifndef TC_MINIMIZE_CODE_SIZE - -void serpent_encrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks) -{ - unsigned __int32 a, b, c, d, e; - unsigned int i=1; - const unsigned __int32 *k = (unsigned __int32 *)ks + 8; - unsigned __int32 *in = (unsigned __int32 *) inBlock; - unsigned __int32 *out = (unsigned __int32 *) outBlock; - - a = LE32(in[0]); - b = LE32(in[1]); - c = LE32(in[2]); - d = LE32(in[3]); - - do - { - beforeS0(KX); beforeS0(S0); afterS0(LT); - afterS0(KX); afterS0(S1); afterS1(LT); - afterS1(KX); afterS1(S2); afterS2(LT); - afterS2(KX); afterS2(S3); afterS3(LT); - afterS3(KX); afterS3(S4); afterS4(LT); - afterS4(KX); afterS4(S5); afterS5(LT); - afterS5(KX); afterS5(S6); afterS6(LT); - afterS6(KX); afterS6(S7); - - if (i == 4) - break; - - ++i; - c = b; - b = e; - e = d; - d = a; - a = e; - k += 32; - beforeS0(LT); - } - while (1); - - afterS7(KX); - - out[0] = LE32(d); - out[1] = LE32(e); - out[2] = LE32(b); - out[3] = LE32(a); -} - -#else // TC_MINIMIZE_CODE_SIZE - -typedef unsigned __int32 uint32; - -static void LTf (uint32 *a, uint32 *b, uint32 *c, uint32 *d) -{ - *a = rotlFixed(*a, 13); - *c = rotlFixed(*c, 3); - *d = rotlFixed(*d ^ *c ^ (*a << 3), 7); - *b = rotlFixed(*b ^ *a ^ *c, 1); - *a = rotlFixed(*a ^ *b ^ *d, 5); - *c = rotlFixed(*c ^ *d ^ (*b << 7), 22); -} - -void serpent_encrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks) -{ - unsigned __int32 a, b, c, d, e; - unsigned int i=1; - const unsigned __int32 *k = (unsigned __int32 *)ks + 8; - unsigned __int32 *in = (unsigned __int32 *) inBlock; - unsigned __int32 *out = (unsigned __int32 *) outBlock; - - a = LE32(in[0]); - b = LE32(in[1]); - c = LE32(in[2]); - d = LE32(in[3]); - - do - { - KXf (k, 0, &a, &b, &c, &d); S0f (&a, &b, &c, &d, &e); LTf (&b, &e, &c, &a); - KXf (k, 4, &b, &e, &c, &a); S1f (&b, &e, &c, &a, &d); LTf (&c, &b, &a, &e); - KXf (k, 8, &c, &b, &a, &e); S2f (&c, &b, &a, &e, &d); LTf (&a, &e, &b, &d); - KXf (k, 12, &a, &e, &b, &d); S3f (&a, &e, &b, &d, &c); LTf (&e, &b, &d, &c); - KXf (k, 16, &e, &b, &d, &c); S4f (&e, &b, &d, &c, &a); LTf (&b, &a, &e, &c); - KXf (k, 20, &b, &a, &e, &c); S5f (&b, &a, &e, &c, &d); LTf (&a, &c, &b, &e); - KXf (k, 24, &a, &c, &b, &e); S6f (&a, &c, &b, &e, &d); LTf (&a, &c, &d, &b); - KXf (k, 28, &a, &c, &d, &b); S7f (&a, &c, &d, &b, &e); - - if (i == 4) - break; - - ++i; - c = b; - b = e; - e = d; - d = a; - a = e; - k += 32; - LTf (&a,&b,&c,&d); - } - while (1); - - KXf (k, 32, &d, &e, &b, &a); - - out[0] = LE32(d); - out[1] = LE32(e); - out[2] = LE32(b); - out[3] = LE32(a); -} - -#endif // TC_MINIMIZE_CODE_SIZE - -#if !defined (TC_MINIMIZE_CODE_SIZE) - -void serpent_decrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks) -{ - unsigned __int32 a, b, c, d, e; - const unsigned __int32 *k = (unsigned __int32 *)ks + 104; - unsigned int i=4; - unsigned __int32 *in = (unsigned __int32 *) inBlock; - unsigned __int32 *out = (unsigned __int32 *) outBlock; - - a = LE32(in[0]); - b = LE32(in[1]); - c = LE32(in[2]); - d = LE32(in[3]); - - beforeI7(KX); - goto start; - - do - { - c = b; - b = d; - d = e; - k -= 32; - beforeI7(ILT); -start: - beforeI7(I7); afterI7(KX); - afterI7(ILT); afterI7(I6); afterI6(KX); - afterI6(ILT); afterI6(I5); afterI5(KX); - afterI5(ILT); afterI5(I4); afterI4(KX); - afterI4(ILT); afterI4(I3); afterI3(KX); - afterI3(ILT); afterI3(I2); afterI2(KX); - afterI2(ILT); afterI2(I1); afterI1(KX); - afterI1(ILT); afterI1(I0); afterI0(KX); - } - while (--i != 0); - - out[0] = LE32(a); - out[1] = LE32(d); - out[2] = LE32(b); - out[3] = LE32(e); -} - -#else // TC_MINIMIZE_CODE_SIZE - -static void ILTf (uint32 *a, uint32 *b, uint32 *c, uint32 *d) -{ - *c = rotrFixed(*c, 22); - *a = rotrFixed(*a, 5); - *c ^= *d ^ (*b << 7); - *a ^= *b ^ *d; - *b = rotrFixed(*b, 1); - *d = rotrFixed(*d, 7) ^ *c ^ (*a << 3); - *b ^= *a ^ *c; - *c = rotrFixed(*c, 3); - *a = rotrFixed(*a, 13); -} - -void serpent_decrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks) -{ - unsigned __int32 a, b, c, d, e; - const unsigned __int32 *k = (unsigned __int32 *)ks + 104; - unsigned int i=4; - unsigned __int32 *in = (unsigned __int32 *) inBlock; - unsigned __int32 *out = (unsigned __int32 *) outBlock; - - a = LE32(in[0]); - b = LE32(in[1]); - c = LE32(in[2]); - d = LE32(in[3]); - - KXf (k, 32, &a, &b, &c, &d); - goto start; - - do - { - c = b; - b = d; - d = e; - k -= 32; - beforeI7(ILT); -start: - beforeI7(I7); KXf (k, 28, &d, &a, &b, &e); - ILTf (&d, &a, &b, &e); afterI7(I6); KXf (k, 24, &a, &b, &c, &e); - ILTf (&a, &b, &c, &e); afterI6(I5); KXf (k, 20, &b, &d, &e, &c); - ILTf (&b, &d, &e, &c); afterI5(I4); KXf (k, 16, &b, &c, &e, &a); - ILTf (&b, &c, &e, &a); afterI4(I3); KXf (k, 12, &a, &b, &e, &c); - ILTf (&a, &b, &e, &c); afterI3(I2); KXf (k, 8, &b, &d, &e, &c); - ILTf (&b, &d, &e, &c); afterI2(I1); KXf (k, 4, &a, &b, &c, &e); - ILTf (&a, &b, &c, &e); afterI1(I0); KXf (k, 0, &a, &d, &b, &e); - } - while (--i != 0); - - out[0] = LE32(a); - out[1] = LE32(d); - out[2] = LE32(b); - out[3] = LE32(e); -} - -#endif // TC_MINIMIZE_CODE_SIZE +// serpent.cpp - written and placed in the public domain by Wei Dai + +/* Adapted for TrueCrypt */ +/* Adapted for VeraCrypt */ + +#ifdef TC_WINDOWS_BOOT +#pragma optimize ("t", on) +#endif + +#include "Serpent.h" +#include "Common/Endian.h" + +#include + +#if defined(_WIN32) && !defined(_DEBUG) +#include +#define rotlFixed _rotl +#define rotrFixed _rotr +#else +#define rotlFixed(x,n) (((x) << (n)) | ((x) >> (32 - (n)))) +#define rotrFixed(x,n) (((x) >> (n)) | ((x) << (32 - (n)))) +#endif + +// linear transformation +#define LT(i,a,b,c,d,e) {\ + a = rotlFixed(a, 13); \ + c = rotlFixed(c, 3); \ + d = rotlFixed(d ^ c ^ (a << 3), 7); \ + b = rotlFixed(b ^ a ^ c, 1); \ + a = rotlFixed(a ^ b ^ d, 5); \ + c = rotlFixed(c ^ d ^ (b << 7), 22);} + +// inverse linear transformation +#define ILT(i,a,b,c,d,e) {\ + c = rotrFixed(c, 22); \ + a = rotrFixed(a, 5); \ + c ^= d ^ (b << 7); \ + a ^= b ^ d; \ + b = rotrFixed(b, 1); \ + d = rotrFixed(d, 7) ^ c ^ (a << 3); \ + b ^= a ^ c; \ + c = rotrFixed(c, 3); \ + a = rotrFixed(a, 13);} + +// order of output from S-box functions +#define beforeS0(f) f(0,a,b,c,d,e) +#define afterS0(f) f(1,b,e,c,a,d) +#define afterS1(f) f(2,c,b,a,e,d) +#define afterS2(f) f(3,a,e,b,d,c) +#define afterS3(f) f(4,e,b,d,c,a) +#define afterS4(f) f(5,b,a,e,c,d) +#define afterS5(f) f(6,a,c,b,e,d) +#define afterS6(f) f(7,a,c,d,b,e) +#define afterS7(f) f(8,d,e,b,a,c) + +// order of output from inverse S-box functions +#define beforeI7(f) f(8,a,b,c,d,e) +#define afterI7(f) f(7,d,a,b,e,c) +#define afterI6(f) f(6,a,b,c,e,d) +#define afterI5(f) f(5,b,d,e,c,a) +#define afterI4(f) f(4,b,c,e,a,d) +#define afterI3(f) f(3,a,b,e,c,d) +#define afterI2(f) f(2,b,d,e,c,a) +#define afterI1(f) f(1,a,b,c,e,d) +#define afterI0(f) f(0,a,d,b,e,c) + +// The instruction sequences for the S-box functions +// come from Dag Arne Osvik's paper "Speeding up Serpent". + +#define S0(i, r0, r1, r2, r3, r4) \ + { \ + r3 ^= r0; \ + r4 = r1; \ + r1 &= r3; \ + r4 ^= r2; \ + r1 ^= r0; \ + r0 |= r3; \ + r0 ^= r4; \ + r4 ^= r3; \ + r3 ^= r2; \ + r2 |= r1; \ + r2 ^= r4; \ + r4 = ~r4; \ + r4 |= r1; \ + r1 ^= r3; \ + r1 ^= r4; \ + r3 |= r0; \ + r1 ^= r3; \ + r4 ^= r3; \ + } + +#define I0(i, r0, r1, r2, r3, r4) \ + { \ + r2 = ~r2; \ + r4 = r1; \ + r1 |= r0; \ + r4 = ~r4; \ + r1 ^= r2; \ + r2 |= r4; \ + r1 ^= r3; \ + r0 ^= r4; \ + r2 ^= r0; \ + r0 &= r3; \ + r4 ^= r0; \ + r0 |= r1; \ + r0 ^= r2; \ + r3 ^= r4; \ + r2 ^= r1; \ + r3 ^= r0; \ + r3 ^= r1; \ + r2 &= r3; \ + r4 ^= r2; \ + } + +#define S1(i, r0, r1, r2, r3, r4) \ + { \ + r0 = ~r0; \ + r2 = ~r2; \ + r4 = r0; \ + r0 &= r1; \ + r2 ^= r0; \ + r0 |= r3; \ + r3 ^= r2; \ + r1 ^= r0; \ + r0 ^= r4; \ + r4 |= r1; \ + r1 ^= r3; \ + r2 |= r0; \ + r2 &= r4; \ + r0 ^= r1; \ + r1 &= r2; \ + r1 ^= r0; \ + r0 &= r2; \ + r0 ^= r4; \ + } + +#define I1(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r1; \ + r1 ^= r3; \ + r3 &= r1; \ + r4 ^= r2; \ + r3 ^= r0; \ + r0 |= r1; \ + r2 ^= r3; \ + r0 ^= r4; \ + r0 |= r2; \ + r1 ^= r3; \ + r0 ^= r1; \ + r1 |= r3; \ + r1 ^= r0; \ + r4 = ~r4; \ + r4 ^= r1; \ + r1 |= r0; \ + r1 ^= r0; \ + r1 |= r4; \ + r3 ^= r1; \ + } + +#define S2(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r0; \ + r0 &= r2; \ + r0 ^= r3; \ + r2 ^= r1; \ + r2 ^= r0; \ + r3 |= r4; \ + r3 ^= r1; \ + r4 ^= r2; \ + r1 = r3; \ + r3 |= r4; \ + r3 ^= r0; \ + r0 &= r1; \ + r4 ^= r0; \ + r1 ^= r3; \ + r1 ^= r4; \ + r4 = ~r4; \ + } + +#define I2(i, r0, r1, r2, r3, r4) \ + { \ + r2 ^= r3; \ + r3 ^= r0; \ + r4 = r3; \ + r3 &= r2; \ + r3 ^= r1; \ + r1 |= r2; \ + r1 ^= r4; \ + r4 &= r3; \ + r2 ^= r3; \ + r4 &= r0; \ + r4 ^= r2; \ + r2 &= r1; \ + r2 |= r0; \ + r3 = ~r3; \ + r2 ^= r3; \ + r0 ^= r3; \ + r0 &= r1; \ + r3 ^= r4; \ + r3 ^= r0; \ + } + +#define S3(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r0; \ + r0 |= r3; \ + r3 ^= r1; \ + r1 &= r4; \ + r4 ^= r2; \ + r2 ^= r3; \ + r3 &= r0; \ + r4 |= r1; \ + r3 ^= r4; \ + r0 ^= r1; \ + r4 &= r0; \ + r1 ^= r3; \ + r4 ^= r2; \ + r1 |= r0; \ + r1 ^= r2; \ + r0 ^= r3; \ + r2 = r1; \ + r1 |= r3; \ + r1 ^= r0; \ + } + +#define I3(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r2; \ + r2 ^= r1; \ + r1 &= r2; \ + r1 ^= r0; \ + r0 &= r4; \ + r4 ^= r3; \ + r3 |= r1; \ + r3 ^= r2; \ + r0 ^= r4; \ + r2 ^= r0; \ + r0 |= r3; \ + r0 ^= r1; \ + r4 ^= r2; \ + r2 &= r3; \ + r1 |= r3; \ + r1 ^= r2; \ + r4 ^= r0; \ + r2 ^= r4; \ + } + +#define S4(i, r0, r1, r2, r3, r4) \ + { \ + r1 ^= r3; \ + r3 = ~r3; \ + r2 ^= r3; \ + r3 ^= r0; \ + r4 = r1; \ + r1 &= r3; \ + r1 ^= r2; \ + r4 ^= r3; \ + r0 ^= r4; \ + r2 &= r4; \ + r2 ^= r0; \ + r0 &= r1; \ + r3 ^= r0; \ + r4 |= r1; \ + r4 ^= r0; \ + r0 |= r3; \ + r0 ^= r2; \ + r2 &= r3; \ + r0 = ~r0; \ + r4 ^= r2; \ + } + +#define I4(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r2; \ + r2 &= r3; \ + r2 ^= r1; \ + r1 |= r3; \ + r1 &= r0; \ + r4 ^= r2; \ + r4 ^= r1; \ + r1 &= r2; \ + r0 = ~r0; \ + r3 ^= r4; \ + r1 ^= r3; \ + r3 &= r0; \ + r3 ^= r2; \ + r0 ^= r1; \ + r2 &= r0; \ + r3 ^= r0; \ + r2 ^= r4; \ + r2 |= r3; \ + r3 ^= r0; \ + r2 ^= r1; \ + } + +#define S5(i, r0, r1, r2, r3, r4) \ + { \ + r0 ^= r1; \ + r1 ^= r3; \ + r3 = ~r3; \ + r4 = r1; \ + r1 &= r0; \ + r2 ^= r3; \ + r1 ^= r2; \ + r2 |= r4; \ + r4 ^= r3; \ + r3 &= r1; \ + r3 ^= r0; \ + r4 ^= r1; \ + r4 ^= r2; \ + r2 ^= r0; \ + r0 &= r3; \ + r2 = ~r2; \ + r0 ^= r4; \ + r4 |= r3; \ + r2 ^= r4; \ + } + +#define I5(i, r0, r1, r2, r3, r4) \ + { \ + r1 = ~r1; \ + r4 = r3; \ + r2 ^= r1; \ + r3 |= r0; \ + r3 ^= r2; \ + r2 |= r1; \ + r2 &= r0; \ + r4 ^= r3; \ + r2 ^= r4; \ + r4 |= r0; \ + r4 ^= r1; \ + r1 &= r2; \ + r1 ^= r3; \ + r4 ^= r2; \ + r3 &= r4; \ + r4 ^= r1; \ + r3 ^= r0; \ + r3 ^= r4; \ + r4 = ~r4; \ + } + +#define S6(i, r0, r1, r2, r3, r4) \ + { \ + r2 = ~r2; \ + r4 = r3; \ + r3 &= r0; \ + r0 ^= r4; \ + r3 ^= r2; \ + r2 |= r4; \ + r1 ^= r3; \ + r2 ^= r0; \ + r0 |= r1; \ + r2 ^= r1; \ + r4 ^= r0; \ + r0 |= r3; \ + r0 ^= r2; \ + r4 ^= r3; \ + r4 ^= r0; \ + r3 = ~r3; \ + r2 &= r4; \ + r2 ^= r3; \ + } + +#define I6(i, r0, r1, r2, r3, r4) \ + { \ + r0 ^= r2; \ + r4 = r2; \ + r2 &= r0; \ + r4 ^= r3; \ + r2 = ~r2; \ + r3 ^= r1; \ + r2 ^= r3; \ + r4 |= r0; \ + r0 ^= r2; \ + r3 ^= r4; \ + r4 ^= r1; \ + r1 &= r3; \ + r1 ^= r0; \ + r0 ^= r3; \ + r0 |= r2; \ + r3 ^= r1; \ + r4 ^= r0; \ + } + +#define S7(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r2; \ + r2 &= r1; \ + r2 ^= r3; \ + r3 &= r1; \ + r4 ^= r2; \ + r2 ^= r1; \ + r1 ^= r0; \ + r0 |= r4; \ + r0 ^= r2; \ + r3 ^= r1; \ + r2 ^= r3; \ + r3 &= r0; \ + r3 ^= r4; \ + r4 ^= r2; \ + r2 &= r0; \ + r4 = ~r4; \ + r2 ^= r4; \ + r4 &= r0; \ + r1 ^= r3; \ + r4 ^= r1; \ + } + +#define I7(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r2; \ + r2 ^= r0; \ + r0 &= r3; \ + r2 = ~r2; \ + r4 |= r3; \ + r3 ^= r1; \ + r1 |= r0; \ + r0 ^= r2; \ + r2 &= r4; \ + r1 ^= r2; \ + r2 ^= r0; \ + r0 |= r2; \ + r3 &= r4; \ + r0 ^= r3; \ + r4 ^= r1; \ + r3 ^= r4; \ + r4 |= r0; \ + r3 ^= r2; \ + r4 ^= r2; \ + } + +// key xor +#define KX(r, a, b, c, d, e) {\ + a ^= k[4 * r + 0]; \ + b ^= k[4 * r + 1]; \ + c ^= k[4 * r + 2]; \ + d ^= k[4 * r + 3];} + + +#ifdef TC_MINIMIZE_CODE_SIZE + +static void S0f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r3 ^= *r0; + *r4 = *r1; + *r1 &= *r3; + *r4 ^= *r2; + *r1 ^= *r0; + *r0 |= *r3; + *r0 ^= *r4; + *r4 ^= *r3; + *r3 ^= *r2; + *r2 |= *r1; + *r2 ^= *r4; + *r4 = ~*r4; + *r4 |= *r1; + *r1 ^= *r3; + *r1 ^= *r4; + *r3 |= *r0; + *r1 ^= *r3; + *r4 ^= *r3; +} + +static void S1f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r0 = ~*r0; + *r2 = ~*r2; + *r4 = *r0; + *r0 &= *r1; + *r2 ^= *r0; + *r0 |= *r3; + *r3 ^= *r2; + *r1 ^= *r0; + *r0 ^= *r4; + *r4 |= *r1; + *r1 ^= *r3; + *r2 |= *r0; + *r2 &= *r4; + *r0 ^= *r1; + *r1 &= *r2; + *r1 ^= *r0; + *r0 &= *r2; + *r0 ^= *r4; +} + +static void S2f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r4 = *r0; + *r0 &= *r2; + *r0 ^= *r3; + *r2 ^= *r1; + *r2 ^= *r0; + *r3 |= *r4; + *r3 ^= *r1; + *r4 ^= *r2; + *r1 = *r3; + *r3 |= *r4; + *r3 ^= *r0; + *r0 &= *r1; + *r4 ^= *r0; + *r1 ^= *r3; + *r1 ^= *r4; + *r4 = ~*r4; +} + +static void S3f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r4 = *r0; + *r0 |= *r3; + *r3 ^= *r1; + *r1 &= *r4; + *r4 ^= *r2; + *r2 ^= *r3; + *r3 &= *r0; + *r4 |= *r1; + *r3 ^= *r4; + *r0 ^= *r1; + *r4 &= *r0; + *r1 ^= *r3; + *r4 ^= *r2; + *r1 |= *r0; + *r1 ^= *r2; + *r0 ^= *r3; + *r2 = *r1; + *r1 |= *r3; + *r1 ^= *r0; +} + +static void S4f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r1 ^= *r3; + *r3 = ~*r3; + *r2 ^= *r3; + *r3 ^= *r0; + *r4 = *r1; + *r1 &= *r3; + *r1 ^= *r2; + *r4 ^= *r3; + *r0 ^= *r4; + *r2 &= *r4; + *r2 ^= *r0; + *r0 &= *r1; + *r3 ^= *r0; + *r4 |= *r1; + *r4 ^= *r0; + *r0 |= *r3; + *r0 ^= *r2; + *r2 &= *r3; + *r0 = ~*r0; + *r4 ^= *r2; +} + +static void S5f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r0 ^= *r1; + *r1 ^= *r3; + *r3 = ~*r3; + *r4 = *r1; + *r1 &= *r0; + *r2 ^= *r3; + *r1 ^= *r2; + *r2 |= *r4; + *r4 ^= *r3; + *r3 &= *r1; + *r3 ^= *r0; + *r4 ^= *r1; + *r4 ^= *r2; + *r2 ^= *r0; + *r0 &= *r3; + *r2 = ~*r2; + *r0 ^= *r4; + *r4 |= *r3; + *r2 ^= *r4; +} + +static void S6f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r2 = ~*r2; + *r4 = *r3; + *r3 &= *r0; + *r0 ^= *r4; + *r3 ^= *r2; + *r2 |= *r4; + *r1 ^= *r3; + *r2 ^= *r0; + *r0 |= *r1; + *r2 ^= *r1; + *r4 ^= *r0; + *r0 |= *r3; + *r0 ^= *r2; + *r4 ^= *r3; + *r4 ^= *r0; + *r3 = ~*r3; + *r2 &= *r4; + *r2 ^= *r3; +} + +static void S7f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r4 = *r2; + *r2 &= *r1; + *r2 ^= *r3; + *r3 &= *r1; + *r4 ^= *r2; + *r2 ^= *r1; + *r1 ^= *r0; + *r0 |= *r4; + *r0 ^= *r2; + *r3 ^= *r1; + *r2 ^= *r3; + *r3 &= *r0; + *r3 ^= *r4; + *r4 ^= *r2; + *r2 &= *r0; + *r4 = ~*r4; + *r2 ^= *r4; + *r4 &= *r0; + *r1 ^= *r3; + *r4 ^= *r1; +} + +static void KXf (const unsigned __int32 *k, unsigned int r, unsigned __int32 *a, unsigned __int32 *b, unsigned __int32 *c, unsigned __int32 *d) +{ + *a ^= k[r]; + *b ^= k[r + 1]; + *c ^= k[r + 2]; + *d ^= k[r + 3]; +} + +#endif // TC_MINIMIZE_CODE_SIZE + +#ifndef TC_MINIMIZE_CODE_SIZE + +void serpent_set_key(const unsigned __int8 userKey[],unsigned __int8 *ks) +{ + unsigned __int32 a,b,c,d,e; + unsigned __int32 *k = (unsigned __int32 *)ks; + unsigned __int32 t; + int i; + + for (i = 0; i < 8; i++) + k[i] = LE32(((unsigned __int32*)userKey)[i]); + + k += 8; + t = k[-1]; + for (i = 0; i < 132; ++i) + k[i] = t = rotlFixed(k[i-8] ^ k[i-5] ^ k[i-3] ^ t ^ 0x9e3779b9 ^ i, 11); + k -= 20; + +#define LK(r, a, b, c, d, e) {\ + a = k[(8-r)*4 + 0]; \ + b = k[(8-r)*4 + 1]; \ + c = k[(8-r)*4 + 2]; \ + d = k[(8-r)*4 + 3];} + +#define SK(r, a, b, c, d, e) {\ + k[(8-r)*4 + 4] = a; \ + k[(8-r)*4 + 5] = b; \ + k[(8-r)*4 + 6] = c; \ + k[(8-r)*4 + 7] = d;} \ + + for (i=0; i<4; i++) + { + afterS2(LK); afterS2(S3); afterS3(SK); + afterS1(LK); afterS1(S2); afterS2(SK); + afterS0(LK); afterS0(S1); afterS1(SK); + beforeS0(LK); beforeS0(S0); afterS0(SK); + k += 8*4; + afterS6(LK); afterS6(S7); afterS7(SK); + afterS5(LK); afterS5(S6); afterS6(SK); + afterS4(LK); afterS4(S5); afterS5(SK); + afterS3(LK); afterS3(S4); afterS4(SK); + } + afterS2(LK); afterS2(S3); afterS3(SK); +} + +#else // TC_MINIMIZE_CODE_SIZE + +static void LKf (unsigned __int32 *k, unsigned int r, unsigned __int32 *a, unsigned __int32 *b, unsigned __int32 *c, unsigned __int32 *d) +{ + *a = k[r]; + *b = k[r + 1]; + *c = k[r + 2]; + *d = k[r + 3]; +} + +static void SKf (unsigned __int32 *k, unsigned int r, unsigned __int32 *a, unsigned __int32 *b, unsigned __int32 *c, unsigned __int32 *d) +{ + k[r + 4] = *a; + k[r + 5] = *b; + k[r + 6] = *c; + k[r + 7] = *d; +} + +void serpent_set_key(const unsigned __int8 userKey[], unsigned __int8 *ks) +{ + unsigned __int32 a,b,c,d,e; + unsigned __int32 *k = (unsigned __int32 *)ks; + unsigned __int32 t; + int i; + + for (i = 0; i < 8; i++) + k[i] = LE32(((unsigned __int32*)userKey)[i]); + + k += 8; + t = k[-1]; + for (i = 0; i < 132; ++i) + k[i] = t = rotlFixed(k[i-8] ^ k[i-5] ^ k[i-3] ^ t ^ 0x9e3779b9 ^ i, 11); + k -= 20; + + for (i=0; i<4; i++) + { + LKf (k, 20, &a, &e, &b, &d); S3f (&a, &e, &b, &d, &c); SKf (k, 16, &e, &b, &d, &c); + LKf (k, 24, &c, &b, &a, &e); S2f (&c, &b, &a, &e, &d); SKf (k, 20, &a, &e, &b, &d); + LKf (k, 28, &b, &e, &c, &a); S1f (&b, &e, &c, &a, &d); SKf (k, 24, &c, &b, &a, &e); + LKf (k, 32, &a, &b, &c, &d); S0f (&a, &b, &c, &d, &e); SKf (k, 28, &b, &e, &c, &a); + k += 8*4; + LKf (k, 4, &a, &c, &d, &b); S7f (&a, &c, &d, &b, &e); SKf (k, 0, &d, &e, &b, &a); + LKf (k, 8, &a, &c, &b, &e); S6f (&a, &c, &b, &e, &d); SKf (k, 4, &a, &c, &d, &b); + LKf (k, 12, &b, &a, &e, &c); S5f (&b, &a, &e, &c, &d); SKf (k, 8, &a, &c, &b, &e); + LKf (k, 16, &e, &b, &d, &c); S4f (&e, &b, &d, &c, &a); SKf (k, 12, &b, &a, &e, &c); + } + LKf (k, 20, &a, &e, &b, &d); S3f (&a, &e, &b, &d, &c); SKf (k, 16, &e, &b, &d, &c); +} + +#endif // TC_MINIMIZE_CODE_SIZE + + +#ifndef TC_MINIMIZE_CODE_SIZE + +void serpent_encrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks) +{ + unsigned __int32 a, b, c, d, e; + unsigned int i=1; + const unsigned __int32 *k = (unsigned __int32 *)ks + 8; + unsigned __int32 *in = (unsigned __int32 *) inBlock; + unsigned __int32 *out = (unsigned __int32 *) outBlock; + + a = LE32(in[0]); + b = LE32(in[1]); + c = LE32(in[2]); + d = LE32(in[3]); + + do + { + beforeS0(KX); beforeS0(S0); afterS0(LT); + afterS0(KX); afterS0(S1); afterS1(LT); + afterS1(KX); afterS1(S2); afterS2(LT); + afterS2(KX); afterS2(S3); afterS3(LT); + afterS3(KX); afterS3(S4); afterS4(LT); + afterS4(KX); afterS4(S5); afterS5(LT); + afterS5(KX); afterS5(S6); afterS6(LT); + afterS6(KX); afterS6(S7); + + if (i == 4) + break; + + ++i; + c = b; + b = e; + e = d; + d = a; + a = e; + k += 32; + beforeS0(LT); + } + while (1); + + afterS7(KX); + + out[0] = LE32(d); + out[1] = LE32(e); + out[2] = LE32(b); + out[3] = LE32(a); +} + +#else // TC_MINIMIZE_CODE_SIZE + +typedef unsigned __int32 uint32; + +static void LTf (uint32 *a, uint32 *b, uint32 *c, uint32 *d) +{ + *a = rotlFixed(*a, 13); + *c = rotlFixed(*c, 3); + *d = rotlFixed(*d ^ *c ^ (*a << 3), 7); + *b = rotlFixed(*b ^ *a ^ *c, 1); + *a = rotlFixed(*a ^ *b ^ *d, 5); + *c = rotlFixed(*c ^ *d ^ (*b << 7), 22); +} + +void serpent_encrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks) +{ + unsigned __int32 a, b, c, d, e; + unsigned int i=1; + const unsigned __int32 *k = (unsigned __int32 *)ks + 8; + unsigned __int32 *in = (unsigned __int32 *) inBlock; + unsigned __int32 *out = (unsigned __int32 *) outBlock; + + a = LE32(in[0]); + b = LE32(in[1]); + c = LE32(in[2]); + d = LE32(in[3]); + + do + { + KXf (k, 0, &a, &b, &c, &d); S0f (&a, &b, &c, &d, &e); LTf (&b, &e, &c, &a); + KXf (k, 4, &b, &e, &c, &a); S1f (&b, &e, &c, &a, &d); LTf (&c, &b, &a, &e); + KXf (k, 8, &c, &b, &a, &e); S2f (&c, &b, &a, &e, &d); LTf (&a, &e, &b, &d); + KXf (k, 12, &a, &e, &b, &d); S3f (&a, &e, &b, &d, &c); LTf (&e, &b, &d, &c); + KXf (k, 16, &e, &b, &d, &c); S4f (&e, &b, &d, &c, &a); LTf (&b, &a, &e, &c); + KXf (k, 20, &b, &a, &e, &c); S5f (&b, &a, &e, &c, &d); LTf (&a, &c, &b, &e); + KXf (k, 24, &a, &c, &b, &e); S6f (&a, &c, &b, &e, &d); LTf (&a, &c, &d, &b); + KXf (k, 28, &a, &c, &d, &b); S7f (&a, &c, &d, &b, &e); + + if (i == 4) + break; + + ++i; + c = b; + b = e; + e = d; + d = a; + a = e; + k += 32; + LTf (&a,&b,&c,&d); + } + while (1); + + KXf (k, 32, &d, &e, &b, &a); + + out[0] = LE32(d); + out[1] = LE32(e); + out[2] = LE32(b); + out[3] = LE32(a); +} + +#endif // TC_MINIMIZE_CODE_SIZE + +#if !defined (TC_MINIMIZE_CODE_SIZE) + +void serpent_decrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks) +{ + unsigned __int32 a, b, c, d, e; + const unsigned __int32 *k = (unsigned __int32 *)ks + 104; + unsigned int i=4; + unsigned __int32 *in = (unsigned __int32 *) inBlock; + unsigned __int32 *out = (unsigned __int32 *) outBlock; + + a = LE32(in[0]); + b = LE32(in[1]); + c = LE32(in[2]); + d = LE32(in[3]); + + beforeI7(KX); + goto start; + + do + { + c = b; + b = d; + d = e; + k -= 32; + beforeI7(ILT); +start: + beforeI7(I7); afterI7(KX); + afterI7(ILT); afterI7(I6); afterI6(KX); + afterI6(ILT); afterI6(I5); afterI5(KX); + afterI5(ILT); afterI5(I4); afterI4(KX); + afterI4(ILT); afterI4(I3); afterI3(KX); + afterI3(ILT); afterI3(I2); afterI2(KX); + afterI2(ILT); afterI2(I1); afterI1(KX); + afterI1(ILT); afterI1(I0); afterI0(KX); + } + while (--i != 0); + + out[0] = LE32(a); + out[1] = LE32(d); + out[2] = LE32(b); + out[3] = LE32(e); +} + +#else // TC_MINIMIZE_CODE_SIZE + +static void ILTf (uint32 *a, uint32 *b, uint32 *c, uint32 *d) +{ + *c = rotrFixed(*c, 22); + *a = rotrFixed(*a, 5); + *c ^= *d ^ (*b << 7); + *a ^= *b ^ *d; + *b = rotrFixed(*b, 1); + *d = rotrFixed(*d, 7) ^ *c ^ (*a << 3); + *b ^= *a ^ *c; + *c = rotrFixed(*c, 3); + *a = rotrFixed(*a, 13); +} + +void serpent_decrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks) +{ + unsigned __int32 a, b, c, d, e; + const unsigned __int32 *k = (unsigned __int32 *)ks + 104; + unsigned int i=4; + unsigned __int32 *in = (unsigned __int32 *) inBlock; + unsigned __int32 *out = (unsigned __int32 *) outBlock; + + a = LE32(in[0]); + b = LE32(in[1]); + c = LE32(in[2]); + d = LE32(in[3]); + + KXf (k, 32, &a, &b, &c, &d); + goto start; + + do + { + c = b; + b = d; + d = e; + k -= 32; + beforeI7(ILT); +start: + beforeI7(I7); KXf (k, 28, &d, &a, &b, &e); + ILTf (&d, &a, &b, &e); afterI7(I6); KXf (k, 24, &a, &b, &c, &e); + ILTf (&a, &b, &c, &e); afterI6(I5); KXf (k, 20, &b, &d, &e, &c); + ILTf (&b, &d, &e, &c); afterI5(I4); KXf (k, 16, &b, &c, &e, &a); + ILTf (&b, &c, &e, &a); afterI4(I3); KXf (k, 12, &a, &b, &e, &c); + ILTf (&a, &b, &e, &c); afterI3(I2); KXf (k, 8, &b, &d, &e, &c); + ILTf (&b, &d, &e, &c); afterI2(I1); KXf (k, 4, &a, &b, &c, &e); + ILTf (&a, &b, &c, &e); afterI1(I0); KXf (k, 0, &a, &d, &b, &e); + } + while (--i != 0); + + out[0] = LE32(a); + out[1] = LE32(d); + out[2] = LE32(b); + out[3] = LE32(e); +} + +#endif // TC_MINIMIZE_CODE_SIZE diff --git a/src/Crypto/Serpent.h b/src/Crypto/Serpent.h index b88ddc4d..0f4ab787 100644 --- a/src/Crypto/Serpent.h +++ b/src/Crypto/Serpent.h @@ -1,20 +1,20 @@ -#ifndef HEADER_Crypto_Serpent -#define HEADER_Crypto_Serpent - -#include "Common/Tcdefs.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/* userKey is always 32-bytes long */ -void serpent_set_key(const unsigned __int8 userKey[], unsigned __int8 *ks); -void serpent_encrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks); -void serpent_decrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks); - -#ifdef __cplusplus -} -#endif - -#endif // HEADER_Crypto_Serpent +#ifndef HEADER_Crypto_Serpent +#define HEADER_Crypto_Serpent + +#include "Common/Tcdefs.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +/* userKey is always 32-bytes long */ +void serpent_set_key(const unsigned __int8 userKey[], unsigned __int8 *ks); +void serpent_encrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks); +void serpent_decrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks); + +#ifdef __cplusplus +} +#endif + +#endif // HEADER_Crypto_Serpent diff --git a/src/Crypto/Sha2.c b/src/Crypto/Sha2.c index f1a9850a..02680eb5 100644 --- a/src/Crypto/Sha2.c +++ b/src/Crypto/Sha2.c @@ -1,753 +1,753 @@ -/* - --------------------------------------------------------------------------- - Copyright (c) 2002, Dr Brian Gladman, Worcester, UK. All rights reserved. - - LICENSE TERMS - - The free distribution and use of this software is allowed (with or without - changes) provided that: - - 1. source code distributions include the above copyright notice, this - list of conditions and the following disclaimer; - - 2. binary distributions include the above copyright notice, this list - of conditions and the following disclaimer in their documentation; - - 3. the name of the copyright holder is not used to endorse products - built using this software without specific written permission. - - DISCLAIMER - - This software is provided 'as is' with no explicit or implied warranties - in respect of its properties, including, but not limited to, correctness - and/or fitness for purpose. - --------------------------------------------------------------------------- - Issue Date: 01/08/2005 - - This is a byte oriented version of SHA2 that operates on arrays of bytes - stored in memory. This code implements sha256, sha384 and sha512 but the - latter two functions rely on efficient 64-bit integer operations that - may not be very efficient on 32-bit machines - - The sha256 functions use a type 'sha256_ctx' to hold details of the - current hash state and uses the following three calls: - - void sha256_begin(sha256_ctx ctx[1]) - void sha256_hash(const unsigned char data[], - unsigned long len, sha256_ctx ctx[1]) - void sha_end1(unsigned char hval[], sha256_ctx ctx[1]) - - The first subroutine initialises a hash computation by setting up the - context in the sha256_ctx context. The second subroutine hashes 8-bit - bytes from array data[] into the hash state withinh sha256_ctx context, - the number of bytes to be hashed being given by the the unsigned long - integer len. The third subroutine completes the hash calculation and - places the resulting digest value in the array of 8-bit bytes hval[]. - - The sha384 and sha512 functions are similar and use the interfaces: - - void sha384_begin(sha384_ctx ctx[1]); - void sha384_hash(const unsigned char data[], - unsigned long len, sha384_ctx ctx[1]); - void sha384_end(unsigned char hval[], sha384_ctx ctx[1]); - - void sha512_begin(sha512_ctx ctx[1]); - void sha512_hash(const unsigned char data[], - unsigned long len, sha512_ctx ctx[1]); - void sha512_end(unsigned char hval[], sha512_ctx ctx[1]); - - In addition there is a function sha2 that can be used to call all these - functions using a call with a hash length parameter as follows: - - int sha2_begin(unsigned long len, sha2_ctx ctx[1]); - void sha2_hash(const unsigned char data[], - unsigned long len, sha2_ctx ctx[1]); - void sha2_end(unsigned char hval[], sha2_ctx ctx[1]); - - My thanks to Erik Andersen for testing this code - on big-endian systems and for his assistance with corrections -*/ - -#include "Common/Endian.h" -#include "Crypto/misc.h" -#define PLATFORM_BYTE_ORDER BYTE_ORDER -#define IS_LITTLE_ENDIAN LITTLE_ENDIAN - -#if 0 -#define UNROLL_SHA2 /* for SHA2 loop unroll */ -#endif - -#include /* for memcpy() etc. */ - -#include "Sha2.h" - -#if defined(__cplusplus) -extern "C" -{ -#endif - -#if defined( _MSC_VER ) && ( _MSC_VER > 800 ) -#pragma intrinsic(memcpy) -#endif - -#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) -#define SWAP_BYTES -#else -#undef SWAP_BYTES -#endif - -#if 0 - -#define ch(x,y,z) (((x) & (y)) ^ (~(x) & (z))) -#define maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) - -#else /* Thanks to Rich Schroeppel and Colin Plumb for the following */ - -#define ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) -#define maj(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) - -#endif - -/* round transforms for SHA256 and SHA512 compression functions */ - -#define vf(n,i) v[(n - i) & 7] - -#define hf(i) (p[i & 15] += \ - g_1(p[(i + 14) & 15]) + p[(i + 9) & 15] + g_0(p[(i + 1) & 15])) - -#define v_cycle(i,j) \ - vf(7,i) += (j ? hf(i) : p[i]) + k_0[i+j] \ - + s_1(vf(4,i)) + ch(vf(4,i),vf(5,i),vf(6,i)); \ - vf(3,i) += vf(7,i); \ - vf(7,i) += s_0(vf(0,i))+ maj(vf(0,i),vf(1,i),vf(2,i)) - -#if defined(SHA_224) || defined(SHA_256) - -#define SHA256_MASK (SHA256_BLOCK_SIZE - 1) - -#if defined(SWAP_BYTES) -#define bsw_32(p,n) \ - { int _i = (n); while(_i--) ((uint_32t*)p)[_i] = bswap_32(((uint_32t*)p)[_i]); } -#else -#define bsw_32(p,n) -#endif - -#define s_0(x) (rotr32((x), 2) ^ rotr32((x), 13) ^ rotr32((x), 22)) -#define s_1(x) (rotr32((x), 6) ^ rotr32((x), 11) ^ rotr32((x), 25)) -#define g_0(x) (rotr32((x), 7) ^ rotr32((x), 18) ^ ((x) >> 3)) -#define g_1(x) (rotr32((x), 17) ^ rotr32((x), 19) ^ ((x) >> 10)) -#define k_0 k256 - -/* rotated SHA256 round definition. Rather than swapping variables as in */ -/* FIPS-180, different variables are 'rotated' on each round, returning */ -/* to their starting positions every eight rounds */ - -#define q(n) v##n - -#define one_cycle(a,b,c,d,e,f,g,h,k,w) \ - q(h) += s_1(q(e)) + ch(q(e), q(f), q(g)) + k + w; \ - q(d) += q(h); q(h) += s_0(q(a)) + maj(q(a), q(b), q(c)) - -/* SHA256 mixing data */ - -const uint_32t k256[64] = -{ 0x428a2f98ul, 0x71374491ul, 0xb5c0fbcful, 0xe9b5dba5ul, - 0x3956c25bul, 0x59f111f1ul, 0x923f82a4ul, 0xab1c5ed5ul, - 0xd807aa98ul, 0x12835b01ul, 0x243185beul, 0x550c7dc3ul, - 0x72be5d74ul, 0x80deb1feul, 0x9bdc06a7ul, 0xc19bf174ul, - 0xe49b69c1ul, 0xefbe4786ul, 0x0fc19dc6ul, 0x240ca1ccul, - 0x2de92c6ful, 0x4a7484aaul, 0x5cb0a9dcul, 0x76f988daul, - 0x983e5152ul, 0xa831c66dul, 0xb00327c8ul, 0xbf597fc7ul, - 0xc6e00bf3ul, 0xd5a79147ul, 0x06ca6351ul, 0x14292967ul, - 0x27b70a85ul, 0x2e1b2138ul, 0x4d2c6dfcul, 0x53380d13ul, - 0x650a7354ul, 0x766a0abbul, 0x81c2c92eul, 0x92722c85ul, - 0xa2bfe8a1ul, 0xa81a664bul, 0xc24b8b70ul, 0xc76c51a3ul, - 0xd192e819ul, 0xd6990624ul, 0xf40e3585ul, 0x106aa070ul, - 0x19a4c116ul, 0x1e376c08ul, 0x2748774cul, 0x34b0bcb5ul, - 0x391c0cb3ul, 0x4ed8aa4aul, 0x5b9cca4ful, 0x682e6ff3ul, - 0x748f82eeul, 0x78a5636ful, 0x84c87814ul, 0x8cc70208ul, - 0x90befffaul, 0xa4506cebul, 0xbef9a3f7ul, 0xc67178f2ul, -}; - -/* Compile 64 bytes of hash data into SHA256 digest value */ -/* NOTE: this routine assumes that the byte order in the */ -/* ctx->wbuf[] at this point is such that low address bytes */ -/* in the ORIGINAL byte stream will go into the high end of */ -/* words on BOTH big and little endian systems */ - -VOID_RETURN sha256_compile(sha256_ctx ctx[1]) -{ -#if !defined(UNROLL_SHA2) - - uint_32t j, *p = ctx->wbuf, v[8]; - - memcpy(v, ctx->hash, 8 * sizeof(uint_32t)); - - for(j = 0; j < 64; j += 16) - { - v_cycle( 0, j); v_cycle( 1, j); - v_cycle( 2, j); v_cycle( 3, j); - v_cycle( 4, j); v_cycle( 5, j); - v_cycle( 6, j); v_cycle( 7, j); - v_cycle( 8, j); v_cycle( 9, j); - v_cycle(10, j); v_cycle(11, j); - v_cycle(12, j); v_cycle(13, j); - v_cycle(14, j); v_cycle(15, j); - } - - ctx->hash[0] += v[0]; ctx->hash[1] += v[1]; - ctx->hash[2] += v[2]; ctx->hash[3] += v[3]; - ctx->hash[4] += v[4]; ctx->hash[5] += v[5]; - ctx->hash[6] += v[6]; ctx->hash[7] += v[7]; - -#else - - uint_32t *p = ctx->wbuf,v0,v1,v2,v3,v4,v5,v6,v7; - - v0 = ctx->hash[0]; v1 = ctx->hash[1]; - v2 = ctx->hash[2]; v3 = ctx->hash[3]; - v4 = ctx->hash[4]; v5 = ctx->hash[5]; - v6 = ctx->hash[6]; v7 = ctx->hash[7]; - - one_cycle(0,1,2,3,4,5,6,7,k256[ 0],p[ 0]); - one_cycle(7,0,1,2,3,4,5,6,k256[ 1],p[ 1]); - one_cycle(6,7,0,1,2,3,4,5,k256[ 2],p[ 2]); - one_cycle(5,6,7,0,1,2,3,4,k256[ 3],p[ 3]); - one_cycle(4,5,6,7,0,1,2,3,k256[ 4],p[ 4]); - one_cycle(3,4,5,6,7,0,1,2,k256[ 5],p[ 5]); - one_cycle(2,3,4,5,6,7,0,1,k256[ 6],p[ 6]); - one_cycle(1,2,3,4,5,6,7,0,k256[ 7],p[ 7]); - one_cycle(0,1,2,3,4,5,6,7,k256[ 8],p[ 8]); - one_cycle(7,0,1,2,3,4,5,6,k256[ 9],p[ 9]); - one_cycle(6,7,0,1,2,3,4,5,k256[10],p[10]); - one_cycle(5,6,7,0,1,2,3,4,k256[11],p[11]); - one_cycle(4,5,6,7,0,1,2,3,k256[12],p[12]); - one_cycle(3,4,5,6,7,0,1,2,k256[13],p[13]); - one_cycle(2,3,4,5,6,7,0,1,k256[14],p[14]); - one_cycle(1,2,3,4,5,6,7,0,k256[15],p[15]); - - one_cycle(0,1,2,3,4,5,6,7,k256[16],hf( 0)); - one_cycle(7,0,1,2,3,4,5,6,k256[17],hf( 1)); - one_cycle(6,7,0,1,2,3,4,5,k256[18],hf( 2)); - one_cycle(5,6,7,0,1,2,3,4,k256[19],hf( 3)); - one_cycle(4,5,6,7,0,1,2,3,k256[20],hf( 4)); - one_cycle(3,4,5,6,7,0,1,2,k256[21],hf( 5)); - one_cycle(2,3,4,5,6,7,0,1,k256[22],hf( 6)); - one_cycle(1,2,3,4,5,6,7,0,k256[23],hf( 7)); - one_cycle(0,1,2,3,4,5,6,7,k256[24],hf( 8)); - one_cycle(7,0,1,2,3,4,5,6,k256[25],hf( 9)); - one_cycle(6,7,0,1,2,3,4,5,k256[26],hf(10)); - one_cycle(5,6,7,0,1,2,3,4,k256[27],hf(11)); - one_cycle(4,5,6,7,0,1,2,3,k256[28],hf(12)); - one_cycle(3,4,5,6,7,0,1,2,k256[29],hf(13)); - one_cycle(2,3,4,5,6,7,0,1,k256[30],hf(14)); - one_cycle(1,2,3,4,5,6,7,0,k256[31],hf(15)); - - one_cycle(0,1,2,3,4,5,6,7,k256[32],hf( 0)); - one_cycle(7,0,1,2,3,4,5,6,k256[33],hf( 1)); - one_cycle(6,7,0,1,2,3,4,5,k256[34],hf( 2)); - one_cycle(5,6,7,0,1,2,3,4,k256[35],hf( 3)); - one_cycle(4,5,6,7,0,1,2,3,k256[36],hf( 4)); - one_cycle(3,4,5,6,7,0,1,2,k256[37],hf( 5)); - one_cycle(2,3,4,5,6,7,0,1,k256[38],hf( 6)); - one_cycle(1,2,3,4,5,6,7,0,k256[39],hf( 7)); - one_cycle(0,1,2,3,4,5,6,7,k256[40],hf( 8)); - one_cycle(7,0,1,2,3,4,5,6,k256[41],hf( 9)); - one_cycle(6,7,0,1,2,3,4,5,k256[42],hf(10)); - one_cycle(5,6,7,0,1,2,3,4,k256[43],hf(11)); - one_cycle(4,5,6,7,0,1,2,3,k256[44],hf(12)); - one_cycle(3,4,5,6,7,0,1,2,k256[45],hf(13)); - one_cycle(2,3,4,5,6,7,0,1,k256[46],hf(14)); - one_cycle(1,2,3,4,5,6,7,0,k256[47],hf(15)); - - one_cycle(0,1,2,3,4,5,6,7,k256[48],hf( 0)); - one_cycle(7,0,1,2,3,4,5,6,k256[49],hf( 1)); - one_cycle(6,7,0,1,2,3,4,5,k256[50],hf( 2)); - one_cycle(5,6,7,0,1,2,3,4,k256[51],hf( 3)); - one_cycle(4,5,6,7,0,1,2,3,k256[52],hf( 4)); - one_cycle(3,4,5,6,7,0,1,2,k256[53],hf( 5)); - one_cycle(2,3,4,5,6,7,0,1,k256[54],hf( 6)); - one_cycle(1,2,3,4,5,6,7,0,k256[55],hf( 7)); - one_cycle(0,1,2,3,4,5,6,7,k256[56],hf( 8)); - one_cycle(7,0,1,2,3,4,5,6,k256[57],hf( 9)); - one_cycle(6,7,0,1,2,3,4,5,k256[58],hf(10)); - one_cycle(5,6,7,0,1,2,3,4,k256[59],hf(11)); - one_cycle(4,5,6,7,0,1,2,3,k256[60],hf(12)); - one_cycle(3,4,5,6,7,0,1,2,k256[61],hf(13)); - one_cycle(2,3,4,5,6,7,0,1,k256[62],hf(14)); - one_cycle(1,2,3,4,5,6,7,0,k256[63],hf(15)); - - ctx->hash[0] += v0; ctx->hash[1] += v1; - ctx->hash[2] += v2; ctx->hash[3] += v3; - ctx->hash[4] += v4; ctx->hash[5] += v5; - ctx->hash[6] += v6; ctx->hash[7] += v7; -#endif -} - -/* SHA256 hash data in an array of bytes into hash buffer */ -/* and call the hash_compile function as required. */ - -VOID_RETURN sha256_hash(const unsigned char data[], unsigned long len, sha256_ctx ctx[1]) -{ uint_32t pos = (uint_32t)(ctx->count[0] & SHA256_MASK), - space = SHA256_BLOCK_SIZE - pos; - const unsigned char *sp = data; - - if((ctx->count[0] += len) < len) - ++(ctx->count[1]); - - while(len >= space) /* tranfer whole blocks while possible */ - { - memcpy(((unsigned char*)ctx->wbuf) + pos, sp, space); - sp += space; len -= space; space = SHA256_BLOCK_SIZE; pos = 0; - bsw_32(ctx->wbuf, SHA256_BLOCK_SIZE >> 2) - sha256_compile(ctx); - } - - memcpy(((unsigned char*)ctx->wbuf) + pos, sp, len); -} - -/* SHA256 Final padding and digest calculation */ - -static void sha_end1(unsigned char hval[], sha256_ctx ctx[1], const unsigned int hlen) -{ uint_32t i = (uint_32t)(ctx->count[0] & SHA256_MASK); - - /* put bytes in the buffer in an order in which references to */ - /* 32-bit words will put bytes with lower addresses into the */ - /* top of 32 bit words on BOTH big and little endian machines */ - bsw_32(ctx->wbuf, (i + 3) >> 2) - - /* we now need to mask valid bytes and add the padding which is */ - /* a single 1 bit and as many zero bits as necessary. Note that */ - /* we can always add the first padding byte here because the */ - /* buffer always has at least one empty slot */ - ctx->wbuf[i >> 2] &= 0xffffff80 << 8 * (~i & 3); - ctx->wbuf[i >> 2] |= 0x00000080 << 8 * (~i & 3); - - /* we need 9 or more empty positions, one for the padding byte */ - /* (above) and eight for the length count. If there is not */ - /* enough space pad and empty the buffer */ - if(i > SHA256_BLOCK_SIZE - 9) - { - if(i < 60) ctx->wbuf[15] = 0; - sha256_compile(ctx); - i = 0; - } - else /* compute a word index for the empty buffer positions */ - i = (i >> 2) + 1; - - while(i < 14) /* and zero pad all but last two positions */ - ctx->wbuf[i++] = 0; - - /* the following 32-bit length fields are assembled in the */ - /* wrong byte order on little endian machines but this is */ - /* corrected later since they are only ever used as 32-bit */ - /* word values. */ - ctx->wbuf[14] = (ctx->count[1] << 3) | (ctx->count[0] >> 29); - ctx->wbuf[15] = ctx->count[0] << 3; - sha256_compile(ctx); - - /* extract the hash value as bytes in case the hash buffer is */ - /* mislaigned for 32-bit words */ - for(i = 0; i < hlen; ++i) - hval[i] = (unsigned char)(ctx->hash[i >> 2] >> (8 * (~i & 3))); -} - -#endif - -#if defined(SHA_224) - -const uint_32t i224[8] = -{ - 0xc1059ed8ul, 0x367cd507ul, 0x3070dd17ul, 0xf70e5939ul, - 0xffc00b31ul, 0x68581511ul, 0x64f98fa7ul, 0xbefa4fa4ul -}; - -VOID_RETURN sha224_begin(sha224_ctx ctx[1]) -{ - ctx->count[0] = ctx->count[1] = 0; - memcpy(ctx->hash, i224, 8 * sizeof(uint_32t)); -} - -VOID_RETURN sha224_end(unsigned char hval[], sha224_ctx ctx[1]) -{ - sha_end1(hval, ctx, SHA224_DIGEST_SIZE); -} - -VOID_RETURN sha224(unsigned char hval[], const unsigned char data[], unsigned long len) -{ sha224_ctx cx[1]; - - sha224_begin(cx); - sha224_hash(data, len, cx); - sha_end1(hval, cx, SHA224_DIGEST_SIZE); -} - -#endif - -#if defined(SHA_256) - -const uint_32t i256[8] = -{ - 0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul, - 0x510e527ful, 0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul -}; - -VOID_RETURN sha256_begin(sha256_ctx ctx[1]) -{ - ctx->count[0] = ctx->count[1] = 0; - memcpy(ctx->hash, i256, 8 * sizeof(uint_32t)); -} - -VOID_RETURN sha256_end(unsigned char hval[], sha256_ctx ctx[1]) -{ - sha_end1(hval, ctx, SHA256_DIGEST_SIZE); -} - -VOID_RETURN sha256(unsigned char hval[], const unsigned char data[], unsigned long len) -{ sha256_ctx cx[1]; - - sha256_begin(cx); - sha256_hash(data, len, cx); - sha_end1(hval, cx, SHA256_DIGEST_SIZE); -} - -#endif - -#if defined(SHA_384) || defined(SHA_512) - -#define SHA512_MASK (SHA512_BLOCK_SIZE - 1) - -#if defined(SWAP_BYTES) -#define bsw_64(p,n) \ - { int _i = (n); while(_i--) ((uint_64t*)p)[_i] = bswap_64(((uint_64t*)p)[_i]); } -#else -#define bsw_64(p,n) -#endif - -/* SHA512 mixing function definitions */ - -#ifdef s_0 -# undef s_0 -# undef s_1 -# undef g_0 -# undef g_1 -# undef k_0 -#endif - -#define s_0(x) (rotr64((x), 28) ^ rotr64((x), 34) ^ rotr64((x), 39)) -#define s_1(x) (rotr64((x), 14) ^ rotr64((x), 18) ^ rotr64((x), 41)) -#define g_0(x) (rotr64((x), 1) ^ rotr64((x), 8) ^ ((x) >> 7)) -#define g_1(x) (rotr64((x), 19) ^ rotr64((x), 61) ^ ((x) >> 6)) -#define k_0 k512 - -/* SHA384/SHA512 mixing data */ - -const uint_64t k512[80] = -{ - li_64(428a2f98d728ae22), li_64(7137449123ef65cd), - li_64(b5c0fbcfec4d3b2f), li_64(e9b5dba58189dbbc), - li_64(3956c25bf348b538), li_64(59f111f1b605d019), - li_64(923f82a4af194f9b), li_64(ab1c5ed5da6d8118), - li_64(d807aa98a3030242), li_64(12835b0145706fbe), - li_64(243185be4ee4b28c), li_64(550c7dc3d5ffb4e2), - li_64(72be5d74f27b896f), li_64(80deb1fe3b1696b1), - li_64(9bdc06a725c71235), li_64(c19bf174cf692694), - li_64(e49b69c19ef14ad2), li_64(efbe4786384f25e3), - li_64(0fc19dc68b8cd5b5), li_64(240ca1cc77ac9c65), - li_64(2de92c6f592b0275), li_64(4a7484aa6ea6e483), - li_64(5cb0a9dcbd41fbd4), li_64(76f988da831153b5), - li_64(983e5152ee66dfab), li_64(a831c66d2db43210), - li_64(b00327c898fb213f), li_64(bf597fc7beef0ee4), - li_64(c6e00bf33da88fc2), li_64(d5a79147930aa725), - li_64(06ca6351e003826f), li_64(142929670a0e6e70), - li_64(27b70a8546d22ffc), li_64(2e1b21385c26c926), - li_64(4d2c6dfc5ac42aed), li_64(53380d139d95b3df), - li_64(650a73548baf63de), li_64(766a0abb3c77b2a8), - li_64(81c2c92e47edaee6), li_64(92722c851482353b), - li_64(a2bfe8a14cf10364), li_64(a81a664bbc423001), - li_64(c24b8b70d0f89791), li_64(c76c51a30654be30), - li_64(d192e819d6ef5218), li_64(d69906245565a910), - li_64(f40e35855771202a), li_64(106aa07032bbd1b8), - li_64(19a4c116b8d2d0c8), li_64(1e376c085141ab53), - li_64(2748774cdf8eeb99), li_64(34b0bcb5e19b48a8), - li_64(391c0cb3c5c95a63), li_64(4ed8aa4ae3418acb), - li_64(5b9cca4f7763e373), li_64(682e6ff3d6b2b8a3), - li_64(748f82ee5defb2fc), li_64(78a5636f43172f60), - li_64(84c87814a1f0ab72), li_64(8cc702081a6439ec), - li_64(90befffa23631e28), li_64(a4506cebde82bde9), - li_64(bef9a3f7b2c67915), li_64(c67178f2e372532b), - li_64(ca273eceea26619c), li_64(d186b8c721c0c207), - li_64(eada7dd6cde0eb1e), li_64(f57d4f7fee6ed178), - li_64(06f067aa72176fba), li_64(0a637dc5a2c898a6), - li_64(113f9804bef90dae), li_64(1b710b35131c471b), - li_64(28db77f523047d84), li_64(32caab7b40c72493), - li_64(3c9ebe0a15c9bebc), li_64(431d67c49c100d4c), - li_64(4cc5d4becb3e42b6), li_64(597f299cfc657e2a), - li_64(5fcb6fab3ad6faec), li_64(6c44198c4a475817) -}; - -/* Compile 128 bytes of hash data into SHA384/512 digest */ -/* NOTE: this routine assumes that the byte order in the */ -/* ctx->wbuf[] at this point is such that low address bytes */ -/* in the ORIGINAL byte stream will go into the high end of */ -/* words on BOTH big and little endian systems */ - -VOID_RETURN sha512_compile(sha512_ctx ctx[1]) -{ uint_64t v[8], *p = ctx->wbuf; - uint_32t j; - - memcpy(v, ctx->hash, 8 * sizeof(uint_64t)); - - for(j = 0; j < 80; j += 16) - { - v_cycle( 0, j); v_cycle( 1, j); - v_cycle( 2, j); v_cycle( 3, j); - v_cycle( 4, j); v_cycle( 5, j); - v_cycle( 6, j); v_cycle( 7, j); - v_cycle( 8, j); v_cycle( 9, j); - v_cycle(10, j); v_cycle(11, j); - v_cycle(12, j); v_cycle(13, j); - v_cycle(14, j); v_cycle(15, j); - } - - ctx->hash[0] += v[0]; ctx->hash[1] += v[1]; - ctx->hash[2] += v[2]; ctx->hash[3] += v[3]; - ctx->hash[4] += v[4]; ctx->hash[5] += v[5]; - ctx->hash[6] += v[6]; ctx->hash[7] += v[7]; -} - -/* Compile 128 bytes of hash data into SHA256 digest value */ -/* NOTE: this routine assumes that the byte order in the */ -/* ctx->wbuf[] at this point is in such an order that low */ -/* address bytes in the ORIGINAL byte stream placed in this */ -/* buffer will now go to the high end of words on BOTH big */ -/* and little endian systems */ - -VOID_RETURN sha512_hash(const unsigned char data[], unsigned long len, sha512_ctx ctx[1]) -{ uint_32t pos = (uint_32t)(ctx->count[0] & SHA512_MASK), - space = SHA512_BLOCK_SIZE - pos; - const unsigned char *sp = data; - - if((ctx->count[0] += len) < len) - ++(ctx->count[1]); - - while(len >= space) /* tranfer whole blocks while possible */ - { - memcpy(((unsigned char*)ctx->wbuf) + pos, sp, space); - sp += space; len -= space; space = SHA512_BLOCK_SIZE; pos = 0; - bsw_64(ctx->wbuf, SHA512_BLOCK_SIZE >> 3); - sha512_compile(ctx); - } - - memcpy(((unsigned char*)ctx->wbuf) + pos, sp, len); -} - -/* SHA384/512 Final padding and digest calculation */ - -static void sha_end2(unsigned char hval[], sha512_ctx ctx[1], const unsigned int hlen) -{ uint_32t i = (uint_32t)(ctx->count[0] & SHA512_MASK); - - /* put bytes in the buffer in an order in which references to */ - /* 32-bit words will put bytes with lower addresses into the */ - /* top of 32 bit words on BOTH big and little endian machines */ - bsw_64(ctx->wbuf, (i + 7) >> 3); - - /* we now need to mask valid bytes and add the padding which is */ - /* a single 1 bit and as many zero bits as necessary. Note that */ - /* we can always add the first padding byte here because the */ - /* buffer always has at least one empty slot */ - ctx->wbuf[i >> 3] &= li_64(ffffffffffffff00) << 8 * (~i & 7); - ctx->wbuf[i >> 3] |= li_64(0000000000000080) << 8 * (~i & 7); - - /* we need 17 or more empty byte positions, one for the padding */ - /* byte (above) and sixteen for the length count. If there is */ - /* not enough space pad and empty the buffer */ - if(i > SHA512_BLOCK_SIZE - 17) - { - if(i < 120) ctx->wbuf[15] = 0; - sha512_compile(ctx); - i = 0; - } - else - i = (i >> 3) + 1; - - while(i < 14) - ctx->wbuf[i++] = 0; - - /* the following 64-bit length fields are assembled in the */ - /* wrong byte order on little endian machines but this is */ - /* corrected later since they are only ever used as 64-bit */ - /* word values. */ - ctx->wbuf[14] = (ctx->count[1] << 3) | (ctx->count[0] >> 61); - ctx->wbuf[15] = ctx->count[0] << 3; - sha512_compile(ctx); - - /* extract the hash value as bytes in case the hash buffer is */ - /* misaligned for 32-bit words */ - for(i = 0; i < hlen; ++i) - hval[i] = (unsigned char)(ctx->hash[i >> 3] >> (8 * (~i & 7))); -} - -#endif - -#if defined(SHA_384) - -/* SHA384 initialisation data */ - -const uint_64t i384[80] = -{ - li_64(cbbb9d5dc1059ed8), li_64(629a292a367cd507), - li_64(9159015a3070dd17), li_64(152fecd8f70e5939), - li_64(67332667ffc00b31), li_64(8eb44a8768581511), - li_64(db0c2e0d64f98fa7), li_64(47b5481dbefa4fa4) -}; - -VOID_RETURN sha384_begin(sha384_ctx ctx[1]) -{ - ctx->count[0] = ctx->count[1] = 0; - memcpy(ctx->hash, i384, 8 * sizeof(uint_64t)); -} - -VOID_RETURN sha384_end(unsigned char hval[], sha384_ctx ctx[1]) -{ - sha_end2(hval, ctx, SHA384_DIGEST_SIZE); -} - -VOID_RETURN sha384(unsigned char hval[], const unsigned char data[], unsigned long len) -{ sha384_ctx cx[1]; - - sha384_begin(cx); - sha384_hash(data, len, cx); - sha_end2(hval, cx, SHA384_DIGEST_SIZE); -} - -#endif - -#if defined(SHA_512) - -/* SHA512 initialisation data */ - -const uint_64t i512[80] = -{ - li_64(6a09e667f3bcc908), li_64(bb67ae8584caa73b), - li_64(3c6ef372fe94f82b), li_64(a54ff53a5f1d36f1), - li_64(510e527fade682d1), li_64(9b05688c2b3e6c1f), - li_64(1f83d9abfb41bd6b), li_64(5be0cd19137e2179) -}; - -VOID_RETURN sha512_begin(sha512_ctx ctx[1]) -{ - ctx->count[0] = ctx->count[1] = 0; - memcpy(ctx->hash, i512, 8 * sizeof(uint_64t)); -} - -VOID_RETURN sha512_end(unsigned char hval[], sha512_ctx ctx[1]) -{ - sha_end2(hval, ctx, SHA512_DIGEST_SIZE); -} - -VOID_RETURN sha512(unsigned char hval[], const unsigned char data[], unsigned long len) -{ sha512_ctx cx[1]; - - sha512_begin(cx); - sha512_hash(data, len, cx); - sha_end2(hval, cx, SHA512_DIGEST_SIZE); -} - -#endif - -#if defined(SHA_2) - -#define CTX_224(x) ((x)->uu->ctx256) -#define CTX_256(x) ((x)->uu->ctx256) -#define CTX_384(x) ((x)->uu->ctx512) -#define CTX_512(x) ((x)->uu->ctx512) - -/* SHA2 initialisation */ - -INT_RETURN sha2_begin(unsigned long len, sha2_ctx ctx[1]) -{ - switch(len) - { -#if defined(SHA_224) - case 224: - case 28: CTX_256(ctx)->count[0] = CTX_256(ctx)->count[1] = 0; - memcpy(CTX_256(ctx)->hash, i224, 32); - ctx->sha2_len = 28; return EXIT_SUCCESS; -#endif -#if defined(SHA_256) - case 256: - case 32: CTX_256(ctx)->count[0] = CTX_256(ctx)->count[1] = 0; - memcpy(CTX_256(ctx)->hash, i256, 32); - ctx->sha2_len = 32; return EXIT_SUCCESS; -#endif -#if defined(SHA_384) - case 384: - case 48: CTX_384(ctx)->count[0] = CTX_384(ctx)->count[1] = 0; - memcpy(CTX_384(ctx)->hash, i384, 64); - ctx->sha2_len = 48; return EXIT_SUCCESS; -#endif -#if defined(SHA_512) - case 512: - case 64: CTX_512(ctx)->count[0] = CTX_512(ctx)->count[1] = 0; - memcpy(CTX_512(ctx)->hash, i512, 64); - ctx->sha2_len = 64; return EXIT_SUCCESS; -#endif - default: return EXIT_FAILURE; - } -} - -VOID_RETURN sha2_hash(const unsigned char data[], unsigned long len, sha2_ctx ctx[1]) -{ - switch(ctx->sha2_len) - { -#if defined(SHA_224) - case 28: sha224_hash(data, len, CTX_224(ctx)); return; -#endif -#if defined(SHA_256) - case 32: sha256_hash(data, len, CTX_256(ctx)); return; -#endif -#if defined(SHA_384) - case 48: sha384_hash(data, len, CTX_384(ctx)); return; -#endif -#if defined(SHA_512) - case 64: sha512_hash(data, len, CTX_512(ctx)); return; -#endif - } -} - -VOID_RETURN sha2_end(unsigned char hval[], sha2_ctx ctx[1]) -{ - switch(ctx->sha2_len) - { -#if defined(SHA_224) - case 28: sha_end1(hval, CTX_224(ctx), SHA224_DIGEST_SIZE); return; -#endif -#if defined(SHA_256) - case 32: sha_end1(hval, CTX_256(ctx), SHA256_DIGEST_SIZE); return; -#endif -#if defined(SHA_384) - case 48: sha_end2(hval, CTX_384(ctx), SHA384_DIGEST_SIZE); return; -#endif -#if defined(SHA_512) - case 64: sha_end2(hval, CTX_512(ctx), SHA512_DIGEST_SIZE); return; -#endif - } -} - -INT_RETURN sha2(unsigned char hval[], unsigned long size, - const unsigned char data[], unsigned long len) -{ sha2_ctx cx[1]; - - if(sha2_begin(size, cx) == EXIT_SUCCESS) - { - sha2_hash(data, len, cx); sha2_end(hval, cx); return EXIT_SUCCESS; - } - else - return EXIT_FAILURE; -} - -#endif - -#if defined(__cplusplus) -} -#endif +/* + --------------------------------------------------------------------------- + Copyright (c) 2002, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 01/08/2005 + + This is a byte oriented version of SHA2 that operates on arrays of bytes + stored in memory. This code implements sha256, sha384 and sha512 but the + latter two functions rely on efficient 64-bit integer operations that + may not be very efficient on 32-bit machines + + The sha256 functions use a type 'sha256_ctx' to hold details of the + current hash state and uses the following three calls: + + void sha256_begin(sha256_ctx ctx[1]) + void sha256_hash(const unsigned char data[], + unsigned long len, sha256_ctx ctx[1]) + void sha_end1(unsigned char hval[], sha256_ctx ctx[1]) + + The first subroutine initialises a hash computation by setting up the + context in the sha256_ctx context. The second subroutine hashes 8-bit + bytes from array data[] into the hash state withinh sha256_ctx context, + the number of bytes to be hashed being given by the the unsigned long + integer len. The third subroutine completes the hash calculation and + places the resulting digest value in the array of 8-bit bytes hval[]. + + The sha384 and sha512 functions are similar and use the interfaces: + + void sha384_begin(sha384_ctx ctx[1]); + void sha384_hash(const unsigned char data[], + unsigned long len, sha384_ctx ctx[1]); + void sha384_end(unsigned char hval[], sha384_ctx ctx[1]); + + void sha512_begin(sha512_ctx ctx[1]); + void sha512_hash(const unsigned char data[], + unsigned long len, sha512_ctx ctx[1]); + void sha512_end(unsigned char hval[], sha512_ctx ctx[1]); + + In addition there is a function sha2 that can be used to call all these + functions using a call with a hash length parameter as follows: + + int sha2_begin(unsigned long len, sha2_ctx ctx[1]); + void sha2_hash(const unsigned char data[], + unsigned long len, sha2_ctx ctx[1]); + void sha2_end(unsigned char hval[], sha2_ctx ctx[1]); + + My thanks to Erik Andersen for testing this code + on big-endian systems and for his assistance with corrections +*/ + +#include "Common/Endian.h" +#include "Crypto/misc.h" +#define PLATFORM_BYTE_ORDER BYTE_ORDER +#define IS_LITTLE_ENDIAN LITTLE_ENDIAN + +#if 0 +#define UNROLL_SHA2 /* for SHA2 loop unroll */ +#endif + +#include /* for memcpy() etc. */ + +#include "Sha2.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#if defined( _MSC_VER ) && ( _MSC_VER > 800 ) +#pragma intrinsic(memcpy) +#endif + +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) +#define SWAP_BYTES +#else +#undef SWAP_BYTES +#endif + +#if 0 + +#define ch(x,y,z) (((x) & (y)) ^ (~(x) & (z))) +#define maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) + +#else /* Thanks to Rich Schroeppel and Colin Plumb for the following */ + +#define ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) +#define maj(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) + +#endif + +/* round transforms for SHA256 and SHA512 compression functions */ + +#define vf(n,i) v[(n - i) & 7] + +#define hf(i) (p[i & 15] += \ + g_1(p[(i + 14) & 15]) + p[(i + 9) & 15] + g_0(p[(i + 1) & 15])) + +#define v_cycle(i,j) \ + vf(7,i) += (j ? hf(i) : p[i]) + k_0[i+j] \ + + s_1(vf(4,i)) + ch(vf(4,i),vf(5,i),vf(6,i)); \ + vf(3,i) += vf(7,i); \ + vf(7,i) += s_0(vf(0,i))+ maj(vf(0,i),vf(1,i),vf(2,i)) + +#if defined(SHA_224) || defined(SHA_256) + +#define SHA256_MASK (SHA256_BLOCK_SIZE - 1) + +#if defined(SWAP_BYTES) +#define bsw_32(p,n) \ + { int _i = (n); while(_i--) ((uint_32t*)p)[_i] = bswap_32(((uint_32t*)p)[_i]); } +#else +#define bsw_32(p,n) +#endif + +#define s_0(x) (rotr32((x), 2) ^ rotr32((x), 13) ^ rotr32((x), 22)) +#define s_1(x) (rotr32((x), 6) ^ rotr32((x), 11) ^ rotr32((x), 25)) +#define g_0(x) (rotr32((x), 7) ^ rotr32((x), 18) ^ ((x) >> 3)) +#define g_1(x) (rotr32((x), 17) ^ rotr32((x), 19) ^ ((x) >> 10)) +#define k_0 k256 + +/* rotated SHA256 round definition. Rather than swapping variables as in */ +/* FIPS-180, different variables are 'rotated' on each round, returning */ +/* to their starting positions every eight rounds */ + +#define q(n) v##n + +#define one_cycle(a,b,c,d,e,f,g,h,k,w) \ + q(h) += s_1(q(e)) + ch(q(e), q(f), q(g)) + k + w; \ + q(d) += q(h); q(h) += s_0(q(a)) + maj(q(a), q(b), q(c)) + +/* SHA256 mixing data */ + +const uint_32t k256[64] = +{ 0x428a2f98ul, 0x71374491ul, 0xb5c0fbcful, 0xe9b5dba5ul, + 0x3956c25bul, 0x59f111f1ul, 0x923f82a4ul, 0xab1c5ed5ul, + 0xd807aa98ul, 0x12835b01ul, 0x243185beul, 0x550c7dc3ul, + 0x72be5d74ul, 0x80deb1feul, 0x9bdc06a7ul, 0xc19bf174ul, + 0xe49b69c1ul, 0xefbe4786ul, 0x0fc19dc6ul, 0x240ca1ccul, + 0x2de92c6ful, 0x4a7484aaul, 0x5cb0a9dcul, 0x76f988daul, + 0x983e5152ul, 0xa831c66dul, 0xb00327c8ul, 0xbf597fc7ul, + 0xc6e00bf3ul, 0xd5a79147ul, 0x06ca6351ul, 0x14292967ul, + 0x27b70a85ul, 0x2e1b2138ul, 0x4d2c6dfcul, 0x53380d13ul, + 0x650a7354ul, 0x766a0abbul, 0x81c2c92eul, 0x92722c85ul, + 0xa2bfe8a1ul, 0xa81a664bul, 0xc24b8b70ul, 0xc76c51a3ul, + 0xd192e819ul, 0xd6990624ul, 0xf40e3585ul, 0x106aa070ul, + 0x19a4c116ul, 0x1e376c08ul, 0x2748774cul, 0x34b0bcb5ul, + 0x391c0cb3ul, 0x4ed8aa4aul, 0x5b9cca4ful, 0x682e6ff3ul, + 0x748f82eeul, 0x78a5636ful, 0x84c87814ul, 0x8cc70208ul, + 0x90befffaul, 0xa4506cebul, 0xbef9a3f7ul, 0xc67178f2ul, +}; + +/* Compile 64 bytes of hash data into SHA256 digest value */ +/* NOTE: this routine assumes that the byte order in the */ +/* ctx->wbuf[] at this point is such that low address bytes */ +/* in the ORIGINAL byte stream will go into the high end of */ +/* words on BOTH big and little endian systems */ + +VOID_RETURN sha256_compile(sha256_ctx ctx[1]) +{ +#if !defined(UNROLL_SHA2) + + uint_32t j, *p = ctx->wbuf, v[8]; + + memcpy(v, ctx->hash, 8 * sizeof(uint_32t)); + + for(j = 0; j < 64; j += 16) + { + v_cycle( 0, j); v_cycle( 1, j); + v_cycle( 2, j); v_cycle( 3, j); + v_cycle( 4, j); v_cycle( 5, j); + v_cycle( 6, j); v_cycle( 7, j); + v_cycle( 8, j); v_cycle( 9, j); + v_cycle(10, j); v_cycle(11, j); + v_cycle(12, j); v_cycle(13, j); + v_cycle(14, j); v_cycle(15, j); + } + + ctx->hash[0] += v[0]; ctx->hash[1] += v[1]; + ctx->hash[2] += v[2]; ctx->hash[3] += v[3]; + ctx->hash[4] += v[4]; ctx->hash[5] += v[5]; + ctx->hash[6] += v[6]; ctx->hash[7] += v[7]; + +#else + + uint_32t *p = ctx->wbuf,v0,v1,v2,v3,v4,v5,v6,v7; + + v0 = ctx->hash[0]; v1 = ctx->hash[1]; + v2 = ctx->hash[2]; v3 = ctx->hash[3]; + v4 = ctx->hash[4]; v5 = ctx->hash[5]; + v6 = ctx->hash[6]; v7 = ctx->hash[7]; + + one_cycle(0,1,2,3,4,5,6,7,k256[ 0],p[ 0]); + one_cycle(7,0,1,2,3,4,5,6,k256[ 1],p[ 1]); + one_cycle(6,7,0,1,2,3,4,5,k256[ 2],p[ 2]); + one_cycle(5,6,7,0,1,2,3,4,k256[ 3],p[ 3]); + one_cycle(4,5,6,7,0,1,2,3,k256[ 4],p[ 4]); + one_cycle(3,4,5,6,7,0,1,2,k256[ 5],p[ 5]); + one_cycle(2,3,4,5,6,7,0,1,k256[ 6],p[ 6]); + one_cycle(1,2,3,4,5,6,7,0,k256[ 7],p[ 7]); + one_cycle(0,1,2,3,4,5,6,7,k256[ 8],p[ 8]); + one_cycle(7,0,1,2,3,4,5,6,k256[ 9],p[ 9]); + one_cycle(6,7,0,1,2,3,4,5,k256[10],p[10]); + one_cycle(5,6,7,0,1,2,3,4,k256[11],p[11]); + one_cycle(4,5,6,7,0,1,2,3,k256[12],p[12]); + one_cycle(3,4,5,6,7,0,1,2,k256[13],p[13]); + one_cycle(2,3,4,5,6,7,0,1,k256[14],p[14]); + one_cycle(1,2,3,4,5,6,7,0,k256[15],p[15]); + + one_cycle(0,1,2,3,4,5,6,7,k256[16],hf( 0)); + one_cycle(7,0,1,2,3,4,5,6,k256[17],hf( 1)); + one_cycle(6,7,0,1,2,3,4,5,k256[18],hf( 2)); + one_cycle(5,6,7,0,1,2,3,4,k256[19],hf( 3)); + one_cycle(4,5,6,7,0,1,2,3,k256[20],hf( 4)); + one_cycle(3,4,5,6,7,0,1,2,k256[21],hf( 5)); + one_cycle(2,3,4,5,6,7,0,1,k256[22],hf( 6)); + one_cycle(1,2,3,4,5,6,7,0,k256[23],hf( 7)); + one_cycle(0,1,2,3,4,5,6,7,k256[24],hf( 8)); + one_cycle(7,0,1,2,3,4,5,6,k256[25],hf( 9)); + one_cycle(6,7,0,1,2,3,4,5,k256[26],hf(10)); + one_cycle(5,6,7,0,1,2,3,4,k256[27],hf(11)); + one_cycle(4,5,6,7,0,1,2,3,k256[28],hf(12)); + one_cycle(3,4,5,6,7,0,1,2,k256[29],hf(13)); + one_cycle(2,3,4,5,6,7,0,1,k256[30],hf(14)); + one_cycle(1,2,3,4,5,6,7,0,k256[31],hf(15)); + + one_cycle(0,1,2,3,4,5,6,7,k256[32],hf( 0)); + one_cycle(7,0,1,2,3,4,5,6,k256[33],hf( 1)); + one_cycle(6,7,0,1,2,3,4,5,k256[34],hf( 2)); + one_cycle(5,6,7,0,1,2,3,4,k256[35],hf( 3)); + one_cycle(4,5,6,7,0,1,2,3,k256[36],hf( 4)); + one_cycle(3,4,5,6,7,0,1,2,k256[37],hf( 5)); + one_cycle(2,3,4,5,6,7,0,1,k256[38],hf( 6)); + one_cycle(1,2,3,4,5,6,7,0,k256[39],hf( 7)); + one_cycle(0,1,2,3,4,5,6,7,k256[40],hf( 8)); + one_cycle(7,0,1,2,3,4,5,6,k256[41],hf( 9)); + one_cycle(6,7,0,1,2,3,4,5,k256[42],hf(10)); + one_cycle(5,6,7,0,1,2,3,4,k256[43],hf(11)); + one_cycle(4,5,6,7,0,1,2,3,k256[44],hf(12)); + one_cycle(3,4,5,6,7,0,1,2,k256[45],hf(13)); + one_cycle(2,3,4,5,6,7,0,1,k256[46],hf(14)); + one_cycle(1,2,3,4,5,6,7,0,k256[47],hf(15)); + + one_cycle(0,1,2,3,4,5,6,7,k256[48],hf( 0)); + one_cycle(7,0,1,2,3,4,5,6,k256[49],hf( 1)); + one_cycle(6,7,0,1,2,3,4,5,k256[50],hf( 2)); + one_cycle(5,6,7,0,1,2,3,4,k256[51],hf( 3)); + one_cycle(4,5,6,7,0,1,2,3,k256[52],hf( 4)); + one_cycle(3,4,5,6,7,0,1,2,k256[53],hf( 5)); + one_cycle(2,3,4,5,6,7,0,1,k256[54],hf( 6)); + one_cycle(1,2,3,4,5,6,7,0,k256[55],hf( 7)); + one_cycle(0,1,2,3,4,5,6,7,k256[56],hf( 8)); + one_cycle(7,0,1,2,3,4,5,6,k256[57],hf( 9)); + one_cycle(6,7,0,1,2,3,4,5,k256[58],hf(10)); + one_cycle(5,6,7,0,1,2,3,4,k256[59],hf(11)); + one_cycle(4,5,6,7,0,1,2,3,k256[60],hf(12)); + one_cycle(3,4,5,6,7,0,1,2,k256[61],hf(13)); + one_cycle(2,3,4,5,6,7,0,1,k256[62],hf(14)); + one_cycle(1,2,3,4,5,6,7,0,k256[63],hf(15)); + + ctx->hash[0] += v0; ctx->hash[1] += v1; + ctx->hash[2] += v2; ctx->hash[3] += v3; + ctx->hash[4] += v4; ctx->hash[5] += v5; + ctx->hash[6] += v6; ctx->hash[7] += v7; +#endif +} + +/* SHA256 hash data in an array of bytes into hash buffer */ +/* and call the hash_compile function as required. */ + +VOID_RETURN sha256_hash(const unsigned char data[], unsigned long len, sha256_ctx ctx[1]) +{ uint_32t pos = (uint_32t)(ctx->count[0] & SHA256_MASK), + space = SHA256_BLOCK_SIZE - pos; + const unsigned char *sp = data; + + if((ctx->count[0] += len) < len) + ++(ctx->count[1]); + + while(len >= space) /* tranfer whole blocks while possible */ + { + memcpy(((unsigned char*)ctx->wbuf) + pos, sp, space); + sp += space; len -= space; space = SHA256_BLOCK_SIZE; pos = 0; + bsw_32(ctx->wbuf, SHA256_BLOCK_SIZE >> 2) + sha256_compile(ctx); + } + + memcpy(((unsigned char*)ctx->wbuf) + pos, sp, len); +} + +/* SHA256 Final padding and digest calculation */ + +static void sha_end1(unsigned char hval[], sha256_ctx ctx[1], const unsigned int hlen) +{ uint_32t i = (uint_32t)(ctx->count[0] & SHA256_MASK); + + /* put bytes in the buffer in an order in which references to */ + /* 32-bit words will put bytes with lower addresses into the */ + /* top of 32 bit words on BOTH big and little endian machines */ + bsw_32(ctx->wbuf, (i + 3) >> 2) + + /* we now need to mask valid bytes and add the padding which is */ + /* a single 1 bit and as many zero bits as necessary. Note that */ + /* we can always add the first padding byte here because the */ + /* buffer always has at least one empty slot */ + ctx->wbuf[i >> 2] &= 0xffffff80 << 8 * (~i & 3); + ctx->wbuf[i >> 2] |= 0x00000080 << 8 * (~i & 3); + + /* we need 9 or more empty positions, one for the padding byte */ + /* (above) and eight for the length count. If there is not */ + /* enough space pad and empty the buffer */ + if(i > SHA256_BLOCK_SIZE - 9) + { + if(i < 60) ctx->wbuf[15] = 0; + sha256_compile(ctx); + i = 0; + } + else /* compute a word index for the empty buffer positions */ + i = (i >> 2) + 1; + + while(i < 14) /* and zero pad all but last two positions */ + ctx->wbuf[i++] = 0; + + /* the following 32-bit length fields are assembled in the */ + /* wrong byte order on little endian machines but this is */ + /* corrected later since they are only ever used as 32-bit */ + /* word values. */ + ctx->wbuf[14] = (ctx->count[1] << 3) | (ctx->count[0] >> 29); + ctx->wbuf[15] = ctx->count[0] << 3; + sha256_compile(ctx); + + /* extract the hash value as bytes in case the hash buffer is */ + /* mislaigned for 32-bit words */ + for(i = 0; i < hlen; ++i) + hval[i] = (unsigned char)(ctx->hash[i >> 2] >> (8 * (~i & 3))); +} + +#endif + +#if defined(SHA_224) + +const uint_32t i224[8] = +{ + 0xc1059ed8ul, 0x367cd507ul, 0x3070dd17ul, 0xf70e5939ul, + 0xffc00b31ul, 0x68581511ul, 0x64f98fa7ul, 0xbefa4fa4ul +}; + +VOID_RETURN sha224_begin(sha224_ctx ctx[1]) +{ + ctx->count[0] = ctx->count[1] = 0; + memcpy(ctx->hash, i224, 8 * sizeof(uint_32t)); +} + +VOID_RETURN sha224_end(unsigned char hval[], sha224_ctx ctx[1]) +{ + sha_end1(hval, ctx, SHA224_DIGEST_SIZE); +} + +VOID_RETURN sha224(unsigned char hval[], const unsigned char data[], unsigned long len) +{ sha224_ctx cx[1]; + + sha224_begin(cx); + sha224_hash(data, len, cx); + sha_end1(hval, cx, SHA224_DIGEST_SIZE); +} + +#endif + +#if defined(SHA_256) + +const uint_32t i256[8] = +{ + 0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul, + 0x510e527ful, 0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul +}; + +VOID_RETURN sha256_begin(sha256_ctx ctx[1]) +{ + ctx->count[0] = ctx->count[1] = 0; + memcpy(ctx->hash, i256, 8 * sizeof(uint_32t)); +} + +VOID_RETURN sha256_end(unsigned char hval[], sha256_ctx ctx[1]) +{ + sha_end1(hval, ctx, SHA256_DIGEST_SIZE); +} + +VOID_RETURN sha256(unsigned char hval[], const unsigned char data[], unsigned long len) +{ sha256_ctx cx[1]; + + sha256_begin(cx); + sha256_hash(data, len, cx); + sha_end1(hval, cx, SHA256_DIGEST_SIZE); +} + +#endif + +#if defined(SHA_384) || defined(SHA_512) + +#define SHA512_MASK (SHA512_BLOCK_SIZE - 1) + +#if defined(SWAP_BYTES) +#define bsw_64(p,n) \ + { int _i = (n); while(_i--) ((uint_64t*)p)[_i] = bswap_64(((uint_64t*)p)[_i]); } +#else +#define bsw_64(p,n) +#endif + +/* SHA512 mixing function definitions */ + +#ifdef s_0 +# undef s_0 +# undef s_1 +# undef g_0 +# undef g_1 +# undef k_0 +#endif + +#define s_0(x) (rotr64((x), 28) ^ rotr64((x), 34) ^ rotr64((x), 39)) +#define s_1(x) (rotr64((x), 14) ^ rotr64((x), 18) ^ rotr64((x), 41)) +#define g_0(x) (rotr64((x), 1) ^ rotr64((x), 8) ^ ((x) >> 7)) +#define g_1(x) (rotr64((x), 19) ^ rotr64((x), 61) ^ ((x) >> 6)) +#define k_0 k512 + +/* SHA384/SHA512 mixing data */ + +const uint_64t k512[80] = +{ + li_64(428a2f98d728ae22), li_64(7137449123ef65cd), + li_64(b5c0fbcfec4d3b2f), li_64(e9b5dba58189dbbc), + li_64(3956c25bf348b538), li_64(59f111f1b605d019), + li_64(923f82a4af194f9b), li_64(ab1c5ed5da6d8118), + li_64(d807aa98a3030242), li_64(12835b0145706fbe), + li_64(243185be4ee4b28c), li_64(550c7dc3d5ffb4e2), + li_64(72be5d74f27b896f), li_64(80deb1fe3b1696b1), + li_64(9bdc06a725c71235), li_64(c19bf174cf692694), + li_64(e49b69c19ef14ad2), li_64(efbe4786384f25e3), + li_64(0fc19dc68b8cd5b5), li_64(240ca1cc77ac9c65), + li_64(2de92c6f592b0275), li_64(4a7484aa6ea6e483), + li_64(5cb0a9dcbd41fbd4), li_64(76f988da831153b5), + li_64(983e5152ee66dfab), li_64(a831c66d2db43210), + li_64(b00327c898fb213f), li_64(bf597fc7beef0ee4), + li_64(c6e00bf33da88fc2), li_64(d5a79147930aa725), + li_64(06ca6351e003826f), li_64(142929670a0e6e70), + li_64(27b70a8546d22ffc), li_64(2e1b21385c26c926), + li_64(4d2c6dfc5ac42aed), li_64(53380d139d95b3df), + li_64(650a73548baf63de), li_64(766a0abb3c77b2a8), + li_64(81c2c92e47edaee6), li_64(92722c851482353b), + li_64(a2bfe8a14cf10364), li_64(a81a664bbc423001), + li_64(c24b8b70d0f89791), li_64(c76c51a30654be30), + li_64(d192e819d6ef5218), li_64(d69906245565a910), + li_64(f40e35855771202a), li_64(106aa07032bbd1b8), + li_64(19a4c116b8d2d0c8), li_64(1e376c085141ab53), + li_64(2748774cdf8eeb99), li_64(34b0bcb5e19b48a8), + li_64(391c0cb3c5c95a63), li_64(4ed8aa4ae3418acb), + li_64(5b9cca4f7763e373), li_64(682e6ff3d6b2b8a3), + li_64(748f82ee5defb2fc), li_64(78a5636f43172f60), + li_64(84c87814a1f0ab72), li_64(8cc702081a6439ec), + li_64(90befffa23631e28), li_64(a4506cebde82bde9), + li_64(bef9a3f7b2c67915), li_64(c67178f2e372532b), + li_64(ca273eceea26619c), li_64(d186b8c721c0c207), + li_64(eada7dd6cde0eb1e), li_64(f57d4f7fee6ed178), + li_64(06f067aa72176fba), li_64(0a637dc5a2c898a6), + li_64(113f9804bef90dae), li_64(1b710b35131c471b), + li_64(28db77f523047d84), li_64(32caab7b40c72493), + li_64(3c9ebe0a15c9bebc), li_64(431d67c49c100d4c), + li_64(4cc5d4becb3e42b6), li_64(597f299cfc657e2a), + li_64(5fcb6fab3ad6faec), li_64(6c44198c4a475817) +}; + +/* Compile 128 bytes of hash data into SHA384/512 digest */ +/* NOTE: this routine assumes that the byte order in the */ +/* ctx->wbuf[] at this point is such that low address bytes */ +/* in the ORIGINAL byte stream will go into the high end of */ +/* words on BOTH big and little endian systems */ + +VOID_RETURN sha512_compile(sha512_ctx ctx[1]) +{ uint_64t v[8], *p = ctx->wbuf; + uint_32t j; + + memcpy(v, ctx->hash, 8 * sizeof(uint_64t)); + + for(j = 0; j < 80; j += 16) + { + v_cycle( 0, j); v_cycle( 1, j); + v_cycle( 2, j); v_cycle( 3, j); + v_cycle( 4, j); v_cycle( 5, j); + v_cycle( 6, j); v_cycle( 7, j); + v_cycle( 8, j); v_cycle( 9, j); + v_cycle(10, j); v_cycle(11, j); + v_cycle(12, j); v_cycle(13, j); + v_cycle(14, j); v_cycle(15, j); + } + + ctx->hash[0] += v[0]; ctx->hash[1] += v[1]; + ctx->hash[2] += v[2]; ctx->hash[3] += v[3]; + ctx->hash[4] += v[4]; ctx->hash[5] += v[5]; + ctx->hash[6] += v[6]; ctx->hash[7] += v[7]; +} + +/* Compile 128 bytes of hash data into SHA256 digest value */ +/* NOTE: this routine assumes that the byte order in the */ +/* ctx->wbuf[] at this point is in such an order that low */ +/* address bytes in the ORIGINAL byte stream placed in this */ +/* buffer will now go to the high end of words on BOTH big */ +/* and little endian systems */ + +VOID_RETURN sha512_hash(const unsigned char data[], unsigned long len, sha512_ctx ctx[1]) +{ uint_32t pos = (uint_32t)(ctx->count[0] & SHA512_MASK), + space = SHA512_BLOCK_SIZE - pos; + const unsigned char *sp = data; + + if((ctx->count[0] += len) < len) + ++(ctx->count[1]); + + while(len >= space) /* tranfer whole blocks while possible */ + { + memcpy(((unsigned char*)ctx->wbuf) + pos, sp, space); + sp += space; len -= space; space = SHA512_BLOCK_SIZE; pos = 0; + bsw_64(ctx->wbuf, SHA512_BLOCK_SIZE >> 3); + sha512_compile(ctx); + } + + memcpy(((unsigned char*)ctx->wbuf) + pos, sp, len); +} + +/* SHA384/512 Final padding and digest calculation */ + +static void sha_end2(unsigned char hval[], sha512_ctx ctx[1], const unsigned int hlen) +{ uint_32t i = (uint_32t)(ctx->count[0] & SHA512_MASK); + + /* put bytes in the buffer in an order in which references to */ + /* 32-bit words will put bytes with lower addresses into the */ + /* top of 32 bit words on BOTH big and little endian machines */ + bsw_64(ctx->wbuf, (i + 7) >> 3); + + /* we now need to mask valid bytes and add the padding which is */ + /* a single 1 bit and as many zero bits as necessary. Note that */ + /* we can always add the first padding byte here because the */ + /* buffer always has at least one empty slot */ + ctx->wbuf[i >> 3] &= li_64(ffffffffffffff00) << 8 * (~i & 7); + ctx->wbuf[i >> 3] |= li_64(0000000000000080) << 8 * (~i & 7); + + /* we need 17 or more empty byte positions, one for the padding */ + /* byte (above) and sixteen for the length count. If there is */ + /* not enough space pad and empty the buffer */ + if(i > SHA512_BLOCK_SIZE - 17) + { + if(i < 120) ctx->wbuf[15] = 0; + sha512_compile(ctx); + i = 0; + } + else + i = (i >> 3) + 1; + + while(i < 14) + ctx->wbuf[i++] = 0; + + /* the following 64-bit length fields are assembled in the */ + /* wrong byte order on little endian machines but this is */ + /* corrected later since they are only ever used as 64-bit */ + /* word values. */ + ctx->wbuf[14] = (ctx->count[1] << 3) | (ctx->count[0] >> 61); + ctx->wbuf[15] = ctx->count[0] << 3; + sha512_compile(ctx); + + /* extract the hash value as bytes in case the hash buffer is */ + /* misaligned for 32-bit words */ + for(i = 0; i < hlen; ++i) + hval[i] = (unsigned char)(ctx->hash[i >> 3] >> (8 * (~i & 7))); +} + +#endif + +#if defined(SHA_384) + +/* SHA384 initialisation data */ + +const uint_64t i384[80] = +{ + li_64(cbbb9d5dc1059ed8), li_64(629a292a367cd507), + li_64(9159015a3070dd17), li_64(152fecd8f70e5939), + li_64(67332667ffc00b31), li_64(8eb44a8768581511), + li_64(db0c2e0d64f98fa7), li_64(47b5481dbefa4fa4) +}; + +VOID_RETURN sha384_begin(sha384_ctx ctx[1]) +{ + ctx->count[0] = ctx->count[1] = 0; + memcpy(ctx->hash, i384, 8 * sizeof(uint_64t)); +} + +VOID_RETURN sha384_end(unsigned char hval[], sha384_ctx ctx[1]) +{ + sha_end2(hval, ctx, SHA384_DIGEST_SIZE); +} + +VOID_RETURN sha384(unsigned char hval[], const unsigned char data[], unsigned long len) +{ sha384_ctx cx[1]; + + sha384_begin(cx); + sha384_hash(data, len, cx); + sha_end2(hval, cx, SHA384_DIGEST_SIZE); +} + +#endif + +#if defined(SHA_512) + +/* SHA512 initialisation data */ + +const uint_64t i512[80] = +{ + li_64(6a09e667f3bcc908), li_64(bb67ae8584caa73b), + li_64(3c6ef372fe94f82b), li_64(a54ff53a5f1d36f1), + li_64(510e527fade682d1), li_64(9b05688c2b3e6c1f), + li_64(1f83d9abfb41bd6b), li_64(5be0cd19137e2179) +}; + +VOID_RETURN sha512_begin(sha512_ctx ctx[1]) +{ + ctx->count[0] = ctx->count[1] = 0; + memcpy(ctx->hash, i512, 8 * sizeof(uint_64t)); +} + +VOID_RETURN sha512_end(unsigned char hval[], sha512_ctx ctx[1]) +{ + sha_end2(hval, ctx, SHA512_DIGEST_SIZE); +} + +VOID_RETURN sha512(unsigned char hval[], const unsigned char data[], unsigned long len) +{ sha512_ctx cx[1]; + + sha512_begin(cx); + sha512_hash(data, len, cx); + sha_end2(hval, cx, SHA512_DIGEST_SIZE); +} + +#endif + +#if defined(SHA_2) + +#define CTX_224(x) ((x)->uu->ctx256) +#define CTX_256(x) ((x)->uu->ctx256) +#define CTX_384(x) ((x)->uu->ctx512) +#define CTX_512(x) ((x)->uu->ctx512) + +/* SHA2 initialisation */ + +INT_RETURN sha2_begin(unsigned long len, sha2_ctx ctx[1]) +{ + switch(len) + { +#if defined(SHA_224) + case 224: + case 28: CTX_256(ctx)->count[0] = CTX_256(ctx)->count[1] = 0; + memcpy(CTX_256(ctx)->hash, i224, 32); + ctx->sha2_len = 28; return EXIT_SUCCESS; +#endif +#if defined(SHA_256) + case 256: + case 32: CTX_256(ctx)->count[0] = CTX_256(ctx)->count[1] = 0; + memcpy(CTX_256(ctx)->hash, i256, 32); + ctx->sha2_len = 32; return EXIT_SUCCESS; +#endif +#if defined(SHA_384) + case 384: + case 48: CTX_384(ctx)->count[0] = CTX_384(ctx)->count[1] = 0; + memcpy(CTX_384(ctx)->hash, i384, 64); + ctx->sha2_len = 48; return EXIT_SUCCESS; +#endif +#if defined(SHA_512) + case 512: + case 64: CTX_512(ctx)->count[0] = CTX_512(ctx)->count[1] = 0; + memcpy(CTX_512(ctx)->hash, i512, 64); + ctx->sha2_len = 64; return EXIT_SUCCESS; +#endif + default: return EXIT_FAILURE; + } +} + +VOID_RETURN sha2_hash(const unsigned char data[], unsigned long len, sha2_ctx ctx[1]) +{ + switch(ctx->sha2_len) + { +#if defined(SHA_224) + case 28: sha224_hash(data, len, CTX_224(ctx)); return; +#endif +#if defined(SHA_256) + case 32: sha256_hash(data, len, CTX_256(ctx)); return; +#endif +#if defined(SHA_384) + case 48: sha384_hash(data, len, CTX_384(ctx)); return; +#endif +#if defined(SHA_512) + case 64: sha512_hash(data, len, CTX_512(ctx)); return; +#endif + } +} + +VOID_RETURN sha2_end(unsigned char hval[], sha2_ctx ctx[1]) +{ + switch(ctx->sha2_len) + { +#if defined(SHA_224) + case 28: sha_end1(hval, CTX_224(ctx), SHA224_DIGEST_SIZE); return; +#endif +#if defined(SHA_256) + case 32: sha_end1(hval, CTX_256(ctx), SHA256_DIGEST_SIZE); return; +#endif +#if defined(SHA_384) + case 48: sha_end2(hval, CTX_384(ctx), SHA384_DIGEST_SIZE); return; +#endif +#if defined(SHA_512) + case 64: sha_end2(hval, CTX_512(ctx), SHA512_DIGEST_SIZE); return; +#endif + } +} + +INT_RETURN sha2(unsigned char hval[], unsigned long size, + const unsigned char data[], unsigned long len) +{ sha2_ctx cx[1]; + + if(sha2_begin(size, cx) == EXIT_SUCCESS) + { + sha2_hash(data, len, cx); sha2_end(hval, cx); return EXIT_SUCCESS; + } + else + return EXIT_FAILURE; +} + +#endif + +#if defined(__cplusplus) +} +#endif diff --git a/src/Crypto/Sha2.h b/src/Crypto/Sha2.h index 64379d17..6d0aeb0f 100644 --- a/src/Crypto/Sha2.h +++ b/src/Crypto/Sha2.h @@ -1,155 +1,155 @@ -/* - --------------------------------------------------------------------------- - Copyright (c) 2002, Dr Brian Gladman, Worcester, UK. All rights reserved. - - LICENSE TERMS - - The free distribution and use of this software is allowed (with or without - changes) provided that: - - 1. source code distributions include the above copyright notice, this - list of conditions and the following disclaimer; - - 2. binary distributions include the above copyright notice, this list - of conditions and the following disclaimer in their documentation; - - 3. the name of the copyright holder is not used to endorse products - built using this software without specific written permission. - - DISCLAIMER - - This software is provided 'as is' with no explicit or implied warranties - in respect of its properties, including, but not limited to, correctness - and/or fitness for purpose. - --------------------------------------------------------------------------- - Issue Date: 01/08/2005 -*/ - -#ifndef _SHA2_H -#define _SHA2_H - -#include "Common/Tcdefs.h" -#include "Common/Endian.h" - -#define SHA_64BIT - -/* define the hash functions that you need */ -#define SHA_2 /* for dynamic hash length */ -#define SHA_224 -#define SHA_256 -#ifdef SHA_64BIT -# define SHA_384 -# define SHA_512 -# define NEED_UINT_64T -#endif - -#ifndef EXIT_SUCCESS -#define EXIT_SUCCESS 0 -#define EXIT_FAILURE 1 -#endif - -#define li_64(h) 0x##h##ull - -#define VOID_RETURN void -#define INT_RETURN int - -#if defined(__cplusplus) -extern "C" -{ -#endif - -/* Note that the following function prototypes are the same */ -/* for both the bit and byte oriented implementations. But */ -/* the length fields are in bytes or bits as is appropriate */ -/* for the version used. Bit sequences are arrays of bytes */ -/* in which bit sequence indexes increase from the most to */ -/* the least significant end of each byte */ - -#define SHA224_DIGEST_SIZE 28 -#define SHA224_BLOCK_SIZE 64 -#define SHA256_DIGEST_SIZE 32 -#define SHA256_BLOCK_SIZE 64 - -/* type to hold the SHA256 (and SHA224) context */ - -typedef struct -{ uint_32t count[2]; - uint_32t hash[8]; - uint_32t wbuf[16]; -} sha256_ctx; - -typedef sha256_ctx sha224_ctx; - -VOID_RETURN sha256_compile(sha256_ctx ctx[1]); - -VOID_RETURN sha224_begin(sha224_ctx ctx[1]); -#define sha224_hash sha256_hash -VOID_RETURN sha224_end(unsigned char hval[], sha224_ctx ctx[1]); -VOID_RETURN sha224(unsigned char hval[], const unsigned char data[], unsigned long len); - -VOID_RETURN sha256_begin(sha256_ctx ctx[1]); -VOID_RETURN sha256_hash(const unsigned char data[], unsigned long len, sha256_ctx ctx[1]); -VOID_RETURN sha256_end(unsigned char hval[], sha256_ctx ctx[1]); -VOID_RETURN sha256(unsigned char hval[], const unsigned char data[], unsigned long len); - -#ifndef SHA_64BIT - -typedef struct -{ union - { sha256_ctx ctx256[1]; - } uu[1]; - uint_32t sha2_len; -} sha2_ctx; - -#define SHA2_MAX_DIGEST_SIZE SHA256_DIGEST_SIZE - -#else - -#define SHA384_DIGEST_SIZE 48 -#define SHA384_BLOCK_SIZE 128 -#define SHA512_DIGEST_SIZE 64 -#define SHA512_BLOCK_SIZE 128 -#define SHA2_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE - -/* type to hold the SHA384 (and SHA512) context */ - -typedef struct -{ uint_64t count[2]; - uint_64t hash[8]; - uint_64t wbuf[16]; -} sha512_ctx; - -typedef sha512_ctx sha384_ctx; - -typedef struct -{ union - { sha256_ctx ctx256[1]; - sha512_ctx ctx512[1]; - } uu[1]; - uint_32t sha2_len; -} sha2_ctx; - -VOID_RETURN sha512_compile(sha512_ctx ctx[1]); - -VOID_RETURN sha384_begin(sha384_ctx ctx[1]); -#define sha384_hash sha512_hash -VOID_RETURN sha384_end(unsigned char hval[], sha384_ctx ctx[1]); -VOID_RETURN sha384(unsigned char hval[], const unsigned char data[], unsigned long len); - -VOID_RETURN sha512_begin(sha512_ctx ctx[1]); -VOID_RETURN sha512_hash(const unsigned char data[], unsigned long len, sha512_ctx ctx[1]); -VOID_RETURN sha512_end(unsigned char hval[], sha512_ctx ctx[1]); -VOID_RETURN sha512(unsigned char hval[], const unsigned char data[], unsigned long len); - -INT_RETURN sha2_begin(unsigned long size, sha2_ctx ctx[1]); -VOID_RETURN sha2_hash(const unsigned char data[], unsigned long len, sha2_ctx ctx[1]); -VOID_RETURN sha2_end(unsigned char hval[], sha2_ctx ctx[1]); -INT_RETURN sha2(unsigned char hval[], unsigned long size, const unsigned char data[], unsigned long len); - -#endif - -#if defined(__cplusplus) -} -#endif - -#endif +/* + --------------------------------------------------------------------------- + Copyright (c) 2002, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 01/08/2005 +*/ + +#ifndef _SHA2_H +#define _SHA2_H + +#include "Common/Tcdefs.h" +#include "Common/Endian.h" + +#define SHA_64BIT + +/* define the hash functions that you need */ +#define SHA_2 /* for dynamic hash length */ +#define SHA_224 +#define SHA_256 +#ifdef SHA_64BIT +# define SHA_384 +# define SHA_512 +# define NEED_UINT_64T +#endif + +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#define EXIT_FAILURE 1 +#endif + +#define li_64(h) 0x##h##ull + +#define VOID_RETURN void +#define INT_RETURN int + +#if defined(__cplusplus) +extern "C" +{ +#endif + +/* Note that the following function prototypes are the same */ +/* for both the bit and byte oriented implementations. But */ +/* the length fields are in bytes or bits as is appropriate */ +/* for the version used. Bit sequences are arrays of bytes */ +/* in which bit sequence indexes increase from the most to */ +/* the least significant end of each byte */ + +#define SHA224_DIGEST_SIZE 28 +#define SHA224_BLOCK_SIZE 64 +#define SHA256_DIGEST_SIZE 32 +#define SHA256_BLOCK_SIZE 64 + +/* type to hold the SHA256 (and SHA224) context */ + +typedef struct +{ uint_32t count[2]; + uint_32t hash[8]; + uint_32t wbuf[16]; +} sha256_ctx; + +typedef sha256_ctx sha224_ctx; + +VOID_RETURN sha256_compile(sha256_ctx ctx[1]); + +VOID_RETURN sha224_begin(sha224_ctx ctx[1]); +#define sha224_hash sha256_hash +VOID_RETURN sha224_end(unsigned char hval[], sha224_ctx ctx[1]); +VOID_RETURN sha224(unsigned char hval[], const unsigned char data[], unsigned long len); + +VOID_RETURN sha256_begin(sha256_ctx ctx[1]); +VOID_RETURN sha256_hash(const unsigned char data[], unsigned long len, sha256_ctx ctx[1]); +VOID_RETURN sha256_end(unsigned char hval[], sha256_ctx ctx[1]); +VOID_RETURN sha256(unsigned char hval[], const unsigned char data[], unsigned long len); + +#ifndef SHA_64BIT + +typedef struct +{ union + { sha256_ctx ctx256[1]; + } uu[1]; + uint_32t sha2_len; +} sha2_ctx; + +#define SHA2_MAX_DIGEST_SIZE SHA256_DIGEST_SIZE + +#else + +#define SHA384_DIGEST_SIZE 48 +#define SHA384_BLOCK_SIZE 128 +#define SHA512_DIGEST_SIZE 64 +#define SHA512_BLOCK_SIZE 128 +#define SHA2_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE + +/* type to hold the SHA384 (and SHA512) context */ + +typedef struct +{ uint_64t count[2]; + uint_64t hash[8]; + uint_64t wbuf[16]; +} sha512_ctx; + +typedef sha512_ctx sha384_ctx; + +typedef struct +{ union + { sha256_ctx ctx256[1]; + sha512_ctx ctx512[1]; + } uu[1]; + uint_32t sha2_len; +} sha2_ctx; + +VOID_RETURN sha512_compile(sha512_ctx ctx[1]); + +VOID_RETURN sha384_begin(sha384_ctx ctx[1]); +#define sha384_hash sha512_hash +VOID_RETURN sha384_end(unsigned char hval[], sha384_ctx ctx[1]); +VOID_RETURN sha384(unsigned char hval[], const unsigned char data[], unsigned long len); + +VOID_RETURN sha512_begin(sha512_ctx ctx[1]); +VOID_RETURN sha512_hash(const unsigned char data[], unsigned long len, sha512_ctx ctx[1]); +VOID_RETURN sha512_end(unsigned char hval[], sha512_ctx ctx[1]); +VOID_RETURN sha512(unsigned char hval[], const unsigned char data[], unsigned long len); + +INT_RETURN sha2_begin(unsigned long size, sha2_ctx ctx[1]); +VOID_RETURN sha2_hash(const unsigned char data[], unsigned long len, sha2_ctx ctx[1]); +VOID_RETURN sha2_end(unsigned char hval[], sha2_ctx ctx[1]); +INT_RETURN sha2(unsigned char hval[], unsigned long size, const unsigned char data[], unsigned long len); + +#endif + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/src/Crypto/Sha2Small.c b/src/Crypto/Sha2Small.c index 9acd1b83..539ff05d 100644 --- a/src/Crypto/Sha2Small.c +++ b/src/Crypto/Sha2Small.c @@ -10,237 +10,237 @@ * */ -/* Adapted for VeraCrypt */ - -#include -#include "Common/Tcdefs.h" -#include "Common/Endian.h" -#include "Sha2Small.h" - -#pragma optimize ("tl", on) - -typedef unsigned __int32 uint32; -typedef unsigned __int8 byte; - -#include -#pragma intrinsic(_lrotr) -#define RORc(x,n) _lrotr(x,n) - -/******************************************************************************/ - -/* - The K array - */ - -static const uint32 K[64] = { - 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, 0x3956c25bUL, - 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, 0xd807aa98UL, 0x12835b01UL, - 0x243185beUL, 0x550c7dc3UL, 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, - 0xc19bf174UL, 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, - 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, 0x983e5152UL, - 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, 0xc6e00bf3UL, 0xd5a79147UL, - 0x06ca6351UL, 0x14292967UL, 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, - 0x53380d13UL, 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, - 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, 0xd192e819UL, - 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, 0x19a4c116UL, 0x1e376c08UL, - 0x2748774cUL, 0x34b0bcb5UL, 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, - 0x682e6ff3UL, 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, - 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL -}; - -/* - Various logical functions - */ -#define Ch(x,y,z) (z ^ (x & (y ^ z))) -#define Maj(x,y,z) (((x | y) & z) | (x & y)) -#define S(x, n) RORc((x),(n)) -#define R(x, n) ((x)>>(n)) -#define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22)) -#define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25)) -#define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3)) -#define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10)) - -#define STORE32H(x, y, i) { \ -(y)[i] = (unsigned char)(((x)>>24)); \ -(y)[i+1] = (unsigned char)(((x)>>16)); \ -(y)[i+2] = (unsigned char)(((x)>>8)); \ -(y)[i+3] = (unsigned char)((x)); \ -} - -#define LOAD32H(x, y, i) { \ -x = ((unsigned long)((y)[i])<<24) | \ -((unsigned long)((y)[i+1])<<16) | \ -((unsigned long)((y)[i+2])<<8) | \ -((unsigned long)((y)[i+3])); \ -} - -/* - compress 512-bits - */ -static void sha256_compress(sha256_ctx * ctx, unsigned char *buf) -{ - - uint32 S[8], W[64], t0, t1; - uint32 t, w2, w15; - int i; - -/* - copy state into S - */ - for (i = 0; i < 8; i++) { - S[i] = ctx->state[i]; - } - -/* - copy the state into 512-bits into W[0..15] - */ - for (i = 0; i < 16; i++) { - LOAD32H(W[i], buf , (4*i)); - } - -/* - fill W[16..63] - */ - for (i = 16; i < 64; i++) { - w2 = W[i - 2]; - w15 = W[i - 15]; - W[i] = Gamma1(w2) + W[i - 7] + Gamma0(w15) + W[i - 16]; - } - -/* - Compress - */ - -#define RND(a,b,c,d,e,f,g,h,i) \ - t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \ - t1 = Sigma0(a) + Maj(a, b, c); \ - d += t0; \ - h = t0 + t1; - - for (i = 0; i < 64; ++i) { - RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i); - t = S[7]; S[7] = S[6]; S[6] = S[5]; S[5] = S[4]; - S[4] = S[3]; S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t; - } - -/* - feedback - */ - for (i = 0; i < 8; i++) { - ctx->state[i] += S[i]; - } - -} - -/* - init the sha256 state - */ -VOID_RETURN sha256_begin(sha256_ctx* ctx) -{ - ctx->curlen = 0; - ctx->state[0] = 0x6A09E667UL; - ctx->state[1] = 0xBB67AE85UL; - ctx->state[2] = 0x3C6EF372UL; - ctx->state[3] = 0xA54FF53AUL; - ctx->state[4] = 0x510E527FUL; - ctx->state[5] = 0x9B05688CUL; - ctx->state[6] = 0x1F83D9ABUL; - ctx->state[7] = 0x5BE0CD19UL; - ctx->highLength = 0; - ctx->lowLength = 0; -} - -VOID_RETURN sha256_hash(unsigned char* data, unsigned int len, sha256_ctx* ctx) -{ - uint32 n; - while (len > 0) { - if (ctx->curlen == 0 && len >= 64) { - sha256_compress(ctx, (unsigned char *)data); - - n = ctx->lowLength + 512; - if (n < ctx->lowLength) { - ctx->highLength++; - } - ctx->lowLength = n; - data += 64; - len -= 64; - } else { - n = min(len, 64 - ctx->curlen); - memcpy(ctx->buf + ctx->curlen, data, (size_t)n); - ctx->curlen += (unsigned int) n; - data += (unsigned int) n; - len -= (unsigned int) n; - - if (ctx->curlen == 64) { - sha256_compress (ctx, ctx->buf); - - n = ctx->lowLength + 512; - if (n < ctx->lowLength) { - ctx->highLength++; - } - ctx->lowLength = n; - ctx->curlen = 0; - } - } - } - return; -} - -VOID_RETURN sha256_end(unsigned char* hval, sha256_ctx* ctx) -{ - int i; - uint32 n; - -/* - increase the length of the message - */ - - n = ctx->lowLength + (ctx->curlen << 3); - if (n < ctx->lowLength) { - ctx->highLength++; - } - ctx->highLength += (ctx->curlen >> 29); - ctx->lowLength = n; - -/* - append the '1' bit - */ - ctx->buf[ctx->curlen++] = (unsigned char)0x80; - -/* - if the length is currently above 56 bytes we append zeros then compress. - Then we can fall back to padding zeros and length encoding like normal. - */ - if (ctx->curlen > 56) { - while (ctx->curlen < 64) { - ctx->buf[ctx->curlen++] = (unsigned char)0; - } - sha256_compress(ctx, ctx->buf); - ctx->curlen = 0; - } - -/* - pad upto 56 bytes of zeroes - */ - while (ctx->curlen < 56) { - ctx->buf[ctx->curlen++] = (unsigned char)0; - } - -/* - store length - */ - - STORE32H(ctx->highLength, ctx->buf, 56); - STORE32H(ctx->lowLength, ctx->buf, 60); - - sha256_compress(ctx, ctx->buf); - -/* - copy output - */ - for (i = 0; i < 8; i++) { - STORE32H(ctx->state[i], hval, (4*i)); - } -} - -/******************************************************************************/ +/* Adapted for VeraCrypt */ + +#include +#include "Common/Tcdefs.h" +#include "Common/Endian.h" +#include "Sha2Small.h" + +#pragma optimize ("tl", on) + +typedef unsigned __int32 uint32; +typedef unsigned __int8 byte; + +#include +#pragma intrinsic(_lrotr) +#define RORc(x,n) _lrotr(x,n) + +/******************************************************************************/ + +/* + The K array + */ + +static const uint32 K[64] = { + 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, 0x3956c25bUL, + 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, 0xd807aa98UL, 0x12835b01UL, + 0x243185beUL, 0x550c7dc3UL, 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, + 0xc19bf174UL, 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, + 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, 0x983e5152UL, + 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, 0xc6e00bf3UL, 0xd5a79147UL, + 0x06ca6351UL, 0x14292967UL, 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, + 0x53380d13UL, 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, + 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, 0xd192e819UL, + 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, 0x19a4c116UL, 0x1e376c08UL, + 0x2748774cUL, 0x34b0bcb5UL, 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, + 0x682e6ff3UL, 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, + 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL +}; + +/* + Various logical functions + */ +#define Ch(x,y,z) (z ^ (x & (y ^ z))) +#define Maj(x,y,z) (((x | y) & z) | (x & y)) +#define S(x, n) RORc((x),(n)) +#define R(x, n) ((x)>>(n)) +#define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22)) +#define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25)) +#define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3)) +#define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10)) + +#define STORE32H(x, y, i) { \ +(y)[i] = (unsigned char)(((x)>>24)); \ +(y)[i+1] = (unsigned char)(((x)>>16)); \ +(y)[i+2] = (unsigned char)(((x)>>8)); \ +(y)[i+3] = (unsigned char)((x)); \ +} + +#define LOAD32H(x, y, i) { \ +x = ((unsigned long)((y)[i])<<24) | \ +((unsigned long)((y)[i+1])<<16) | \ +((unsigned long)((y)[i+2])<<8) | \ +((unsigned long)((y)[i+3])); \ +} + +/* + compress 512-bits + */ +static void sha256_compress(sha256_ctx * ctx, unsigned char *buf) +{ + + uint32 S[8], W[64], t0, t1; + uint32 t, w2, w15; + int i; + +/* + copy state into S + */ + for (i = 0; i < 8; i++) { + S[i] = ctx->state[i]; + } + +/* + copy the state into 512-bits into W[0..15] + */ + for (i = 0; i < 16; i++) { + LOAD32H(W[i], buf , (4*i)); + } + +/* + fill W[16..63] + */ + for (i = 16; i < 64; i++) { + w2 = W[i - 2]; + w15 = W[i - 15]; + W[i] = Gamma1(w2) + W[i - 7] + Gamma0(w15) + W[i - 16]; + } + +/* + Compress + */ + +#define RND(a,b,c,d,e,f,g,h,i) \ + t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \ + t1 = Sigma0(a) + Maj(a, b, c); \ + d += t0; \ + h = t0 + t1; + + for (i = 0; i < 64; ++i) { + RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i); + t = S[7]; S[7] = S[6]; S[6] = S[5]; S[5] = S[4]; + S[4] = S[3]; S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t; + } + +/* + feedback + */ + for (i = 0; i < 8; i++) { + ctx->state[i] += S[i]; + } + +} + +/* + init the sha256 state + */ +VOID_RETURN sha256_begin(sha256_ctx* ctx) +{ + ctx->curlen = 0; + ctx->state[0] = 0x6A09E667UL; + ctx->state[1] = 0xBB67AE85UL; + ctx->state[2] = 0x3C6EF372UL; + ctx->state[3] = 0xA54FF53AUL; + ctx->state[4] = 0x510E527FUL; + ctx->state[5] = 0x9B05688CUL; + ctx->state[6] = 0x1F83D9ABUL; + ctx->state[7] = 0x5BE0CD19UL; + ctx->highLength = 0; + ctx->lowLength = 0; +} + +VOID_RETURN sha256_hash(unsigned char* data, unsigned int len, sha256_ctx* ctx) +{ + uint32 n; + while (len > 0) { + if (ctx->curlen == 0 && len >= 64) { + sha256_compress(ctx, (unsigned char *)data); + + n = ctx->lowLength + 512; + if (n < ctx->lowLength) { + ctx->highLength++; + } + ctx->lowLength = n; + data += 64; + len -= 64; + } else { + n = min(len, 64 - ctx->curlen); + memcpy(ctx->buf + ctx->curlen, data, (size_t)n); + ctx->curlen += (unsigned int) n; + data += (unsigned int) n; + len -= (unsigned int) n; + + if (ctx->curlen == 64) { + sha256_compress (ctx, ctx->buf); + + n = ctx->lowLength + 512; + if (n < ctx->lowLength) { + ctx->highLength++; + } + ctx->lowLength = n; + ctx->curlen = 0; + } + } + } + return; +} + +VOID_RETURN sha256_end(unsigned char* hval, sha256_ctx* ctx) +{ + int i; + uint32 n; + +/* + increase the length of the message + */ + + n = ctx->lowLength + (ctx->curlen << 3); + if (n < ctx->lowLength) { + ctx->highLength++; + } + ctx->highLength += (ctx->curlen >> 29); + ctx->lowLength = n; + +/* + append the '1' bit + */ + ctx->buf[ctx->curlen++] = (unsigned char)0x80; + +/* + if the length is currently above 56 bytes we append zeros then compress. + Then we can fall back to padding zeros and length encoding like normal. + */ + if (ctx->curlen > 56) { + while (ctx->curlen < 64) { + ctx->buf[ctx->curlen++] = (unsigned char)0; + } + sha256_compress(ctx, ctx->buf); + ctx->curlen = 0; + } + +/* + pad upto 56 bytes of zeroes + */ + while (ctx->curlen < 56) { + ctx->buf[ctx->curlen++] = (unsigned char)0; + } + +/* + store length + */ + + STORE32H(ctx->highLength, ctx->buf, 56); + STORE32H(ctx->lowLength, ctx->buf, 60); + + sha256_compress(ctx, ctx->buf); + +/* + copy output + */ + for (i = 0; i < 8; i++) { + STORE32H(ctx->state[i], hval, (4*i)); + } +} + +/******************************************************************************/ diff --git a/src/Crypto/Sha2Small.h b/src/Crypto/Sha2Small.h index 2b79eaf4..1b5c106e 100644 --- a/src/Crypto/Sha2Small.h +++ b/src/Crypto/Sha2Small.h @@ -12,21 +12,21 @@ /* Adapted for VeraCrypt */ -#ifndef _SHA2_SMALL_H +#ifndef _SHA2_SMALL_H #define _SHA2_SMALL_H -#include "Common/Tcdefs.h" +#include "Common/Tcdefs.h" #include "Common/Endian.h" -#define SHA256_DIGEST_SIZE 32 +#define SHA256_DIGEST_SIZE 32 #define SHA256_BLOCK_SIZE 64 -#define VOID_RETURN void -#define INT_RETURN int - -#if defined(__cplusplus) -extern "C" -{ +#define VOID_RETURN void +#define INT_RETURN int + +#if defined(__cplusplus) +extern "C" +{ #endif typedef struct { @@ -40,12 +40,12 @@ typedef struct { /******************************************************************************/ -VOID_RETURN sha256_begin(sha256_ctx* ctx); -VOID_RETURN sha256_hash(unsigned char* data, unsigned int len, sha256_ctx* ctx); +VOID_RETURN sha256_begin(sha256_ctx* ctx); +VOID_RETURN sha256_hash(unsigned char* data, unsigned int len, sha256_ctx* ctx); VOID_RETURN sha256_end(unsigned char* hval, sha256_ctx* ctx); -#if defined(__cplusplus) -} +#if defined(__cplusplus) +} #endif /******************************************************************************/ diff --git a/src/Crypto/Sources b/src/Crypto/Sources index 9b1b988c..6eb7b7b4 100644 --- a/src/Crypto/Sources +++ b/src/Crypto/Sources @@ -1,20 +1,20 @@ -TARGETNAME=Crypto -TARGETTYPE=DRIVER_LIBRARY - -INCLUDES = .. - -NTTARGETFILES = \ - "$(OBJ_PATH)\$(O)\Aes_$(TC_ARCH).obj" \ - "$(OBJ_PATH)\$(O)\Aes_hw_cpu.obj" - -SOURCES = \ - Aes_$(TC_ARCH).asm \ - Aes_hw_cpu.asm \ - Aeskey.c \ - Aestab.c \ - cpu.c \ - Rmd160.c \ - Serpent.c \ - Sha2.c \ - Twofish.c \ - Whirlpool.c +TARGETNAME=Crypto +TARGETTYPE=DRIVER_LIBRARY + +INCLUDES = .. + +NTTARGETFILES = \ + "$(OBJ_PATH)\$(O)\Aes_$(TC_ARCH).obj" \ + "$(OBJ_PATH)\$(O)\Aes_hw_cpu.obj" + +SOURCES = \ + Aes_$(TC_ARCH).asm \ + Aes_hw_cpu.asm \ + Aeskey.c \ + Aestab.c \ + cpu.c \ + Rmd160.c \ + Serpent.c \ + Sha2.c \ + Twofish.c \ + Whirlpool.c diff --git a/src/Crypto/Twofish.c b/src/Crypto/Twofish.c index 2273ac5e..7c58c91e 100644 --- a/src/Crypto/Twofish.c +++ b/src/Crypto/Twofish.c @@ -1,549 +1,549 @@ -/* - --------------------------------------------------------------------------- - Copyright (c) 1999, Dr Brian Gladman, Worcester, UK. All rights reserved. - - LICENSE TERMS - - The free distribution and use of this software is allowed (with or without - changes) provided that: - - 1. source code distributions include the above copyright notice, this - list of conditions and the following disclaimer; - - 2. binary distributions include the above copyright notice, this list - of conditions and the following disclaimer in their documentation; - - 3. the name of the copyright holder is not used to endorse products - built using this software without specific written permission. - - DISCLAIMER - - This software is provided 'as is' with no explicit or implied warranties - in respect of its properties, including, but not limited to, correctness - and/or fitness for purpose. - --------------------------------------------------------------------------- - - My thanks to Doug Whiting and Niels Ferguson for comments that led - to improvements in this implementation. - - Issue Date: 14th January 1999 -*/ - -/* Adapted for TrueCrypt */ -/* Adapted for VeraCrypt */ - - -#ifdef TC_WINDOWS_BOOT -#pragma optimize ("tl", on) -#endif - -#include "Twofish.h" -#include "Common/Endian.h" - -#define Q_TABLES -#define M_TABLE - -#if !defined (TC_MINIMIZE_CODE_SIZE) || defined (TC_WINDOWS_BOOT_TWOFISH) -# define MK_TABLE -# define ONE_STEP -#endif - -/* finite field arithmetic for GF(2**8) with the modular */ -/* polynomial x^8 + x^6 + x^5 + x^3 + 1 (0x169) */ - -#define G_M 0x0169 - -static u1byte tab_5b[4] = { 0, G_M >> 2, G_M >> 1, (G_M >> 1) ^ (G_M >> 2) }; -static u1byte tab_ef[4] = { 0, (G_M >> 1) ^ (G_M >> 2), G_M >> 1, G_M >> 2 }; - -#define ffm_01(x) (x) -#define ffm_5b(x) ((x) ^ ((x) >> 2) ^ tab_5b[(x) & 3]) -#define ffm_ef(x) ((x) ^ ((x) >> 1) ^ ((x) >> 2) ^ tab_ef[(x) & 3]) - -static u1byte ror4[16] = { 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 }; -static u1byte ashx[16] = { 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12, 5, 14, 7 }; - -static u1byte qt0[2][16] = -{ { 8, 1, 7, 13, 6, 15, 3, 2, 0, 11, 5, 9, 14, 12, 10, 4 }, - { 2, 8, 11, 13, 15, 7, 6, 14, 3, 1, 9, 4, 0, 10, 12, 5 } -}; - -static u1byte qt1[2][16] = -{ { 14, 12, 11, 8, 1, 2, 3, 5, 15, 4, 10, 6, 7, 0, 9, 13 }, - { 1, 14, 2, 11, 4, 12, 3, 7, 6, 13, 10, 5, 15, 9, 0, 8 } -}; - -static u1byte qt2[2][16] = -{ { 11, 10, 5, 14, 6, 13, 9, 0, 12, 8, 15, 3, 2, 4, 7, 1 }, - { 4, 12, 7, 5, 1, 6, 9, 10, 0, 14, 13, 8, 2, 11, 3, 15 } -}; - -static u1byte qt3[2][16] = -{ { 13, 7, 15, 4, 1, 2, 6, 14, 9, 11, 3, 0, 8, 5, 12, 10 }, - { 11, 9, 5, 1, 12, 3, 13, 14, 6, 4, 7, 15, 2, 0, 8, 10 } -}; - -static u1byte qp(const u4byte n, const u1byte x) -{ u1byte a0, a1, a2, a3, a4, b0, b1, b2, b3, b4; - - a0 = x >> 4; b0 = x & 15; - a1 = a0 ^ b0; b1 = ror4[b0] ^ ashx[a0]; - a2 = qt0[n][a1]; b2 = qt1[n][b1]; - a3 = a2 ^ b2; b3 = ror4[b2] ^ ashx[a2]; - a4 = qt2[n][a3]; b4 = qt3[n][b3]; - return (b4 << 4) | a4; -}; - -#ifdef Q_TABLES - -static u4byte qt_gen = 0; -static u1byte q_tab[2][256]; - -#define q(n,x) q_tab[n][x] - -static void gen_qtab(void) -{ u4byte i; - - for(i = 0; i < 256; ++i) - { - q(0,i) = qp(0, (u1byte)i); - q(1,i) = qp(1, (u1byte)i); - } -}; - -#else - -#define q(n,x) qp(n, x) - -#endif - -#ifdef M_TABLE - -static u4byte mt_gen = 0; -static u4byte m_tab[4][256]; - -static void gen_mtab(void) -{ u4byte i, f01, f5b, fef; - - for(i = 0; i < 256; ++i) - { - f01 = q(1,i); f5b = ffm_5b(f01); fef = ffm_ef(f01); - m_tab[0][i] = f01 + (f5b << 8) + (fef << 16) + (fef << 24); - m_tab[2][i] = f5b + (fef << 8) + (f01 << 16) + (fef << 24); - - f01 = q(0,i); f5b = ffm_5b(f01); fef = ffm_ef(f01); - m_tab[1][i] = fef + (fef << 8) + (f5b << 16) + (f01 << 24); - m_tab[3][i] = f5b + (f01 << 8) + (fef << 16) + (f5b << 24); - } -}; - -#define mds(n,x) m_tab[n][x] - -#else - -#define fm_00 ffm_01 -#define fm_10 ffm_5b -#define fm_20 ffm_ef -#define fm_30 ffm_ef -#define q_0(x) q(1,x) - -#define fm_01 ffm_ef -#define fm_11 ffm_ef -#define fm_21 ffm_5b -#define fm_31 ffm_01 -#define q_1(x) q(0,x) - -#define fm_02 ffm_5b -#define fm_12 ffm_ef -#define fm_22 ffm_01 -#define fm_32 ffm_ef -#define q_2(x) q(1,x) - -#define fm_03 ffm_5b -#define fm_13 ffm_01 -#define fm_23 ffm_ef -#define fm_33 ffm_5b -#define q_3(x) q(0,x) - -#define f_0(n,x) ((u4byte)fm_0##n(x)) -#define f_1(n,x) ((u4byte)fm_1##n(x) << 8) -#define f_2(n,x) ((u4byte)fm_2##n(x) << 16) -#define f_3(n,x) ((u4byte)fm_3##n(x) << 24) - -#define mds(n,x) f_0(n,q_##n(x)) ^ f_1(n,q_##n(x)) ^ f_2(n,q_##n(x)) ^ f_3(n,q_##n(x)) - -#endif - -static u4byte h_fun(TwofishInstance *instance, const u4byte x, const u4byte key[]) -{ u4byte b0, b1, b2, b3; - -#ifndef M_TABLE - u4byte m5b_b0, m5b_b1, m5b_b2, m5b_b3; - u4byte mef_b0, mef_b1, mef_b2, mef_b3; -#endif - - b0 = extract_byte(x, 0); b1 = extract_byte(x, 1); b2 = extract_byte(x, 2); b3 = extract_byte(x, 3); - - switch(instance->k_len) - { - case 4: b0 = q(1, (u1byte) b0) ^ extract_byte(key[3],0); - b1 = q(0, (u1byte) b1) ^ extract_byte(key[3],1); - b2 = q(0, (u1byte) b2) ^ extract_byte(key[3],2); - b3 = q(1, (u1byte) b3) ^ extract_byte(key[3],3); - case 3: b0 = q(1, (u1byte) b0) ^ extract_byte(key[2],0); - b1 = q(1, (u1byte) b1) ^ extract_byte(key[2],1); - b2 = q(0, (u1byte) b2) ^ extract_byte(key[2],2); - b3 = q(0, (u1byte) b3) ^ extract_byte(key[2],3); - case 2: b0 = q(0, (u1byte) (q(0, (u1byte) b0) ^ extract_byte(key[1],0))) ^ extract_byte(key[0],0); - b1 = q(0, (u1byte) (q(1, (u1byte) b1) ^ extract_byte(key[1],1))) ^ extract_byte(key[0],1); - b2 = q(1, (u1byte) (q(0, (u1byte) b2) ^ extract_byte(key[1],2))) ^ extract_byte(key[0],2); - b3 = q(1, (u1byte) (q(1, (u1byte) b3) ^ extract_byte(key[1],3))) ^ extract_byte(key[0],3); - } -#ifdef M_TABLE - - return mds(0, b0) ^ mds(1, b1) ^ mds(2, b2) ^ mds(3, b3); - -#else - - b0 = q(1, (u1byte) b0); b1 = q(0, (u1byte) b1); b2 = q(1, (u1byte) b2); b3 = q(0, (u1byte) b3); - m5b_b0 = ffm_5b(b0); m5b_b1 = ffm_5b(b1); m5b_b2 = ffm_5b(b2); m5b_b3 = ffm_5b(b3); - mef_b0 = ffm_ef(b0); mef_b1 = ffm_ef(b1); mef_b2 = ffm_ef(b2); mef_b3 = ffm_ef(b3); - b0 ^= mef_b1 ^ m5b_b2 ^ m5b_b3; b3 ^= m5b_b0 ^ mef_b1 ^ mef_b2; - b2 ^= mef_b0 ^ m5b_b1 ^ mef_b3; b1 ^= mef_b0 ^ mef_b2 ^ m5b_b3; - - return b0 | (b3 << 8) | (b2 << 16) | (b1 << 24); - -#endif -}; - -#ifdef MK_TABLE - -#ifdef ONE_STEP -//u4byte mk_tab[4][256]; -#else -static u1byte sb[4][256]; -#endif - -#define q20(x) q(0,q(0,x) ^ extract_byte(key[1],0)) ^ extract_byte(key[0],0) -#define q21(x) q(0,q(1,x) ^ extract_byte(key[1],1)) ^ extract_byte(key[0],1) -#define q22(x) q(1,q(0,x) ^ extract_byte(key[1],2)) ^ extract_byte(key[0],2) -#define q23(x) q(1,q(1,x) ^ extract_byte(key[1],3)) ^ extract_byte(key[0],3) - -#define q30(x) q(0,q(0,q(1, x) ^ extract_byte(key[2],0)) ^ extract_byte(key[1],0)) ^ extract_byte(key[0],0) -#define q31(x) q(0,q(1,q(1, x) ^ extract_byte(key[2],1)) ^ extract_byte(key[1],1)) ^ extract_byte(key[0],1) -#define q32(x) q(1,q(0,q(0, x) ^ extract_byte(key[2],2)) ^ extract_byte(key[1],2)) ^ extract_byte(key[0],2) -#define q33(x) q(1,q(1,q(0, x) ^ extract_byte(key[2],3)) ^ extract_byte(key[1],3)) ^ extract_byte(key[0],3) - -#define q40(x) q(0,q(0,q(1, q(1, x) ^ extract_byte(key[3],0)) ^ extract_byte(key[2],0)) ^ extract_byte(key[1],0)) ^ extract_byte(key[0],0) -#define q41(x) q(0,q(1,q(1, q(0, x) ^ extract_byte(key[3],1)) ^ extract_byte(key[2],1)) ^ extract_byte(key[1],1)) ^ extract_byte(key[0],1) -#define q42(x) q(1,q(0,q(0, q(0, x) ^ extract_byte(key[3],2)) ^ extract_byte(key[2],2)) ^ extract_byte(key[1],2)) ^ extract_byte(key[0],2) -#define q43(x) q(1,q(1,q(0, q(1, x) ^ extract_byte(key[3],3)) ^ extract_byte(key[2],3)) ^ extract_byte(key[1],3)) ^ extract_byte(key[0],3) - -static void gen_mk_tab(TwofishInstance *instance, u4byte key[]) -{ u4byte i; - u1byte by; - - u4byte *mk_tab = instance->mk_tab; - - switch(instance->k_len) - { - case 2: for(i = 0; i < 256; ++i) - { - by = (u1byte)i; -#ifdef ONE_STEP - mk_tab[0 + 4*i] = mds(0, q20(by)); mk_tab[1 + 4*i] = mds(1, q21(by)); - mk_tab[2 + 4*i] = mds(2, q22(by)); mk_tab[3 + 4*i] = mds(3, q23(by)); -#else - sb[0][i] = q20(by); sb[1][i] = q21(by); - sb[2][i] = q22(by); sb[3][i] = q23(by); -#endif - } - break; - - case 3: for(i = 0; i < 256; ++i) - { - by = (u1byte)i; -#ifdef ONE_STEP - mk_tab[0 + 4*i] = mds(0, q30(by)); mk_tab[1 + 4*i] = mds(1, q31(by)); - mk_tab[2 + 4*i] = mds(2, q32(by)); mk_tab[3 + 4*i] = mds(3, q33(by)); -#else - sb[0][i] = q30(by); sb[1][i] = q31(by); - sb[2][i] = q32(by); sb[3][i] = q33(by); -#endif - } - break; - - case 4: for(i = 0; i < 256; ++i) - { - by = (u1byte)i; -#ifdef ONE_STEP - mk_tab[0 + 4*i] = mds(0, q40(by)); mk_tab[1 + 4*i] = mds(1, q41(by)); - mk_tab[2 + 4*i] = mds(2, q42(by)); mk_tab[3 + 4*i] = mds(3, q43(by)); -#else - sb[0][i] = q40(by); sb[1][i] = q41(by); - sb[2][i] = q42(by); sb[3][i] = q43(by); -#endif - } - } -}; - -# ifdef ONE_STEP -# define g0_fun(x) ( mk_tab[0 + 4*extract_byte(x,0)] ^ mk_tab[1 + 4*extract_byte(x,1)] \ - ^ mk_tab[2 + 4*extract_byte(x,2)] ^ mk_tab[3 + 4*extract_byte(x,3)] ) -# define g1_fun(x) ( mk_tab[0 + 4*extract_byte(x,3)] ^ mk_tab[1 + 4*extract_byte(x,0)] \ - ^ mk_tab[2 + 4*extract_byte(x,1)] ^ mk_tab[3 + 4*extract_byte(x,2)] ) - - -# else -# define g0_fun(x) ( mds(0, sb[0][extract_byte(x,0)]) ^ mds(1, sb[1][extract_byte(x,1)]) \ - ^ mds(2, sb[2][extract_byte(x,2)]) ^ mds(3, sb[3][extract_byte(x,3)]) ) -# define g1_fun(x) ( mds(0, sb[0][extract_byte(x,3)]) ^ mds(1, sb[1][extract_byte(x,0)]) \ - ^ mds(2, sb[2][extract_byte(x,1)]) ^ mds(3, sb[3][extract_byte(x,2)]) ) -# endif - -#else - -#define g0_fun(x) h_fun(instance, x, instance->s_key) -#define g1_fun(x) h_fun(instance, rotl(x,8), instance->s_key) - -#endif - -/* The (12,8) Reed Soloman code has the generator polynomial - - g(x) = x^4 + (a + 1/a) * x^3 + a * x^2 + (a + 1/a) * x + 1 - -where the coefficients are in the finite field GF(2^8) with a -modular polynomial a^8 + a^6 + a^3 + a^2 + 1. To generate the -remainder we have to start with a 12th order polynomial with our -eight input bytes as the coefficients of the 4th to 11th terms. -That is: - - m[7] * x^11 + m[6] * x^10 ... + m[0] * x^4 + 0 * x^3 +... + 0 - -We then multiply the generator polynomial by m[7] * x^7 and subtract -it - xor in GF(2^8) - from the above to eliminate the x^7 term (the -artihmetic on the coefficients is done in GF(2^8). We then multiply -the generator polynomial by x^6 * coeff(x^10) and use this to remove -the x^10 term. We carry on in this way until the x^4 term is removed -so that we are left with: - - r[3] * x^3 + r[2] * x^2 + r[1] 8 x^1 + r[0] - -which give the resulting 4 bytes of the remainder. This is equivalent -to the matrix multiplication in the Twofish description but much faster -to implement. - -*/ - -#define G_MOD 0x0000014d - -static u4byte mds_rem(u4byte p0, u4byte p1) -{ u4byte i, t, u; - - for(i = 0; i < 8; ++i) - { - t = p1 >> 24; // get most significant coefficient - - p1 = (p1 << 8) | (p0 >> 24); p0 <<= 8; // shift others up - - // multiply t by a (the primitive element - i.e. left shift) - - u = (t << 1); - - if(t & 0x80) // subtract modular polynomial on overflow - - u ^= G_MOD; - - p1 ^= t ^ (u << 16); // remove t * (a * x^2 + 1) - - u ^= (t >> 1); // form u = a * t + t / a = t * (a + 1 / a); - - if(t & 0x01) // add the modular polynomial on underflow - - u ^= G_MOD >> 1; - - p1 ^= (u << 24) | (u << 8); // remove t * (a + 1/a) * (x^3 + x) - } - - return p1; -}; - -/* initialise the key schedule from the user supplied key */ - -u4byte *twofish_set_key(TwofishInstance *instance, const u4byte in_key[]) -{ u4byte i, a, b, me_key[4], mo_key[4]; - u4byte *l_key, *s_key; - - l_key = instance->l_key; - s_key = instance->s_key; - -#ifdef Q_TABLES - if(!qt_gen) - { - gen_qtab(); qt_gen = 1; - } -#endif - -#ifdef M_TABLE - if(!mt_gen) - { - gen_mtab(); mt_gen = 1; - } -#endif - - instance->k_len = 4; - - for(i = 0; i < instance->k_len; ++i) - { - a = LE32(in_key[i + i]); me_key[i] = a; - b = LE32(in_key[i + i + 1]); mo_key[i] = b; - s_key[instance->k_len - i - 1] = mds_rem(a, b); - } - - for(i = 0; i < 40; i += 2) - { - a = 0x01010101 * i; b = a + 0x01010101; - a = h_fun(instance, a, me_key); - b = rotl(h_fun(instance, b, mo_key), 8); - l_key[i] = a + b; - l_key[i + 1] = rotl(a + 2 * b, 9); - } - -#ifdef MK_TABLE - gen_mk_tab(instance, s_key); -#endif - - return l_key; -}; - -/* encrypt a block of text */ - -#ifndef TC_MINIMIZE_CODE_SIZE - -#define f_rnd(i) \ - t1 = g1_fun(blk[1]); t0 = g0_fun(blk[0]); \ - blk[2] = rotr(blk[2] ^ (t0 + t1 + l_key[4 * (i) + 8]), 1); \ - blk[3] = rotl(blk[3], 1) ^ (t0 + 2 * t1 + l_key[4 * (i) + 9]); \ - t1 = g1_fun(blk[3]); t0 = g0_fun(blk[2]); \ - blk[0] = rotr(blk[0] ^ (t0 + t1 + l_key[4 * (i) + 10]), 1); \ - blk[1] = rotl(blk[1], 1) ^ (t0 + 2 * t1 + l_key[4 * (i) + 11]) - -void twofish_encrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[]) -{ u4byte t0, t1, blk[4]; - - u4byte *l_key = instance->l_key; - u4byte *mk_tab = instance->mk_tab; - - blk[0] = LE32(in_blk[0]) ^ l_key[0]; - blk[1] = LE32(in_blk[1]) ^ l_key[1]; - blk[2] = LE32(in_blk[2]) ^ l_key[2]; - blk[3] = LE32(in_blk[3]) ^ l_key[3]; - - f_rnd(0); f_rnd(1); f_rnd(2); f_rnd(3); - f_rnd(4); f_rnd(5); f_rnd(6); f_rnd(7); - - out_blk[0] = LE32(blk[2] ^ l_key[4]); - out_blk[1] = LE32(blk[3] ^ l_key[5]); - out_blk[2] = LE32(blk[0] ^ l_key[6]); - out_blk[3] = LE32(blk[1] ^ l_key[7]); -}; - -#else // TC_MINIMIZE_CODE_SIZE - -void twofish_encrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[]) -{ u4byte t0, t1, blk[4]; - - u4byte *l_key = instance->l_key; -#ifdef TC_WINDOWS_BOOT_TWOFISH - u4byte *mk_tab = instance->mk_tab; -#endif - int i; - - blk[0] = LE32(in_blk[0]) ^ l_key[0]; - blk[1] = LE32(in_blk[1]) ^ l_key[1]; - blk[2] = LE32(in_blk[2]) ^ l_key[2]; - blk[3] = LE32(in_blk[3]) ^ l_key[3]; - - for (i = 0; i <= 7; ++i) - { - t1 = g1_fun(blk[1]); t0 = g0_fun(blk[0]); - blk[2] = rotr(blk[2] ^ (t0 + t1 + l_key[4 * (i) + 8]), 1); - blk[3] = rotl(blk[3], 1) ^ (t0 + 2 * t1 + l_key[4 * (i) + 9]); - t1 = g1_fun(blk[3]); t0 = g0_fun(blk[2]); - blk[0] = rotr(blk[0] ^ (t0 + t1 + l_key[4 * (i) + 10]), 1); - blk[1] = rotl(blk[1], 1) ^ (t0 + 2 * t1 + l_key[4 * (i) + 11]); - } - - out_blk[0] = LE32(blk[2] ^ l_key[4]); - out_blk[1] = LE32(blk[3] ^ l_key[5]); - out_blk[2] = LE32(blk[0] ^ l_key[6]); - out_blk[3] = LE32(blk[1] ^ l_key[7]); -}; - -#endif // TC_MINIMIZE_CODE_SIZE - -/* decrypt a block of text */ - -#ifndef TC_MINIMIZE_CODE_SIZE - -#define i_rnd(i) \ - t1 = g1_fun(blk[1]); t0 = g0_fun(blk[0]); \ - blk[2] = rotl(blk[2], 1) ^ (t0 + t1 + l_key[4 * (i) + 10]); \ - blk[3] = rotr(blk[3] ^ (t0 + 2 * t1 + l_key[4 * (i) + 11]), 1); \ - t1 = g1_fun(blk[3]); t0 = g0_fun(blk[2]); \ - blk[0] = rotl(blk[0], 1) ^ (t0 + t1 + l_key[4 * (i) + 8]); \ - blk[1] = rotr(blk[1] ^ (t0 + 2 * t1 + l_key[4 * (i) + 9]), 1) - -void twofish_decrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[4]) -{ u4byte t0, t1, blk[4]; - - u4byte *l_key = instance->l_key; - u4byte *mk_tab = instance->mk_tab; - - blk[0] = LE32(in_blk[0]) ^ l_key[4]; - blk[1] = LE32(in_blk[1]) ^ l_key[5]; - blk[2] = LE32(in_blk[2]) ^ l_key[6]; - blk[3] = LE32(in_blk[3]) ^ l_key[7]; - - i_rnd(7); i_rnd(6); i_rnd(5); i_rnd(4); - i_rnd(3); i_rnd(2); i_rnd(1); i_rnd(0); - - out_blk[0] = LE32(blk[2] ^ l_key[0]); - out_blk[1] = LE32(blk[3] ^ l_key[1]); - out_blk[2] = LE32(blk[0] ^ l_key[2]); - out_blk[3] = LE32(blk[1] ^ l_key[3]); -}; - -#else // TC_MINIMIZE_CODE_SIZE - -void twofish_decrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[4]) -{ u4byte t0, t1, blk[4]; - - u4byte *l_key = instance->l_key; -#ifdef TC_WINDOWS_BOOT_TWOFISH - u4byte *mk_tab = instance->mk_tab; -#endif - int i; - - blk[0] = LE32(in_blk[0]) ^ l_key[4]; - blk[1] = LE32(in_blk[1]) ^ l_key[5]; - blk[2] = LE32(in_blk[2]) ^ l_key[6]; - blk[3] = LE32(in_blk[3]) ^ l_key[7]; - - for (i = 7; i >= 0; --i) - { - t1 = g1_fun(blk[1]); t0 = g0_fun(blk[0]); - blk[2] = rotl(blk[2], 1) ^ (t0 + t1 + l_key[4 * (i) + 10]); - blk[3] = rotr(blk[3] ^ (t0 + 2 * t1 + l_key[4 * (i) + 11]), 1); - t1 = g1_fun(blk[3]); t0 = g0_fun(blk[2]); - blk[0] = rotl(blk[0], 1) ^ (t0 + t1 + l_key[4 * (i) + 8]); - blk[1] = rotr(blk[1] ^ (t0 + 2 * t1 + l_key[4 * (i) + 9]), 1); - } - - out_blk[0] = LE32(blk[2] ^ l_key[0]); - out_blk[1] = LE32(blk[3] ^ l_key[1]); - out_blk[2] = LE32(blk[0] ^ l_key[2]); - out_blk[3] = LE32(blk[1] ^ l_key[3]); -}; - -#endif // TC_MINIMIZE_CODE_SIZE +/* + --------------------------------------------------------------------------- + Copyright (c) 1999, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + + My thanks to Doug Whiting and Niels Ferguson for comments that led + to improvements in this implementation. + + Issue Date: 14th January 1999 +*/ + +/* Adapted for TrueCrypt */ +/* Adapted for VeraCrypt */ + + +#ifdef TC_WINDOWS_BOOT +#pragma optimize ("tl", on) +#endif + +#include "Twofish.h" +#include "Common/Endian.h" + +#define Q_TABLES +#define M_TABLE + +#if !defined (TC_MINIMIZE_CODE_SIZE) || defined (TC_WINDOWS_BOOT_TWOFISH) +# define MK_TABLE +# define ONE_STEP +#endif + +/* finite field arithmetic for GF(2**8) with the modular */ +/* polynomial x^8 + x^6 + x^5 + x^3 + 1 (0x169) */ + +#define G_M 0x0169 + +static u1byte tab_5b[4] = { 0, G_M >> 2, G_M >> 1, (G_M >> 1) ^ (G_M >> 2) }; +static u1byte tab_ef[4] = { 0, (G_M >> 1) ^ (G_M >> 2), G_M >> 1, G_M >> 2 }; + +#define ffm_01(x) (x) +#define ffm_5b(x) ((x) ^ ((x) >> 2) ^ tab_5b[(x) & 3]) +#define ffm_ef(x) ((x) ^ ((x) >> 1) ^ ((x) >> 2) ^ tab_ef[(x) & 3]) + +static u1byte ror4[16] = { 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 }; +static u1byte ashx[16] = { 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12, 5, 14, 7 }; + +static u1byte qt0[2][16] = +{ { 8, 1, 7, 13, 6, 15, 3, 2, 0, 11, 5, 9, 14, 12, 10, 4 }, + { 2, 8, 11, 13, 15, 7, 6, 14, 3, 1, 9, 4, 0, 10, 12, 5 } +}; + +static u1byte qt1[2][16] = +{ { 14, 12, 11, 8, 1, 2, 3, 5, 15, 4, 10, 6, 7, 0, 9, 13 }, + { 1, 14, 2, 11, 4, 12, 3, 7, 6, 13, 10, 5, 15, 9, 0, 8 } +}; + +static u1byte qt2[2][16] = +{ { 11, 10, 5, 14, 6, 13, 9, 0, 12, 8, 15, 3, 2, 4, 7, 1 }, + { 4, 12, 7, 5, 1, 6, 9, 10, 0, 14, 13, 8, 2, 11, 3, 15 } +}; + +static u1byte qt3[2][16] = +{ { 13, 7, 15, 4, 1, 2, 6, 14, 9, 11, 3, 0, 8, 5, 12, 10 }, + { 11, 9, 5, 1, 12, 3, 13, 14, 6, 4, 7, 15, 2, 0, 8, 10 } +}; + +static u1byte qp(const u4byte n, const u1byte x) +{ u1byte a0, a1, a2, a3, a4, b0, b1, b2, b3, b4; + + a0 = x >> 4; b0 = x & 15; + a1 = a0 ^ b0; b1 = ror4[b0] ^ ashx[a0]; + a2 = qt0[n][a1]; b2 = qt1[n][b1]; + a3 = a2 ^ b2; b3 = ror4[b2] ^ ashx[a2]; + a4 = qt2[n][a3]; b4 = qt3[n][b3]; + return (b4 << 4) | a4; +}; + +#ifdef Q_TABLES + +static u4byte qt_gen = 0; +static u1byte q_tab[2][256]; + +#define q(n,x) q_tab[n][x] + +static void gen_qtab(void) +{ u4byte i; + + for(i = 0; i < 256; ++i) + { + q(0,i) = qp(0, (u1byte)i); + q(1,i) = qp(1, (u1byte)i); + } +}; + +#else + +#define q(n,x) qp(n, x) + +#endif + +#ifdef M_TABLE + +static u4byte mt_gen = 0; +static u4byte m_tab[4][256]; + +static void gen_mtab(void) +{ u4byte i, f01, f5b, fef; + + for(i = 0; i < 256; ++i) + { + f01 = q(1,i); f5b = ffm_5b(f01); fef = ffm_ef(f01); + m_tab[0][i] = f01 + (f5b << 8) + (fef << 16) + (fef << 24); + m_tab[2][i] = f5b + (fef << 8) + (f01 << 16) + (fef << 24); + + f01 = q(0,i); f5b = ffm_5b(f01); fef = ffm_ef(f01); + m_tab[1][i] = fef + (fef << 8) + (f5b << 16) + (f01 << 24); + m_tab[3][i] = f5b + (f01 << 8) + (fef << 16) + (f5b << 24); + } +}; + +#define mds(n,x) m_tab[n][x] + +#else + +#define fm_00 ffm_01 +#define fm_10 ffm_5b +#define fm_20 ffm_ef +#define fm_30 ffm_ef +#define q_0(x) q(1,x) + +#define fm_01 ffm_ef +#define fm_11 ffm_ef +#define fm_21 ffm_5b +#define fm_31 ffm_01 +#define q_1(x) q(0,x) + +#define fm_02 ffm_5b +#define fm_12 ffm_ef +#define fm_22 ffm_01 +#define fm_32 ffm_ef +#define q_2(x) q(1,x) + +#define fm_03 ffm_5b +#define fm_13 ffm_01 +#define fm_23 ffm_ef +#define fm_33 ffm_5b +#define q_3(x) q(0,x) + +#define f_0(n,x) ((u4byte)fm_0##n(x)) +#define f_1(n,x) ((u4byte)fm_1##n(x) << 8) +#define f_2(n,x) ((u4byte)fm_2##n(x) << 16) +#define f_3(n,x) ((u4byte)fm_3##n(x) << 24) + +#define mds(n,x) f_0(n,q_##n(x)) ^ f_1(n,q_##n(x)) ^ f_2(n,q_##n(x)) ^ f_3(n,q_##n(x)) + +#endif + +static u4byte h_fun(TwofishInstance *instance, const u4byte x, const u4byte key[]) +{ u4byte b0, b1, b2, b3; + +#ifndef M_TABLE + u4byte m5b_b0, m5b_b1, m5b_b2, m5b_b3; + u4byte mef_b0, mef_b1, mef_b2, mef_b3; +#endif + + b0 = extract_byte(x, 0); b1 = extract_byte(x, 1); b2 = extract_byte(x, 2); b3 = extract_byte(x, 3); + + switch(instance->k_len) + { + case 4: b0 = q(1, (u1byte) b0) ^ extract_byte(key[3],0); + b1 = q(0, (u1byte) b1) ^ extract_byte(key[3],1); + b2 = q(0, (u1byte) b2) ^ extract_byte(key[3],2); + b3 = q(1, (u1byte) b3) ^ extract_byte(key[3],3); + case 3: b0 = q(1, (u1byte) b0) ^ extract_byte(key[2],0); + b1 = q(1, (u1byte) b1) ^ extract_byte(key[2],1); + b2 = q(0, (u1byte) b2) ^ extract_byte(key[2],2); + b3 = q(0, (u1byte) b3) ^ extract_byte(key[2],3); + case 2: b0 = q(0, (u1byte) (q(0, (u1byte) b0) ^ extract_byte(key[1],0))) ^ extract_byte(key[0],0); + b1 = q(0, (u1byte) (q(1, (u1byte) b1) ^ extract_byte(key[1],1))) ^ extract_byte(key[0],1); + b2 = q(1, (u1byte) (q(0, (u1byte) b2) ^ extract_byte(key[1],2))) ^ extract_byte(key[0],2); + b3 = q(1, (u1byte) (q(1, (u1byte) b3) ^ extract_byte(key[1],3))) ^ extract_byte(key[0],3); + } +#ifdef M_TABLE + + return mds(0, b0) ^ mds(1, b1) ^ mds(2, b2) ^ mds(3, b3); + +#else + + b0 = q(1, (u1byte) b0); b1 = q(0, (u1byte) b1); b2 = q(1, (u1byte) b2); b3 = q(0, (u1byte) b3); + m5b_b0 = ffm_5b(b0); m5b_b1 = ffm_5b(b1); m5b_b2 = ffm_5b(b2); m5b_b3 = ffm_5b(b3); + mef_b0 = ffm_ef(b0); mef_b1 = ffm_ef(b1); mef_b2 = ffm_ef(b2); mef_b3 = ffm_ef(b3); + b0 ^= mef_b1 ^ m5b_b2 ^ m5b_b3; b3 ^= m5b_b0 ^ mef_b1 ^ mef_b2; + b2 ^= mef_b0 ^ m5b_b1 ^ mef_b3; b1 ^= mef_b0 ^ mef_b2 ^ m5b_b3; + + return b0 | (b3 << 8) | (b2 << 16) | (b1 << 24); + +#endif +}; + +#ifdef MK_TABLE + +#ifdef ONE_STEP +//u4byte mk_tab[4][256]; +#else +static u1byte sb[4][256]; +#endif + +#define q20(x) q(0,q(0,x) ^ extract_byte(key[1],0)) ^ extract_byte(key[0],0) +#define q21(x) q(0,q(1,x) ^ extract_byte(key[1],1)) ^ extract_byte(key[0],1) +#define q22(x) q(1,q(0,x) ^ extract_byte(key[1],2)) ^ extract_byte(key[0],2) +#define q23(x) q(1,q(1,x) ^ extract_byte(key[1],3)) ^ extract_byte(key[0],3) + +#define q30(x) q(0,q(0,q(1, x) ^ extract_byte(key[2],0)) ^ extract_byte(key[1],0)) ^ extract_byte(key[0],0) +#define q31(x) q(0,q(1,q(1, x) ^ extract_byte(key[2],1)) ^ extract_byte(key[1],1)) ^ extract_byte(key[0],1) +#define q32(x) q(1,q(0,q(0, x) ^ extract_byte(key[2],2)) ^ extract_byte(key[1],2)) ^ extract_byte(key[0],2) +#define q33(x) q(1,q(1,q(0, x) ^ extract_byte(key[2],3)) ^ extract_byte(key[1],3)) ^ extract_byte(key[0],3) + +#define q40(x) q(0,q(0,q(1, q(1, x) ^ extract_byte(key[3],0)) ^ extract_byte(key[2],0)) ^ extract_byte(key[1],0)) ^ extract_byte(key[0],0) +#define q41(x) q(0,q(1,q(1, q(0, x) ^ extract_byte(key[3],1)) ^ extract_byte(key[2],1)) ^ extract_byte(key[1],1)) ^ extract_byte(key[0],1) +#define q42(x) q(1,q(0,q(0, q(0, x) ^ extract_byte(key[3],2)) ^ extract_byte(key[2],2)) ^ extract_byte(key[1],2)) ^ extract_byte(key[0],2) +#define q43(x) q(1,q(1,q(0, q(1, x) ^ extract_byte(key[3],3)) ^ extract_byte(key[2],3)) ^ extract_byte(key[1],3)) ^ extract_byte(key[0],3) + +static void gen_mk_tab(TwofishInstance *instance, u4byte key[]) +{ u4byte i; + u1byte by; + + u4byte *mk_tab = instance->mk_tab; + + switch(instance->k_len) + { + case 2: for(i = 0; i < 256; ++i) + { + by = (u1byte)i; +#ifdef ONE_STEP + mk_tab[0 + 4*i] = mds(0, q20(by)); mk_tab[1 + 4*i] = mds(1, q21(by)); + mk_tab[2 + 4*i] = mds(2, q22(by)); mk_tab[3 + 4*i] = mds(3, q23(by)); +#else + sb[0][i] = q20(by); sb[1][i] = q21(by); + sb[2][i] = q22(by); sb[3][i] = q23(by); +#endif + } + break; + + case 3: for(i = 0; i < 256; ++i) + { + by = (u1byte)i; +#ifdef ONE_STEP + mk_tab[0 + 4*i] = mds(0, q30(by)); mk_tab[1 + 4*i] = mds(1, q31(by)); + mk_tab[2 + 4*i] = mds(2, q32(by)); mk_tab[3 + 4*i] = mds(3, q33(by)); +#else + sb[0][i] = q30(by); sb[1][i] = q31(by); + sb[2][i] = q32(by); sb[3][i] = q33(by); +#endif + } + break; + + case 4: for(i = 0; i < 256; ++i) + { + by = (u1byte)i; +#ifdef ONE_STEP + mk_tab[0 + 4*i] = mds(0, q40(by)); mk_tab[1 + 4*i] = mds(1, q41(by)); + mk_tab[2 + 4*i] = mds(2, q42(by)); mk_tab[3 + 4*i] = mds(3, q43(by)); +#else + sb[0][i] = q40(by); sb[1][i] = q41(by); + sb[2][i] = q42(by); sb[3][i] = q43(by); +#endif + } + } +}; + +# ifdef ONE_STEP +# define g0_fun(x) ( mk_tab[0 + 4*extract_byte(x,0)] ^ mk_tab[1 + 4*extract_byte(x,1)] \ + ^ mk_tab[2 + 4*extract_byte(x,2)] ^ mk_tab[3 + 4*extract_byte(x,3)] ) +# define g1_fun(x) ( mk_tab[0 + 4*extract_byte(x,3)] ^ mk_tab[1 + 4*extract_byte(x,0)] \ + ^ mk_tab[2 + 4*extract_byte(x,1)] ^ mk_tab[3 + 4*extract_byte(x,2)] ) + + +# else +# define g0_fun(x) ( mds(0, sb[0][extract_byte(x,0)]) ^ mds(1, sb[1][extract_byte(x,1)]) \ + ^ mds(2, sb[2][extract_byte(x,2)]) ^ mds(3, sb[3][extract_byte(x,3)]) ) +# define g1_fun(x) ( mds(0, sb[0][extract_byte(x,3)]) ^ mds(1, sb[1][extract_byte(x,0)]) \ + ^ mds(2, sb[2][extract_byte(x,1)]) ^ mds(3, sb[3][extract_byte(x,2)]) ) +# endif + +#else + +#define g0_fun(x) h_fun(instance, x, instance->s_key) +#define g1_fun(x) h_fun(instance, rotl(x,8), instance->s_key) + +#endif + +/* The (12,8) Reed Soloman code has the generator polynomial + + g(x) = x^4 + (a + 1/a) * x^3 + a * x^2 + (a + 1/a) * x + 1 + +where the coefficients are in the finite field GF(2^8) with a +modular polynomial a^8 + a^6 + a^3 + a^2 + 1. To generate the +remainder we have to start with a 12th order polynomial with our +eight input bytes as the coefficients of the 4th to 11th terms. +That is: + + m[7] * x^11 + m[6] * x^10 ... + m[0] * x^4 + 0 * x^3 +... + 0 + +We then multiply the generator polynomial by m[7] * x^7 and subtract +it - xor in GF(2^8) - from the above to eliminate the x^7 term (the +artihmetic on the coefficients is done in GF(2^8). We then multiply +the generator polynomial by x^6 * coeff(x^10) and use this to remove +the x^10 term. We carry on in this way until the x^4 term is removed +so that we are left with: + + r[3] * x^3 + r[2] * x^2 + r[1] 8 x^1 + r[0] + +which give the resulting 4 bytes of the remainder. This is equivalent +to the matrix multiplication in the Twofish description but much faster +to implement. + +*/ + +#define G_MOD 0x0000014d + +static u4byte mds_rem(u4byte p0, u4byte p1) +{ u4byte i, t, u; + + for(i = 0; i < 8; ++i) + { + t = p1 >> 24; // get most significant coefficient + + p1 = (p1 << 8) | (p0 >> 24); p0 <<= 8; // shift others up + + // multiply t by a (the primitive element - i.e. left shift) + + u = (t << 1); + + if(t & 0x80) // subtract modular polynomial on overflow + + u ^= G_MOD; + + p1 ^= t ^ (u << 16); // remove t * (a * x^2 + 1) + + u ^= (t >> 1); // form u = a * t + t / a = t * (a + 1 / a); + + if(t & 0x01) // add the modular polynomial on underflow + + u ^= G_MOD >> 1; + + p1 ^= (u << 24) | (u << 8); // remove t * (a + 1/a) * (x^3 + x) + } + + return p1; +}; + +/* initialise the key schedule from the user supplied key */ + +u4byte *twofish_set_key(TwofishInstance *instance, const u4byte in_key[]) +{ u4byte i, a, b, me_key[4], mo_key[4]; + u4byte *l_key, *s_key; + + l_key = instance->l_key; + s_key = instance->s_key; + +#ifdef Q_TABLES + if(!qt_gen) + { + gen_qtab(); qt_gen = 1; + } +#endif + +#ifdef M_TABLE + if(!mt_gen) + { + gen_mtab(); mt_gen = 1; + } +#endif + + instance->k_len = 4; + + for(i = 0; i < instance->k_len; ++i) + { + a = LE32(in_key[i + i]); me_key[i] = a; + b = LE32(in_key[i + i + 1]); mo_key[i] = b; + s_key[instance->k_len - i - 1] = mds_rem(a, b); + } + + for(i = 0; i < 40; i += 2) + { + a = 0x01010101 * i; b = a + 0x01010101; + a = h_fun(instance, a, me_key); + b = rotl(h_fun(instance, b, mo_key), 8); + l_key[i] = a + b; + l_key[i + 1] = rotl(a + 2 * b, 9); + } + +#ifdef MK_TABLE + gen_mk_tab(instance, s_key); +#endif + + return l_key; +}; + +/* encrypt a block of text */ + +#ifndef TC_MINIMIZE_CODE_SIZE + +#define f_rnd(i) \ + t1 = g1_fun(blk[1]); t0 = g0_fun(blk[0]); \ + blk[2] = rotr(blk[2] ^ (t0 + t1 + l_key[4 * (i) + 8]), 1); \ + blk[3] = rotl(blk[3], 1) ^ (t0 + 2 * t1 + l_key[4 * (i) + 9]); \ + t1 = g1_fun(blk[3]); t0 = g0_fun(blk[2]); \ + blk[0] = rotr(blk[0] ^ (t0 + t1 + l_key[4 * (i) + 10]), 1); \ + blk[1] = rotl(blk[1], 1) ^ (t0 + 2 * t1 + l_key[4 * (i) + 11]) + +void twofish_encrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[]) +{ u4byte t0, t1, blk[4]; + + u4byte *l_key = instance->l_key; + u4byte *mk_tab = instance->mk_tab; + + blk[0] = LE32(in_blk[0]) ^ l_key[0]; + blk[1] = LE32(in_blk[1]) ^ l_key[1]; + blk[2] = LE32(in_blk[2]) ^ l_key[2]; + blk[3] = LE32(in_blk[3]) ^ l_key[3]; + + f_rnd(0); f_rnd(1); f_rnd(2); f_rnd(3); + f_rnd(4); f_rnd(5); f_rnd(6); f_rnd(7); + + out_blk[0] = LE32(blk[2] ^ l_key[4]); + out_blk[1] = LE32(blk[3] ^ l_key[5]); + out_blk[2] = LE32(blk[0] ^ l_key[6]); + out_blk[3] = LE32(blk[1] ^ l_key[7]); +}; + +#else // TC_MINIMIZE_CODE_SIZE + +void twofish_encrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[]) +{ u4byte t0, t1, blk[4]; + + u4byte *l_key = instance->l_key; +#ifdef TC_WINDOWS_BOOT_TWOFISH + u4byte *mk_tab = instance->mk_tab; +#endif + int i; + + blk[0] = LE32(in_blk[0]) ^ l_key[0]; + blk[1] = LE32(in_blk[1]) ^ l_key[1]; + blk[2] = LE32(in_blk[2]) ^ l_key[2]; + blk[3] = LE32(in_blk[3]) ^ l_key[3]; + + for (i = 0; i <= 7; ++i) + { + t1 = g1_fun(blk[1]); t0 = g0_fun(blk[0]); + blk[2] = rotr(blk[2] ^ (t0 + t1 + l_key[4 * (i) + 8]), 1); + blk[3] = rotl(blk[3], 1) ^ (t0 + 2 * t1 + l_key[4 * (i) + 9]); + t1 = g1_fun(blk[3]); t0 = g0_fun(blk[2]); + blk[0] = rotr(blk[0] ^ (t0 + t1 + l_key[4 * (i) + 10]), 1); + blk[1] = rotl(blk[1], 1) ^ (t0 + 2 * t1 + l_key[4 * (i) + 11]); + } + + out_blk[0] = LE32(blk[2] ^ l_key[4]); + out_blk[1] = LE32(blk[3] ^ l_key[5]); + out_blk[2] = LE32(blk[0] ^ l_key[6]); + out_blk[3] = LE32(blk[1] ^ l_key[7]); +}; + +#endif // TC_MINIMIZE_CODE_SIZE + +/* decrypt a block of text */ + +#ifndef TC_MINIMIZE_CODE_SIZE + +#define i_rnd(i) \ + t1 = g1_fun(blk[1]); t0 = g0_fun(blk[0]); \ + blk[2] = rotl(blk[2], 1) ^ (t0 + t1 + l_key[4 * (i) + 10]); \ + blk[3] = rotr(blk[3] ^ (t0 + 2 * t1 + l_key[4 * (i) + 11]), 1); \ + t1 = g1_fun(blk[3]); t0 = g0_fun(blk[2]); \ + blk[0] = rotl(blk[0], 1) ^ (t0 + t1 + l_key[4 * (i) + 8]); \ + blk[1] = rotr(blk[1] ^ (t0 + 2 * t1 + l_key[4 * (i) + 9]), 1) + +void twofish_decrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[4]) +{ u4byte t0, t1, blk[4]; + + u4byte *l_key = instance->l_key; + u4byte *mk_tab = instance->mk_tab; + + blk[0] = LE32(in_blk[0]) ^ l_key[4]; + blk[1] = LE32(in_blk[1]) ^ l_key[5]; + blk[2] = LE32(in_blk[2]) ^ l_key[6]; + blk[3] = LE32(in_blk[3]) ^ l_key[7]; + + i_rnd(7); i_rnd(6); i_rnd(5); i_rnd(4); + i_rnd(3); i_rnd(2); i_rnd(1); i_rnd(0); + + out_blk[0] = LE32(blk[2] ^ l_key[0]); + out_blk[1] = LE32(blk[3] ^ l_key[1]); + out_blk[2] = LE32(blk[0] ^ l_key[2]); + out_blk[3] = LE32(blk[1] ^ l_key[3]); +}; + +#else // TC_MINIMIZE_CODE_SIZE + +void twofish_decrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[4]) +{ u4byte t0, t1, blk[4]; + + u4byte *l_key = instance->l_key; +#ifdef TC_WINDOWS_BOOT_TWOFISH + u4byte *mk_tab = instance->mk_tab; +#endif + int i; + + blk[0] = LE32(in_blk[0]) ^ l_key[4]; + blk[1] = LE32(in_blk[1]) ^ l_key[5]; + blk[2] = LE32(in_blk[2]) ^ l_key[6]; + blk[3] = LE32(in_blk[3]) ^ l_key[7]; + + for (i = 7; i >= 0; --i) + { + t1 = g1_fun(blk[1]); t0 = g0_fun(blk[0]); + blk[2] = rotl(blk[2], 1) ^ (t0 + t1 + l_key[4 * (i) + 10]); + blk[3] = rotr(blk[3] ^ (t0 + 2 * t1 + l_key[4 * (i) + 11]), 1); + t1 = g1_fun(blk[3]); t0 = g0_fun(blk[2]); + blk[0] = rotl(blk[0], 1) ^ (t0 + t1 + l_key[4 * (i) + 8]); + blk[1] = rotr(blk[1] ^ (t0 + 2 * t1 + l_key[4 * (i) + 9]), 1); + } + + out_blk[0] = LE32(blk[2] ^ l_key[0]); + out_blk[1] = LE32(blk[3] ^ l_key[1]); + out_blk[2] = LE32(blk[0] ^ l_key[2]); + out_blk[3] = LE32(blk[1] ^ l_key[3]); +}; + +#endif // TC_MINIMIZE_CODE_SIZE diff --git a/src/Crypto/Twofish.h b/src/Crypto/Twofish.h index ed400257..1011608e 100644 --- a/src/Crypto/Twofish.h +++ b/src/Crypto/Twofish.h @@ -1,56 +1,56 @@ -#ifndef TWOFISH_H -#define TWOFISH_H - -#include "Common/Tcdefs.h" - -#if defined(__cplusplus) -extern "C" -{ -#endif - -#ifndef u4byte -#define u4byte unsigned __int32 -#endif -#ifndef u1byte -#define u1byte unsigned char -#endif - -#ifndef extract_byte -#define extract_byte(x,n) ((u1byte)((x) >> (8 * n))) -#endif - -#ifndef rotl - -#ifdef _WIN32 -#include -#pragma intrinsic(_lrotr,_lrotl) -#define rotr(x,n) _lrotr(x,n) -#define rotl(x,n) _lrotl(x,n) -#else -#define rotr(x,n) (((x)>>(n))|((x)<<(32-(n)))) -#define rotl(x,n) (((x)<<(n))|((x)>>(32-(n)))) -#endif - -#endif -typedef struct -{ - u4byte l_key[40]; - u4byte s_key[4]; -#if !defined (TC_MINIMIZE_CODE_SIZE) || defined (TC_WINDOWS_BOOT_TWOFISH) - u4byte mk_tab[4 * 256]; -#endif - u4byte k_len; -} TwofishInstance; - -#define TWOFISH_KS sizeof(TwofishInstance) - -/* in_key must be 32-bytes long */ -u4byte * twofish_set_key(TwofishInstance *instance, const u4byte in_key[]); -void twofish_encrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[]); -void twofish_decrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[4]); - -#if defined(__cplusplus) -} -#endif - -#endif // TWOFISH_H +#ifndef TWOFISH_H +#define TWOFISH_H + +#include "Common/Tcdefs.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#ifndef u4byte +#define u4byte unsigned __int32 +#endif +#ifndef u1byte +#define u1byte unsigned char +#endif + +#ifndef extract_byte +#define extract_byte(x,n) ((u1byte)((x) >> (8 * n))) +#endif + +#ifndef rotl + +#ifdef _WIN32 +#include +#pragma intrinsic(_lrotr,_lrotl) +#define rotr(x,n) _lrotr(x,n) +#define rotl(x,n) _lrotl(x,n) +#else +#define rotr(x,n) (((x)>>(n))|((x)<<(32-(n)))) +#define rotl(x,n) (((x)<<(n))|((x)>>(32-(n)))) +#endif + +#endif +typedef struct +{ + u4byte l_key[40]; + u4byte s_key[4]; +#if !defined (TC_MINIMIZE_CODE_SIZE) || defined (TC_WINDOWS_BOOT_TWOFISH) + u4byte mk_tab[4 * 256]; +#endif + u4byte k_len; +} TwofishInstance; + +#define TWOFISH_KS sizeof(TwofishInstance) + +/* in_key must be 32-bytes long */ +u4byte * twofish_set_key(TwofishInstance *instance, const u4byte in_key[]); +void twofish_encrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[]); +void twofish_decrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[4]); + +#if defined(__cplusplus) +} +#endif + +#endif // TWOFISH_H diff --git a/src/Crypto/Whirlpool.h b/src/Crypto/Whirlpool.h index df8aa7ac..9e771935 100644 --- a/src/Crypto/Whirlpool.h +++ b/src/Crypto/Whirlpool.h @@ -1,27 +1,27 @@ -#ifndef WHIRLPOOL_H -#define WHIRLPOOL_H 1 - -#include "Common/Tcdefs.h" -#include "config.h" - -typedef struct WHIRLPOOL_CTX { - uint64 countLo; - uint64 countHi; - CRYPTOPP_ALIGN_DATA(16) uint64 data[8]; - CRYPTOPP_ALIGN_DATA(16) uint64 state[8]; -} WHIRLPOOL_CTX; - -// ------------- -#if defined(__cplusplus) -extern "C" { -#endif - -void WHIRLPOOL_add(const unsigned char * source, unsigned __int32 sourceBits, WHIRLPOOL_CTX * const ctx); -void WHIRLPOOL_finalize(WHIRLPOOL_CTX* const ctx, unsigned char * result); -void WHIRLPOOL_init(WHIRLPOOL_CTX* const ctx); - -#if defined(__cplusplus) -} -#endif - -#endif /* WHIRLPOOL_H */ +#ifndef WHIRLPOOL_H +#define WHIRLPOOL_H 1 + +#include "Common/Tcdefs.h" +#include "config.h" + +typedef struct WHIRLPOOL_CTX { + uint64 countLo; + uint64 countHi; + CRYPTOPP_ALIGN_DATA(16) uint64 data[8]; + CRYPTOPP_ALIGN_DATA(16) uint64 state[8]; +} WHIRLPOOL_CTX; + +// ------------- +#if defined(__cplusplus) +extern "C" { +#endif + +void WHIRLPOOL_add(const unsigned char * source, unsigned __int32 sourceBits, WHIRLPOOL_CTX * const ctx); +void WHIRLPOOL_finalize(WHIRLPOOL_CTX* const ctx, unsigned char * result); +void WHIRLPOOL_init(WHIRLPOOL_CTX* const ctx); + +#if defined(__cplusplus) +} +#endif + +#endif /* WHIRLPOOL_H */ diff --git a/src/Crypto/cpu.c b/src/Crypto/cpu.c index 58a131af..4274a8ae 100644 --- a/src/Crypto/cpu.c +++ b/src/Crypto/cpu.c @@ -1,231 +1,231 @@ -/* cpu.c - written and placed in the public domain by Wei Dai */ - -#include "cpu.h" -#include "misc.h" - -#ifndef EXCEPTION_EXECUTE_HANDLER -#define EXCEPTION_EXECUTE_HANDLER 1 -#endif - -#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY -#include -#include -#endif - -#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE -#include -#endif - -#ifdef CRYPTOPP_CPUID_AVAILABLE - -#if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64 - -int CpuId(uint32 input, uint32 output[4]) -{ - __cpuid((int *)output, input); - return 1; -} - -#else - -#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY - -#if defined(__cplusplus) -extern "C" { -#endif - -typedef void (*SigHandler)(int); - -static jmp_buf s_jmpNoCPUID; -static void SigIllHandlerCPUID(int p) -{ - longjmp(s_jmpNoCPUID, 1); -} - -#if CRYPTOPP_BOOL_X64 == 0 -static jmp_buf s_jmpNoSSE2; -static void SigIllHandlerSSE2(int p) -{ - longjmp(s_jmpNoSSE2, 1); -} -#endif - -#if defined(__cplusplus) -} -#endif -#endif - -int CpuId(uint32 input, uint32 output[4]) -{ -#ifdef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY - __try - { - __asm - { - mov eax, input - mov ecx, 0 - cpuid - mov edi, output - mov [edi], eax - mov [edi+4], ebx - mov [edi+8], ecx - mov [edi+12], edx - } - } - __except (EXCEPTION_EXECUTE_HANDLER) - { - return 0; - } - - // function 0 returns the highest basic function understood in EAX - if(input == 0) - return !!output[0]? 1 : 0; - - return 1; -#else - // longjmp and clobber warnings. Volatile is required. - // http://github.com/weidai11/cryptopp/issues/24 - // http://stackoverflow.com/q/7721854 - volatile int result = 1; - - SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID); - if (oldHandler == SIG_ERR) - result = 0; - - if (setjmp(s_jmpNoCPUID)) - result = 0; - else - { - asm volatile - ( - // save ebx in case -fPIC is being used - // TODO: this might need an early clobber on EDI. -#if CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 - "pushq %%rbx; cpuid; mov %%ebx, %%edi; popq %%rbx" -#else - "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx" -#endif - : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3]) - : "a" (input), "c" (0) - ); - } - - signal(SIGILL, oldHandler); - return result; -#endif -} - -#endif - -static int TrySSE2() -{ -#if CRYPTOPP_BOOL_X64 - return 1; -#elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) - __try - { -#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE - AS2(por xmm0, xmm0) // executing SSE2 instruction -#elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE - __m128i x = _mm_setzero_si128(); - return _mm_cvtsi128_si32(x) == 0 ? 1 : 0; -#endif - } - __except (EXCEPTION_EXECUTE_HANDLER) - { - return 0; - } - return 1; -#else - // longjmp and clobber warnings. Volatile is required. - // http://github.com/weidai11/cryptopp/issues/24 - // http://stackoverflow.com/q/7721854 - volatile int result = 1; - - SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2); - if (oldHandler == SIG_ERR) - return 0; - - if (setjmp(s_jmpNoSSE2)) - result = 1; - else - { -#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE - __asm __volatile ("por %xmm0, %xmm0"); -#elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE - __m128i x = _mm_setzero_si128(); - result = _mm_cvtsi128_si32(x) == 0? 1 : 0; -#endif - } - - signal(SIGILL, oldHandler); - return result; -#endif -} - -int g_x86DetectionDone = 0; -int g_hasISSE = 0, g_hasSSE2 = 0, g_hasSSSE3 = 0, g_hasMMX = 0, g_hasAESNI = 0, g_hasCLMUL = 0, g_isP4 = 0; -uint32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; - -VC_INLINE int IsIntel(const uint32 output[4]) -{ - // This is the "GenuineIntel" string - return (output[1] /*EBX*/ == 0x756e6547) && - (output[2] /*ECX*/ == 0x6c65746e) && - (output[3] /*EDX*/ == 0x49656e69); -} - -VC_INLINE int IsAMD(const uint32 output[4]) -{ - // This is the "AuthenticAMD" string - return (output[1] /*EBX*/ == 0x68747541) && - (output[2] /*ECX*/ == 0x69746E65) && - (output[3] /*EDX*/ == 0x444D4163); -} - -void DetectX86Features() -{ - uint32 cpuid[4], cpuid1[4]; - if (!CpuId(0, cpuid)) - return; - if (!CpuId(1, cpuid1)) - return; - - g_hasMMX = (cpuid1[3] & (1 << 23)) != 0; - if ((cpuid1[3] & (1 << 26)) != 0) - g_hasSSE2 = TrySSE2(); - g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9)); - g_hasAESNI = g_hasSSE2 && (cpuid1[2] & (1<<25)); - g_hasCLMUL = g_hasSSE2 && (cpuid1[2] & (1<<1)); - - if ((cpuid1[3] & (1 << 25)) != 0) - g_hasISSE = 1; - else - { - uint32 cpuid2[4]; - CpuId(0x080000000, cpuid2); - if (cpuid2[0] >= 0x080000001) - { - CpuId(0x080000001, cpuid2); - g_hasISSE = (cpuid2[3] & (1 << 22)) != 0; - } - } - - if (IsIntel(cpuid)) - { - g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf; - g_cacheLineSize = 8 * GETBYTE(cpuid1[1], 1); - } - else if (IsAMD(cpuid)) - { - CpuId(0x80000005, cpuid); - g_cacheLineSize = GETBYTE(cpuid[2], 0); - } - - if (!g_cacheLineSize) - g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; - - *((volatile int*)&g_x86DetectionDone) = 1; -} - -#endif +/* cpu.c - written and placed in the public domain by Wei Dai */ + +#include "cpu.h" +#include "misc.h" + +#ifndef EXCEPTION_EXECUTE_HANDLER +#define EXCEPTION_EXECUTE_HANDLER 1 +#endif + +#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY +#include +#include +#endif + +#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE +#include +#endif + +#ifdef CRYPTOPP_CPUID_AVAILABLE + +#if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64 + +int CpuId(uint32 input, uint32 output[4]) +{ + __cpuid((int *)output, input); + return 1; +} + +#else + +#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY + +#if defined(__cplusplus) +extern "C" { +#endif + +typedef void (*SigHandler)(int); + +static jmp_buf s_jmpNoCPUID; +static void SigIllHandlerCPUID(int p) +{ + longjmp(s_jmpNoCPUID, 1); +} + +#if CRYPTOPP_BOOL_X64 == 0 +static jmp_buf s_jmpNoSSE2; +static void SigIllHandlerSSE2(int p) +{ + longjmp(s_jmpNoSSE2, 1); +} +#endif + +#if defined(__cplusplus) +} +#endif +#endif + +int CpuId(uint32 input, uint32 output[4]) +{ +#ifdef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY + __try + { + __asm + { + mov eax, input + mov ecx, 0 + cpuid + mov edi, output + mov [edi], eax + mov [edi+4], ebx + mov [edi+8], ecx + mov [edi+12], edx + } + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + return 0; + } + + // function 0 returns the highest basic function understood in EAX + if(input == 0) + return !!output[0]? 1 : 0; + + return 1; +#else + // longjmp and clobber warnings. Volatile is required. + // http://github.com/weidai11/cryptopp/issues/24 + // http://stackoverflow.com/q/7721854 + volatile int result = 1; + + SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID); + if (oldHandler == SIG_ERR) + result = 0; + + if (setjmp(s_jmpNoCPUID)) + result = 0; + else + { + asm volatile + ( + // save ebx in case -fPIC is being used + // TODO: this might need an early clobber on EDI. +#if CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 + "pushq %%rbx; cpuid; mov %%ebx, %%edi; popq %%rbx" +#else + "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx" +#endif + : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3]) + : "a" (input), "c" (0) + ); + } + + signal(SIGILL, oldHandler); + return result; +#endif +} + +#endif + +static int TrySSE2() +{ +#if CRYPTOPP_BOOL_X64 + return 1; +#elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) + __try + { +#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE + AS2(por xmm0, xmm0) // executing SSE2 instruction +#elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE + __m128i x = _mm_setzero_si128(); + return _mm_cvtsi128_si32(x) == 0 ? 1 : 0; +#endif + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + return 0; + } + return 1; +#else + // longjmp and clobber warnings. Volatile is required. + // http://github.com/weidai11/cryptopp/issues/24 + // http://stackoverflow.com/q/7721854 + volatile int result = 1; + + SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2); + if (oldHandler == SIG_ERR) + return 0; + + if (setjmp(s_jmpNoSSE2)) + result = 1; + else + { +#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE + __asm __volatile ("por %xmm0, %xmm0"); +#elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE + __m128i x = _mm_setzero_si128(); + result = _mm_cvtsi128_si32(x) == 0? 1 : 0; +#endif + } + + signal(SIGILL, oldHandler); + return result; +#endif +} + +int g_x86DetectionDone = 0; +int g_hasISSE = 0, g_hasSSE2 = 0, g_hasSSSE3 = 0, g_hasMMX = 0, g_hasAESNI = 0, g_hasCLMUL = 0, g_isP4 = 0; +uint32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; + +VC_INLINE int IsIntel(const uint32 output[4]) +{ + // This is the "GenuineIntel" string + return (output[1] /*EBX*/ == 0x756e6547) && + (output[2] /*ECX*/ == 0x6c65746e) && + (output[3] /*EDX*/ == 0x49656e69); +} + +VC_INLINE int IsAMD(const uint32 output[4]) +{ + // This is the "AuthenticAMD" string + return (output[1] /*EBX*/ == 0x68747541) && + (output[2] /*ECX*/ == 0x69746E65) && + (output[3] /*EDX*/ == 0x444D4163); +} + +void DetectX86Features() +{ + uint32 cpuid[4], cpuid1[4]; + if (!CpuId(0, cpuid)) + return; + if (!CpuId(1, cpuid1)) + return; + + g_hasMMX = (cpuid1[3] & (1 << 23)) != 0; + if ((cpuid1[3] & (1 << 26)) != 0) + g_hasSSE2 = TrySSE2(); + g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9)); + g_hasAESNI = g_hasSSE2 && (cpuid1[2] & (1<<25)); + g_hasCLMUL = g_hasSSE2 && (cpuid1[2] & (1<<1)); + + if ((cpuid1[3] & (1 << 25)) != 0) + g_hasISSE = 1; + else + { + uint32 cpuid2[4]; + CpuId(0x080000000, cpuid2); + if (cpuid2[0] >= 0x080000001) + { + CpuId(0x080000001, cpuid2); + g_hasISSE = (cpuid2[3] & (1 << 22)) != 0; + } + } + + if (IsIntel(cpuid)) + { + g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf; + g_cacheLineSize = 8 * GETBYTE(cpuid1[1], 1); + } + else if (IsAMD(cpuid)) + { + CpuId(0x80000005, cpuid); + g_cacheLineSize = GETBYTE(cpuid[2], 0); + } + + if (!g_cacheLineSize) + g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; + + *((volatile int*)&g_x86DetectionDone) = 1; +} + +#endif diff --git a/src/Crypto/cpu.h b/src/Crypto/cpu.h index da8d14cb..7ef509ec 100644 --- a/src/Crypto/cpu.h +++ b/src/Crypto/cpu.h @@ -1,308 +1,308 @@ -#ifndef CRYPTOPP_CPU_H -#define CRYPTOPP_CPU_H - -#include "Common/Tcdefs.h" -#include "config.h" - -#ifdef CRYPTOPP_GENERATE_X64_MASM - -#define CRYPTOPP_X86_ASM_AVAILABLE -#define CRYPTOPP_BOOL_X64 1 -#define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 1 - -#else - -#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE -#include -#endif - -#if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE -#if defined(__SSSE3__) || defined(__INTEL_COMPILER) -#ifdef TC_WINDOWS_DRIVER -extern __m128i _mm_shuffle_epi8 (__m128i a, __m128i b); -#else -#include -#endif -#endif - -#if defined(__SSE4_1__) || defined(__INTEL_COMPILER) -#ifdef TC_WINDOWS_DRIVER -extern int _mm_extract_epi32(__m128i src, const int ndx); -extern __m128i _mm_insert_epi32(__m128i dst, int s, const int ndx); -#else -#include -#endif -#endif - -#if (defined(__AES__) && defined(__PCLMUL__)) || defined(__INTEL_COMPILER) -#ifdef TC_WINDOWS_DRIVER -extern __m128i _mm_clmulepi64_si128(__m128i v1, __m128i v2, - const int imm8); -extern __m128i _mm_aeskeygenassist_si128(__m128i ckey, const int rcon); -extern __m128i _mm_aesimc_si128(__m128i v); -extern __m128i _mm_aesenc_si128(__m128i v, __m128i rkey); -extern __m128i _mm_aesenclast_si128(__m128i v, __m128i rkey); -extern __m128i _mm_aesdec_si128(__m128i v, __m128i rkey); -extern __m128i _mm_aesdeclast_si128(__m128i v, __m128i rkey); -#else -#include -#endif -#endif -#endif - -#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 - -#define CRYPTOPP_CPUID_AVAILABLE - -#if defined(__cplusplus) -extern "C" { -#endif - -// these should not be used directly -extern int g_x86DetectionDone; -extern int g_hasSSSE3; -extern int g_hasAESNI; -extern int g_hasCLMUL; -extern int g_isP4; -extern uint32 g_cacheLineSize; -void DetectX86Features(); // must be called at the start of the program/driver -int CpuId(uint32 input, uint32 *output); - -#if CRYPTOPP_BOOL_X64 -#define HasSSE2() 1 -#define HasISSE() 1 -#define HasMMX() 1 -#else - -extern int g_hasSSE2; -extern int g_hasISSE; -extern int g_hasMMX; - -#define HasSSE2() g_hasSSE2 -#define HasISSE() g_hasISSE -#define HasMMX() g_hasMMX - -#endif - -#define HasSSSE3() g_hasSSSE3 -#define HasAESNI() g_hasAESNI -#define HasCLMUL() g_hasCLMUL -#define IsP4() g_isP4 -#define GetCacheLineSize() g_cacheLineSize - -#if defined(__cplusplus) -} -#endif - -#else - -#define GetCacheLineSize() CRYPTOPP_L1_CACHE_LINE_SIZE - -#endif - -#endif - -#ifdef CRYPTOPP_GENERATE_X64_MASM - #define AS1(x) x*newline* - #define AS2(x, y) x, y*newline* - #define AS3(x, y, z) x, y, z*newline* - #define ASS(x, y, a, b, c, d) x, y, a*64+b*16+c*4+d*newline* - #define ASL(x) label##x:*newline* - #define ASJ(x, y, z) x label##y*newline* - #define ASC(x, y) x label##y*newline* - #define AS_HEX(y) 0##y##h -#elif defined(_MSC_VER) || defined(__BORLANDC__) - #define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY - #define AS1(x) __asm {x} - #define AS2(x, y) __asm {x, y} - #define AS3(x, y, z) __asm {x, y, z} - #define ASS(x, y, a, b, c, d) __asm {x, y, (a)*64+(b)*16+(c)*4+(d)} - #define ASL(x) __asm {label##x:} - #define ASJ(x, y, z) __asm {x label##y} - #define ASC(x, y) __asm {x label##y} - #define CRYPTOPP_NAKED __declspec(naked) - #define AS_HEX(y) 0x##y -#else - #define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY - - #if defined(CRYPTOPP_CLANG_VERSION) || defined(CRYPTOPP_APPLE_CLANG_VERSION) - #define NEW_LINE "\n" - #define INTEL_PREFIX ".intel_syntax;" - #define INTEL_NOPREFIX ".intel_syntax;" - #define ATT_PREFIX ".att_syntax;" - #define ATT_NOPREFIX ".att_syntax;" - #else - #define NEW_LINE - #define INTEL_PREFIX ".intel_syntax prefix;" - #define INTEL_NOPREFIX ".intel_syntax noprefix;" - #define ATT_PREFIX ".att_syntax prefix;" - #define ATT_NOPREFIX ".att_syntax noprefix;" - #endif - - // define these in two steps to allow arguments to be expanded - #define GNU_AS1(x) #x ";" NEW_LINE - #define GNU_AS2(x, y) #x ", " #y ";" NEW_LINE - #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";" NEW_LINE - #define GNU_ASL(x) "\n" #x ":" NEW_LINE - #define GNU_ASJ(x, y, z) #x " " #y #z ";" NEW_LINE - #define AS1(x) GNU_AS1(x) - #define AS2(x, y) GNU_AS2(x, y) - #define AS3(x, y, z) GNU_AS3(x, y, z) - #define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";" - #define ASL(x) GNU_ASL(x) - #define ASJ(x, y, z) GNU_ASJ(x, y, z) - #define ASC(x, y) #x " " #y ";" - #define CRYPTOPP_NAKED - #define AS_HEX(y) 0x##y -#endif - -#define IF0(y) -#define IF1(y) y - -// Should be confined to GCC, but its used to help manage Clang 3.4 compiler error. -// Also see LLVM Bug 24232, http://llvm.org/bugs/show_bug.cgi?id=24232 . -#ifndef INTEL_PREFIX -#define INTEL_PREFIX -#endif -#ifndef INTEL_NOPREFIX -#define INTEL_NOPREFIX -#endif -#ifndef ATT_PREFIX -#define ATT_PREFIX -#endif -#ifndef ATT_NOPREFIX -#define ATT_NOPREFIX -#endif - -#ifdef CRYPTOPP_GENERATE_X64_MASM -#define ASM_MOD(x, y) ((x) MOD (y)) -#define XMMWORD_PTR XMMWORD PTR -#else -// GNU assembler doesn't seem to have mod operator -#define ASM_MOD(x, y) ((x)-((x)/(y))*(y)) -// GAS 2.15 doesn't support XMMWORD PTR. it seems necessary only for MASM -#define XMMWORD_PTR -#endif - -#if CRYPTOPP_BOOL_X86 - #define AS_REG_1 ecx - #define AS_REG_2 edx - #define AS_REG_3 esi - #define AS_REG_4 edi - #define AS_REG_5 eax - #define AS_REG_6 ebx - #define AS_REG_7 ebp - #define AS_REG_1d ecx - #define AS_REG_2d edx - #define AS_REG_3d esi - #define AS_REG_4d edi - #define AS_REG_5d eax - #define AS_REG_6d ebx - #define AS_REG_7d ebp - #define WORD_SZ 4 - #define WORD_REG(x) e##x - #define WORD_PTR DWORD PTR - #define AS_PUSH_IF86(x) AS1(push e##x) - #define AS_POP_IF86(x) AS1(pop e##x) - #define AS_JCXZ jecxz -#elif CRYPTOPP_BOOL_X32 - #define AS_REG_1 ecx - #define AS_REG_2 edx - #define AS_REG_3 r8d - #define AS_REG_4 r9d - #define AS_REG_5 eax - #define AS_REG_6 r10d - #define AS_REG_7 r11d - #define AS_REG_1d ecx - #define AS_REG_2d edx - #define AS_REG_3d r8d - #define AS_REG_4d r9d - #define AS_REG_5d eax - #define AS_REG_6d r10d - #define AS_REG_7d r11d - #define WORD_SZ 4 - #define WORD_REG(x) e##x - #define WORD_PTR DWORD PTR - #define AS_PUSH_IF86(x) AS1(push r##x) - #define AS_POP_IF86(x) AS1(pop r##x) - #define AS_JCXZ jecxz -#elif CRYPTOPP_BOOL_X64 - #ifdef CRYPTOPP_GENERATE_X64_MASM - #define AS_REG_1 rcx - #define AS_REG_2 rdx - #define AS_REG_3 r8 - #define AS_REG_4 r9 - #define AS_REG_5 rax - #define AS_REG_6 r10 - #define AS_REG_7 r11 - #define AS_REG_1d ecx - #define AS_REG_2d edx - #define AS_REG_3d r8d - #define AS_REG_4d r9d - #define AS_REG_5d eax - #define AS_REG_6d r10d - #define AS_REG_7d r11d - #else - #define AS_REG_1 rdi - #define AS_REG_2 rsi - #define AS_REG_3 rdx - #define AS_REG_4 rcx - #define AS_REG_5 r8 - #define AS_REG_6 r9 - #define AS_REG_7 r10 - #define AS_REG_1d edi - #define AS_REG_2d esi - #define AS_REG_3d edx - #define AS_REG_4d ecx - #define AS_REG_5d r8d - #define AS_REG_6d r9d - #define AS_REG_7d r10d - #endif - #define WORD_SZ 8 - #define WORD_REG(x) r##x - #define WORD_PTR QWORD PTR - #define AS_PUSH_IF86(x) - #define AS_POP_IF86(x) - #define AS_JCXZ jrcxz -#endif - -// helper macro for stream cipher output -#define AS_XMM_OUTPUT4(labelPrefix, inputPtr, outputPtr, x0, x1, x2, x3, t, p0, p1, p2, p3, increment)\ - AS2( test inputPtr, inputPtr)\ - ASC( jz, labelPrefix##3)\ - AS2( test inputPtr, 15)\ - ASC( jnz, labelPrefix##7)\ - AS2( pxor xmm##x0, [inputPtr+p0*16])\ - AS2( pxor xmm##x1, [inputPtr+p1*16])\ - AS2( pxor xmm##x2, [inputPtr+p2*16])\ - AS2( pxor xmm##x3, [inputPtr+p3*16])\ - AS2( add inputPtr, increment*16)\ - ASC( jmp, labelPrefix##3)\ - ASL(labelPrefix##7)\ - AS2( movdqu xmm##t, [inputPtr+p0*16])\ - AS2( pxor xmm##x0, xmm##t)\ - AS2( movdqu xmm##t, [inputPtr+p1*16])\ - AS2( pxor xmm##x1, xmm##t)\ - AS2( movdqu xmm##t, [inputPtr+p2*16])\ - AS2( pxor xmm##x2, xmm##t)\ - AS2( movdqu xmm##t, [inputPtr+p3*16])\ - AS2( pxor xmm##x3, xmm##t)\ - AS2( add inputPtr, increment*16)\ - ASL(labelPrefix##3)\ - AS2( test outputPtr, 15)\ - ASC( jnz, labelPrefix##8)\ - AS2( movdqa [outputPtr+p0*16], xmm##x0)\ - AS2( movdqa [outputPtr+p1*16], xmm##x1)\ - AS2( movdqa [outputPtr+p2*16], xmm##x2)\ - AS2( movdqa [outputPtr+p3*16], xmm##x3)\ - ASC( jmp, labelPrefix##9)\ - ASL(labelPrefix##8)\ - AS2( movdqu [outputPtr+p0*16], xmm##x0)\ - AS2( movdqu [outputPtr+p1*16], xmm##x1)\ - AS2( movdqu [outputPtr+p2*16], xmm##x2)\ - AS2( movdqu [outputPtr+p3*16], xmm##x3)\ - ASL(labelPrefix##9)\ - AS2( add outputPtr, increment*16) - - -#endif +#ifndef CRYPTOPP_CPU_H +#define CRYPTOPP_CPU_H + +#include "Common/Tcdefs.h" +#include "config.h" + +#ifdef CRYPTOPP_GENERATE_X64_MASM + +#define CRYPTOPP_X86_ASM_AVAILABLE +#define CRYPTOPP_BOOL_X64 1 +#define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 1 + +#else + +#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE +#include +#endif + +#if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE +#if defined(__SSSE3__) || defined(__INTEL_COMPILER) +#ifdef TC_WINDOWS_DRIVER +extern __m128i _mm_shuffle_epi8 (__m128i a, __m128i b); +#else +#include +#endif +#endif + +#if defined(__SSE4_1__) || defined(__INTEL_COMPILER) +#ifdef TC_WINDOWS_DRIVER +extern int _mm_extract_epi32(__m128i src, const int ndx); +extern __m128i _mm_insert_epi32(__m128i dst, int s, const int ndx); +#else +#include +#endif +#endif + +#if (defined(__AES__) && defined(__PCLMUL__)) || defined(__INTEL_COMPILER) +#ifdef TC_WINDOWS_DRIVER +extern __m128i _mm_clmulepi64_si128(__m128i v1, __m128i v2, + const int imm8); +extern __m128i _mm_aeskeygenassist_si128(__m128i ckey, const int rcon); +extern __m128i _mm_aesimc_si128(__m128i v); +extern __m128i _mm_aesenc_si128(__m128i v, __m128i rkey); +extern __m128i _mm_aesenclast_si128(__m128i v, __m128i rkey); +extern __m128i _mm_aesdec_si128(__m128i v, __m128i rkey); +extern __m128i _mm_aesdeclast_si128(__m128i v, __m128i rkey); +#else +#include +#endif +#endif +#endif + +#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 + +#define CRYPTOPP_CPUID_AVAILABLE + +#if defined(__cplusplus) +extern "C" { +#endif + +// these should not be used directly +extern int g_x86DetectionDone; +extern int g_hasSSSE3; +extern int g_hasAESNI; +extern int g_hasCLMUL; +extern int g_isP4; +extern uint32 g_cacheLineSize; +void DetectX86Features(); // must be called at the start of the program/driver +int CpuId(uint32 input, uint32 *output); + +#if CRYPTOPP_BOOL_X64 +#define HasSSE2() 1 +#define HasISSE() 1 +#define HasMMX() 1 +#else + +extern int g_hasSSE2; +extern int g_hasISSE; +extern int g_hasMMX; + +#define HasSSE2() g_hasSSE2 +#define HasISSE() g_hasISSE +#define HasMMX() g_hasMMX + +#endif + +#define HasSSSE3() g_hasSSSE3 +#define HasAESNI() g_hasAESNI +#define HasCLMUL() g_hasCLMUL +#define IsP4() g_isP4 +#define GetCacheLineSize() g_cacheLineSize + +#if defined(__cplusplus) +} +#endif + +#else + +#define GetCacheLineSize() CRYPTOPP_L1_CACHE_LINE_SIZE + +#endif + +#endif + +#ifdef CRYPTOPP_GENERATE_X64_MASM + #define AS1(x) x*newline* + #define AS2(x, y) x, y*newline* + #define AS3(x, y, z) x, y, z*newline* + #define ASS(x, y, a, b, c, d) x, y, a*64+b*16+c*4+d*newline* + #define ASL(x) label##x:*newline* + #define ASJ(x, y, z) x label##y*newline* + #define ASC(x, y) x label##y*newline* + #define AS_HEX(y) 0##y##h +#elif defined(_MSC_VER) || defined(__BORLANDC__) + #define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY + #define AS1(x) __asm {x} + #define AS2(x, y) __asm {x, y} + #define AS3(x, y, z) __asm {x, y, z} + #define ASS(x, y, a, b, c, d) __asm {x, y, (a)*64+(b)*16+(c)*4+(d)} + #define ASL(x) __asm {label##x:} + #define ASJ(x, y, z) __asm {x label##y} + #define ASC(x, y) __asm {x label##y} + #define CRYPTOPP_NAKED __declspec(naked) + #define AS_HEX(y) 0x##y +#else + #define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY + + #if defined(CRYPTOPP_CLANG_VERSION) || defined(CRYPTOPP_APPLE_CLANG_VERSION) + #define NEW_LINE "\n" + #define INTEL_PREFIX ".intel_syntax;" + #define INTEL_NOPREFIX ".intel_syntax;" + #define ATT_PREFIX ".att_syntax;" + #define ATT_NOPREFIX ".att_syntax;" + #else + #define NEW_LINE + #define INTEL_PREFIX ".intel_syntax prefix;" + #define INTEL_NOPREFIX ".intel_syntax noprefix;" + #define ATT_PREFIX ".att_syntax prefix;" + #define ATT_NOPREFIX ".att_syntax noprefix;" + #endif + + // define these in two steps to allow arguments to be expanded + #define GNU_AS1(x) #x ";" NEW_LINE + #define GNU_AS2(x, y) #x ", " #y ";" NEW_LINE + #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";" NEW_LINE + #define GNU_ASL(x) "\n" #x ":" NEW_LINE + #define GNU_ASJ(x, y, z) #x " " #y #z ";" NEW_LINE + #define AS1(x) GNU_AS1(x) + #define AS2(x, y) GNU_AS2(x, y) + #define AS3(x, y, z) GNU_AS3(x, y, z) + #define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";" + #define ASL(x) GNU_ASL(x) + #define ASJ(x, y, z) GNU_ASJ(x, y, z) + #define ASC(x, y) #x " " #y ";" + #define CRYPTOPP_NAKED + #define AS_HEX(y) 0x##y +#endif + +#define IF0(y) +#define IF1(y) y + +// Should be confined to GCC, but its used to help manage Clang 3.4 compiler error. +// Also see LLVM Bug 24232, http://llvm.org/bugs/show_bug.cgi?id=24232 . +#ifndef INTEL_PREFIX +#define INTEL_PREFIX +#endif +#ifndef INTEL_NOPREFIX +#define INTEL_NOPREFIX +#endif +#ifndef ATT_PREFIX +#define ATT_PREFIX +#endif +#ifndef ATT_NOPREFIX +#define ATT_NOPREFIX +#endif + +#ifdef CRYPTOPP_GENERATE_X64_MASM +#define ASM_MOD(x, y) ((x) MOD (y)) +#define XMMWORD_PTR XMMWORD PTR +#else +// GNU assembler doesn't seem to have mod operator +#define ASM_MOD(x, y) ((x)-((x)/(y))*(y)) +// GAS 2.15 doesn't support XMMWORD PTR. it seems necessary only for MASM +#define XMMWORD_PTR +#endif + +#if CRYPTOPP_BOOL_X86 + #define AS_REG_1 ecx + #define AS_REG_2 edx + #define AS_REG_3 esi + #define AS_REG_4 edi + #define AS_REG_5 eax + #define AS_REG_6 ebx + #define AS_REG_7 ebp + #define AS_REG_1d ecx + #define AS_REG_2d edx + #define AS_REG_3d esi + #define AS_REG_4d edi + #define AS_REG_5d eax + #define AS_REG_6d ebx + #define AS_REG_7d ebp + #define WORD_SZ 4 + #define WORD_REG(x) e##x + #define WORD_PTR DWORD PTR + #define AS_PUSH_IF86(x) AS1(push e##x) + #define AS_POP_IF86(x) AS1(pop e##x) + #define AS_JCXZ jecxz +#elif CRYPTOPP_BOOL_X32 + #define AS_REG_1 ecx + #define AS_REG_2 edx + #define AS_REG_3 r8d + #define AS_REG_4 r9d + #define AS_REG_5 eax + #define AS_REG_6 r10d + #define AS_REG_7 r11d + #define AS_REG_1d ecx + #define AS_REG_2d edx + #define AS_REG_3d r8d + #define AS_REG_4d r9d + #define AS_REG_5d eax + #define AS_REG_6d r10d + #define AS_REG_7d r11d + #define WORD_SZ 4 + #define WORD_REG(x) e##x + #define WORD_PTR DWORD PTR + #define AS_PUSH_IF86(x) AS1(push r##x) + #define AS_POP_IF86(x) AS1(pop r##x) + #define AS_JCXZ jecxz +#elif CRYPTOPP_BOOL_X64 + #ifdef CRYPTOPP_GENERATE_X64_MASM + #define AS_REG_1 rcx + #define AS_REG_2 rdx + #define AS_REG_3 r8 + #define AS_REG_4 r9 + #define AS_REG_5 rax + #define AS_REG_6 r10 + #define AS_REG_7 r11 + #define AS_REG_1d ecx + #define AS_REG_2d edx + #define AS_REG_3d r8d + #define AS_REG_4d r9d + #define AS_REG_5d eax + #define AS_REG_6d r10d + #define AS_REG_7d r11d + #else + #define AS_REG_1 rdi + #define AS_REG_2 rsi + #define AS_REG_3 rdx + #define AS_REG_4 rcx + #define AS_REG_5 r8 + #define AS_REG_6 r9 + #define AS_REG_7 r10 + #define AS_REG_1d edi + #define AS_REG_2d esi + #define AS_REG_3d edx + #define AS_REG_4d ecx + #define AS_REG_5d r8d + #define AS_REG_6d r9d + #define AS_REG_7d r10d + #endif + #define WORD_SZ 8 + #define WORD_REG(x) r##x + #define WORD_PTR QWORD PTR + #define AS_PUSH_IF86(x) + #define AS_POP_IF86(x) + #define AS_JCXZ jrcxz +#endif + +// helper macro for stream cipher output +#define AS_XMM_OUTPUT4(labelPrefix, inputPtr, outputPtr, x0, x1, x2, x3, t, p0, p1, p2, p3, increment)\ + AS2( test inputPtr, inputPtr)\ + ASC( jz, labelPrefix##3)\ + AS2( test inputPtr, 15)\ + ASC( jnz, labelPrefix##7)\ + AS2( pxor xmm##x0, [inputPtr+p0*16])\ + AS2( pxor xmm##x1, [inputPtr+p1*16])\ + AS2( pxor xmm##x2, [inputPtr+p2*16])\ + AS2( pxor xmm##x3, [inputPtr+p3*16])\ + AS2( add inputPtr, increment*16)\ + ASC( jmp, labelPrefix##3)\ + ASL(labelPrefix##7)\ + AS2( movdqu xmm##t, [inputPtr+p0*16])\ + AS2( pxor xmm##x0, xmm##t)\ + AS2( movdqu xmm##t, [inputPtr+p1*16])\ + AS2( pxor xmm##x1, xmm##t)\ + AS2( movdqu xmm##t, [inputPtr+p2*16])\ + AS2( pxor xmm##x2, xmm##t)\ + AS2( movdqu xmm##t, [inputPtr+p3*16])\ + AS2( pxor xmm##x3, xmm##t)\ + AS2( add inputPtr, increment*16)\ + ASL(labelPrefix##3)\ + AS2( test outputPtr, 15)\ + ASC( jnz, labelPrefix##8)\ + AS2( movdqa [outputPtr+p0*16], xmm##x0)\ + AS2( movdqa [outputPtr+p1*16], xmm##x1)\ + AS2( movdqa [outputPtr+p2*16], xmm##x2)\ + AS2( movdqa [outputPtr+p3*16], xmm##x3)\ + ASC( jmp, labelPrefix##9)\ + ASL(labelPrefix##8)\ + AS2( movdqu [outputPtr+p0*16], xmm##x0)\ + AS2( movdqu [outputPtr+p1*16], xmm##x1)\ + AS2( movdqu [outputPtr+p2*16], xmm##x2)\ + AS2( movdqu [outputPtr+p3*16], xmm##x3)\ + ASL(labelPrefix##9)\ + AS2( add outputPtr, increment*16) + + +#endif -- cgit v1.2.3