From 263abeee3a8c97e98fec49ee0ce628d6c5c5df50 Mon Sep 17 00:00:00 2001 From: Mounir IDRASSI Date: Mon, 28 Nov 2016 00:29:36 +0100 Subject: Crypto: Add optimized Twofish assembly implementation for x86_64. --- src/Build/Include/Makefile.inc | 4 + src/Common/Crypto.c | 13 + src/Crypto/Crypto.vcxproj | 11 + src/Crypto/Crypto.vcxproj.filters | 3 + src/Crypto/Makefile.inc | 6 + src/Crypto/Sources | 4 +- src/Crypto/Twofish.c | 1056 ++++++++++++++++++++++--------------- src/Crypto/Twofish.h | 20 +- src/Crypto/Twofish_x64.S | 314 +++++++++++ src/Crypto/Twofish_x86.S | 0 src/Volume/Cipher.cpp | 33 ++ src/Volume/Cipher.h | 3 +- src/Volume/Volume.make | 5 + 13 files changed, 1050 insertions(+), 422 deletions(-) create mode 100644 src/Crypto/Twofish_x64.S create mode 100644 src/Crypto/Twofish_x86.S diff --git a/src/Build/Include/Makefile.inc b/src/Build/Include/Makefile.inc index 35d1f29c..9a38dcee 100644 --- a/src/Build/Include/Makefile.inc +++ b/src/Build/Include/Makefile.inc @@ -23,6 +23,10 @@ clean: %.o: %.cpp @echo Compiling $(false + + + true + true + Document + echo %(Filename)%(Extension) & yasm.exe -p gas -D WINABI -f win64 -o "$(TargetDir)\%(Filename).obj" -l "$(TargetDir)\%(Filename).lst" "%(FullPath)" + $(TargetDir)\%(Filename).obj;%(Outputs) + echo %(Filename)%(Extension) & yasm.exe -p gas -D WINABI -f win64 -o "$(TargetDir)\%(Filename).obj" -l "$(TargetDir)\%(Filename).lst" "%(FullPath)" + $(TargetDir)\%(Filename).obj;%(Outputs) + + diff --git a/src/Crypto/Crypto.vcxproj.filters b/src/Crypto/Crypto.vcxproj.filters index ad933b0a..702dedbf 100644 --- a/src/Crypto/Crypto.vcxproj.filters +++ b/src/Crypto/Crypto.vcxproj.filters @@ -121,5 +121,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/src/Crypto/Makefile.inc b/src/Crypto/Makefile.inc index bd5b80d9..e014976a 100644 --- a/src/Crypto/Makefile.inc +++ b/src/Crypto/Makefile.inc @@ -1,9 +1,12 @@ TC_ASFLAGS = -Xvc -Ox +VC_YASMFLAGS = -Xvc -p gas -D WINABI !if "$(TC_ARCH)" == "x86" 
TC_ASFLAGS = $(TC_ASFLAGS) -f win32 --prefix _ -D MS_STDCALL -D DLL_EXPORT +VC_YASMFLAGS = $(VC_YASMFLAGS) -f win32 !else TC_ASFLAGS = $(TC_ASFLAGS) -f win64 +VC_YASMFLAGS = $(VC_YASMFLAGS) -f win64 !endif TC_ASM_ERR_LOG = ..\Driver\build_errors_asm.log @@ -16,3 +19,6 @@ TC_ASM_ERR_LOG = ..\Driver\build_errors_asm.log "$(OBJ_PATH)\$(O)\Aes_hw_cpu.obj": Aes_hw_cpu.asm nasm.exe $(TC_ASFLAGS) -o "$@" -l "$(OBJ_PATH)\$(O)\Aes_hw_cpu.lst" Aes_hw_cpu.asm 2>$(TC_ASM_ERR_LOG) + +"$(OBJ_PATH)\$(O)\Twofish_$(TC_ARCH).obj": Twofish_$(TC_ARCH).S + yasm.exe $(VC_YASMFLAGS) -o "$@" -l "$(OBJ_PATH)\$(O)\Twofish_$(TC_ARCH).lst" Twofish_$(TC_ARCH).S 2>$(TC_ASM_ERR_LOG) diff --git a/src/Crypto/Sources b/src/Crypto/Sources index 07e66b71..edddd4c6 100644 --- a/src/Crypto/Sources +++ b/src/Crypto/Sources @@ -6,7 +6,8 @@ INCLUDES = .. NTTARGETFILES = \ "$(OBJ_PATH)\$(O)\Aes_$(TC_ARCH).obj" \ "$(OBJ_PATH)\$(O)\Aes_hw_cpu.obj" \ - "$(OBJ_PATH)\$(O)\gost89_$(TC_ARCH).obj" + "$(OBJ_PATH)\$(O)\gost89_$(TC_ARCH).obj" \ + "$(OBJ_PATH)\$(O)\Twofish_$(TC_ARCH).obj" SOURCES = \ Aes_$(TC_ARCH).asm \ @@ -20,6 +21,7 @@ SOURCES = \ SerpentFast_simd.cpp \ Sha2.c \ Twofish.c \ + Twofish_$(TC_ARCH).S \ GostCipher.c \ Streebog.c \ kuznyechik.c \ diff --git a/src/Crypto/Twofish.c b/src/Crypto/Twofish.c index a04f36e9..e6ba86c1 100644 --- a/src/Crypto/Twofish.c +++ b/src/Crypto/Twofish.c @@ -44,354 +44,608 @@ #include "misc.h" -/* Implementation based on ideas from Botan library - (C) 1999-2007 Jack Lloyd +/* C implementation based on code written by kerukuro for cppcrypto library + (http://cppcrypto.sourceforge.net/) and released into public domain. 
+ With ideas from Botan library (C) 1999-2007 Jack Lloyd Botan is released under the Simplified BSD License (see license.txt) */ -#define get_byte(i,n) (byte) (n >> (((~i)&3) << 3)) - -static const byte Q0[256] = { - 0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78, - 0xE4, 0xDD, 0xD1, 0x38, 0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C, - 0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48, 0xF2, 0xD0, 0x8B, 0x30, - 0x84, 0x54, 0xDF, 0x23, 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82, - 0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C, 0xA6, 0xEB, 0xA5, 0xBE, - 0x16, 0x0C, 0xE3, 0x61, 0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B, - 0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1, 0xE1, 0xE6, 0xBD, 0x45, - 0xE2, 0xF4, 0xB6, 0x66, 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7, - 0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA, 0xEA, 0x77, 0x39, 0xAF, - 0x33, 0xC9, 0x62, 0x71, 0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8, - 0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7, 0xA1, 0x1D, 0xAA, 0xED, - 0x06, 0x70, 0xB2, 0xD2, 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90, - 0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB, 0x9E, 0x9C, 0x52, 0x1B, - 0x5F, 0x93, 0x0A, 0xEF, 0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B, - 0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64, 0x2A, 0xCE, 0xCB, 0x2F, - 0xFC, 0x97, 0x05, 0x7A, 0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A, - 0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02, 0xB8, 0xDA, 0xB0, 0x17, - 0x55, 0x1F, 0x8A, 0x7D, 0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72, - 0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34, 0x6E, 0x50, 0xDE, 0x68, - 0x65, 0xBC, 0xDB, 0xF8, 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4, - 0xCA, 0x10, 0x21, 0xF0, 0xD3, 0x5D, 0x0F, 0x00, 0x6F, 0x9D, 0x36, 0x42, - 0x4A, 0x5E, 0xC1, 0xE0 }; - -static const byte Q1[256] = { - 0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8, 0x4A, 0xD3, 0xE6, 0x6B, - 0x45, 0x7D, 0xE8, 0x4B, 0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1, - 0x30, 0x0F, 0xF8, 
0x1B, 0x87, 0xFA, 0x06, 0x3F, 0x5E, 0xBA, 0xAE, 0x5B, - 0x8A, 0x00, 0xBC, 0x9D, 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5, - 0xA0, 0x84, 0x07, 0x14, 0xB5, 0x90, 0x2C, 0xA3, 0xB2, 0x73, 0x4C, 0x54, - 0x92, 0x74, 0x36, 0x51, 0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96, - 0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C, 0x13, 0x95, 0x9C, 0xC7, - 0x24, 0x46, 0x3B, 0x70, 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8, - 0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC, 0x03, 0x6F, 0x08, 0xBF, - 0x40, 0xE7, 0x2B, 0xE2, 0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9, - 0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17, 0x66, 0x94, 0xA1, 0x1D, - 0x3D, 0xF0, 0xDE, 0xB3, 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E, - 0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49, 0x81, 0x88, 0xEE, 0x21, - 0xC4, 0x1A, 0xEB, 0xD9, 0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01, - 0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48, 0x4F, 0xF2, 0x65, 0x8E, - 0x78, 0x5C, 0x58, 0x19, 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64, - 0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5, 0xCE, 0xE9, 0x68, 0x44, - 0xE0, 0x4D, 0x43, 0x69, 0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E, - 0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC, 0x22, 0xC9, 0xC0, 0x9B, - 0x89, 0xD4, 0xED, 0xAB, 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9, - 0xD7, 0x61, 0x1E, 0xB4, 0x50, 0x04, 0xF6, 0xC2, 0x16, 0x25, 0x86, 0x56, - 0x55, 0x09, 0xBE, 0x91 }; - -static const byte RS[32] = { - 0x01, 0xA4, 0x02, 0xA4, 0xA4, 0x56, 0xA1, 0x55, 0x55, 0x82, 0xFC, 0x87, - 0x87, 0xF3, 0xC1, 0x5A, 0x5A, 0x1E, 0x47, 0x58, 0x58, 0xC6, 0xAE, 0xDB, - 0xDB, 0x68, 0x3D, 0x9E, 0x9E, 0xE5, 0x19, 0x03 }; - -static const byte EXP_TO_POLY[255] = { - 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, 0x9A, 0x79, 0xF2, - 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03, - 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, 0x8B, 0x5B, 0xB6, - 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, 0xA4, 0x05, 0x0A, - 0x14, 0x28, 
0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, 0xED, 0x97, 0x63, - 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, 0x0F, 0x1E, 0x3C, - 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, 0xF4, 0xA5, 0x07, - 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, 0x22, 0x44, 0x88, - 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, 0xA2, 0x09, 0x12, - 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, 0xCC, 0xD5, 0xE7, - 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, 0x1B, 0x36, 0x6C, - 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, 0x32, 0x64, 0xC8, - 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, 0x5A, 0xB4, 0x25, - 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, 0xAC, 0x15, 0x2A, - 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, 0x91, 0x6F, 0xDE, - 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, 0x3F, 0x7E, 0xFC, - 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, 0xB1, 0x2F, 0x5E, - 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, 0x82, 0x49, 0x92, - 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, 0x71, 0xE2, 0x89, - 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, 0xDB, 0xFB, 0xBB, - 0x3B, 0x76, 0xEC, 0x95, 0x67, 0xCE, 0xD1, 0xEF, 0x93, 0x6B, 0xD6, 0xE1, - 0x8F, 0x53, 0xA6 }; - -static const byte POLY_TO_EXP[255] = { - 0x00, 0x01, 0x17, 0x02, 0x2E, 0x18, 0x53, 0x03, 0x6A, 0x2F, 0x93, 0x19, - 0x34, 0x54, 0x45, 0x04, 0x5C, 0x6B, 0xB6, 0x30, 0xA6, 0x94, 0x4B, 0x1A, - 0x8C, 0x35, 0x81, 0x55, 0xAA, 0x46, 0x0D, 0x05, 0x24, 0x5D, 0x87, 0x6C, - 0x9B, 0xB7, 0xC1, 0x31, 0x2B, 0xA7, 0xA3, 0x95, 0x98, 0x4C, 0xCA, 0x1B, - 0xE6, 0x8D, 0x73, 0x36, 0xCD, 0x82, 0x12, 0x56, 0x62, 0xAB, 0xF0, 0x47, - 0x4F, 0x0E, 0xBD, 0x06, 0xD4, 0x25, 0xD2, 0x5E, 0x27, 0x88, 0x66, 0x6D, - 0xD6, 0x9C, 0x79, 0xB8, 0x08, 0xC2, 0xDF, 0x32, 0x68, 0x2C, 0xFD, 0xA8, - 0x8A, 0xA4, 0x5A, 0x96, 0x29, 0x99, 0x22, 0x4D, 0x60, 0xCB, 0xE4, 0x1C, - 0x7B, 0xE7, 0x3B, 0x8E, 0x9E, 0x74, 0xF4, 0x37, 0xD8, 0xCE, 0xF9, 0x83, - 0x6F, 0x13, 0xB2, 0x57, 
0xE1, 0x63, 0xDC, 0xAC, 0xC4, 0xF1, 0xAF, 0x48, - 0x0A, 0x50, 0x42, 0x0F, 0xBA, 0xBE, 0xC7, 0x07, 0xDE, 0xD5, 0x78, 0x26, - 0x65, 0xD3, 0xD1, 0x5F, 0xE3, 0x28, 0x21, 0x89, 0x59, 0x67, 0xFC, 0x6E, - 0xB1, 0xD7, 0xF8, 0x9D, 0xF3, 0x7A, 0x3A, 0xB9, 0xC6, 0x09, 0x41, 0xC3, - 0xAE, 0xE0, 0xDB, 0x33, 0x44, 0x69, 0x92, 0x2D, 0x52, 0xFE, 0x16, 0xA9, - 0x0C, 0x8B, 0x80, 0xA5, 0x4A, 0x5B, 0xB5, 0x97, 0xC9, 0x2A, 0xA2, 0x9A, - 0xC0, 0x23, 0x86, 0x4E, 0xBC, 0x61, 0xEF, 0xCC, 0x11, 0xE5, 0x72, 0x1D, - 0x3D, 0x7C, 0xEB, 0xE8, 0xE9, 0x3C, 0xEA, 0x8F, 0x7D, 0x9F, 0xEC, 0x75, - 0x1E, 0xF5, 0x3E, 0x38, 0xF6, 0xD9, 0x3F, 0xCF, 0x76, 0xFA, 0x1F, 0x84, - 0xA0, 0x70, 0xED, 0x14, 0x90, 0xB3, 0x7E, 0x58, 0xFB, 0xE2, 0x20, 0x64, - 0xD0, 0xDD, 0x77, 0xAD, 0xDA, 0xC5, 0x40, 0xF2, 0x39, 0xB0, 0xF7, 0x49, - 0xB4, 0x0B, 0x7F, 0x51, 0x15, 0x43, 0x91, 0x10, 0x71, 0xBB, 0xEE, 0xBF, - 0x85, 0xC8, 0xA1 }; - -static const uint32 MDS0[256] = { - 0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B, - 0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B, - 0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32, - 0x8282EED8, 0x525298FD, 0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1, - 0x24243C30, 0x5151E20F, 0xBABAC6F8, 0x4A4AF31B, 0xBFBF4887, 0x0D0D70FA, - 0xB0B0B306, 0x7575DE3F, 0xD2D2FD5E, 0x7D7D20BA, 0x666631AE, 0x3A3AA35B, - 0x59591C8A, 0x00000000, 0xCDCD93BC, 0x1A1AE09D, 0xAEAE2C6D, 0x7F7FABC1, - 0x2B2BC7B1, 0xBEBEB90E, 0xE0E0A080, 0x8A8A105D, 0x3B3B52D2, 0x6464BAD5, - 0xD8D888A0, 0xE7E7A584, 0x5F5FE807, 0x1B1B1114, 0x2C2CC2B5, 0xFCFCB490, - 0x3131272C, 0x808065A3, 0x73732AB2, 0x0C0C8173, 0x79795F4C, 0x6B6B4154, - 0x4B4B0292, 0x53536974, 0x94948F36, 0x83831F51, 0x2A2A3638, 0xC4C49CB0, - 0x2222C8BD, 0xD5D5F85A, 0xBDBDC3FC, 0x48487860, 0xFFFFCE62, 0x4C4C0796, - 0x4141776C, 0xC7C7E642, 0xEBEB24F7, 0x1C1C1410, 0x5D5D637C, 0x36362228, - 0x6767C027, 0xE9E9AF8C, 0x4444F913, 0x1414EA95, 0xF5F5BB9C, 0xCFCF18C7, - 0x3F3F2D24, 0xC0C0E346, 0x7272DB3B, 
0x54546C70, 0x29294CCA, 0xF0F035E3, - 0x0808FE85, 0xC6C617CB, 0xF3F34F11, 0x8C8CE4D0, 0xA4A45993, 0xCACA96B8, - 0x68683BA6, 0xB8B84D83, 0x38382820, 0xE5E52EFF, 0xADAD569F, 0x0B0B8477, - 0xC8C81DC3, 0x9999FFCC, 0x5858ED03, 0x19199A6F, 0x0E0E0A08, 0x95957EBF, - 0x70705040, 0xF7F730E7, 0x6E6ECF2B, 0x1F1F6EE2, 0xB5B53D79, 0x09090F0C, - 0x616134AA, 0x57571682, 0x9F9F0B41, 0x9D9D803A, 0x111164EA, 0x2525CDB9, - 0xAFAFDDE4, 0x4545089A, 0xDFDF8DA4, 0xA3A35C97, 0xEAEAD57E, 0x353558DA, - 0xEDEDD07A, 0x4343FC17, 0xF8F8CB66, 0xFBFBB194, 0x3737D3A1, 0xFAFA401D, - 0xC2C2683D, 0xB4B4CCF0, 0x32325DDE, 0x9C9C71B3, 0x5656E70B, 0xE3E3DA72, - 0x878760A7, 0x15151B1C, 0xF9F93AEF, 0x6363BFD1, 0x3434A953, 0x9A9A853E, - 0xB1B1428F, 0x7C7CD133, 0x88889B26, 0x3D3DA65F, 0xA1A1D7EC, 0xE4E4DF76, - 0x8181942A, 0x91910149, 0x0F0FFB81, 0xEEEEAA88, 0x161661EE, 0xD7D77321, - 0x9797F5C4, 0xA5A5A81A, 0xFEFE3FEB, 0x6D6DB5D9, 0x7878AEC5, 0xC5C56D39, - 0x1D1DE599, 0x7676A4CD, 0x3E3EDCAD, 0xCBCB6731, 0xB6B6478B, 0xEFEF5B01, - 0x12121E18, 0x6060C523, 0x6A6AB0DD, 0x4D4DF61F, 0xCECEE94E, 0xDEDE7C2D, - 0x55559DF9, 0x7E7E5A48, 0x2121B24F, 0x03037AF2, 0xA0A02665, 0x5E5E198E, - 0x5A5A6678, 0x65654B5C, 0x62624E58, 0xFDFD4519, 0x0606F48D, 0x404086E5, - 0xF2F2BE98, 0x3333AC57, 0x17179067, 0x05058E7F, 0xE8E85E05, 0x4F4F7D64, - 0x89896AAF, 0x10109563, 0x74742FB6, 0x0A0A75FE, 0x5C5C92F5, 0x9B9B74B7, - 0x2D2D333C, 0x3030D6A5, 0x2E2E49CE, 0x494989E9, 0x46467268, 0x77775544, - 0xA8A8D8E0, 0x9696044D, 0x2828BD43, 0xA9A92969, 0xD9D97929, 0x8686912E, - 0xD1D187AC, 0xF4F44A15, 0x8D8D1559, 0xD6D682A8, 0xB9B9BC0A, 0x42420D9E, - 0xF6F6C16E, 0x2F2FB847, 0xDDDD06DF, 0x23233934, 0xCCCC6235, 0xF1F1C46A, - 0xC1C112CF, 0x8585EBDC, 0x8F8F9E22, 0x7171A1C9, 0x9090F0C0, 0xAAAA539B, - 0x0101F189, 0x8B8BE1D4, 0x4E4E8CED, 0x8E8E6FAB, 0xABABA212, 0x6F6F3EA2, - 0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9, - 0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504, - 0x04047FF6, 0x272746C2, 0xACACA716, 
0xD0D07625, 0x50501386, 0xDCDCF756, - 0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91 }; - -static const uint32 MDS1[256] = { - 0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252, - 0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A, - 0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020, - 0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141, - 0x43BD2828, 0x7532BCBC, 0x37D47B7B, 0x269B8888, 0xFA700D0D, 0x13F94444, - 0x94B1FBFB, 0x485A7E7E, 0xF27A0303, 0xD0E48C8C, 0x8B47B6B6, 0x303C2424, - 0x84A5E7E7, 0x54416B6B, 0xDF06DDDD, 0x23C56060, 0x1945FDFD, 0x5BA33A3A, - 0x3D68C2C2, 0x59158D8D, 0xF321ECEC, 0xAE316666, 0xA23E6F6F, 0x82165757, - 0x63951010, 0x015BEFEF, 0x834DB8B8, 0x2E918686, 0xD9B56D6D, 0x511F8383, - 0x9B53AAAA, 0x7C635D5D, 0xA63B6868, 0xEB3FFEFE, 0xA5D63030, 0xBE257A7A, - 0x16A7ACAC, 0x0C0F0909, 0xE335F0F0, 0x6123A7A7, 0xC0F09090, 0x8CAFE9E9, - 0x3A809D9D, 0xF5925C5C, 0x73810C0C, 0x2C273131, 0x2576D0D0, 0x0BE75656, - 0xBB7B9292, 0x4EE9CECE, 0x89F10101, 0x6B9F1E1E, 0x53A93434, 0x6AC4F1F1, - 0xB499C3C3, 0xF1975B5B, 0xE1834747, 0xE66B1818, 0xBDC82222, 0x450E9898, - 0xE26E1F1F, 0xF4C9B3B3, 0xB62F7474, 0x66CBF8F8, 0xCCFF9999, 0x95EA1414, - 0x03ED5858, 0x56F7DCDC, 0xD4E18B8B, 0x1C1B1515, 0x1EADA2A2, 0xD70CD3D3, - 0xFB2BE2E2, 0xC31DC8C8, 0x8E195E5E, 0xB5C22C2C, 0xE9894949, 0xCF12C1C1, - 0xBF7E9595, 0xBA207D7D, 0xEA641111, 0x77840B0B, 0x396DC5C5, 0xAF6A8989, - 0x33D17C7C, 0xC9A17171, 0x62CEFFFF, 0x7137BBBB, 0x81FB0F0F, 0x793DB5B5, - 0x0951E1E1, 0xADDC3E3E, 0x242D3F3F, 0xCDA47676, 0xF99D5555, 0xD8EE8282, - 0xE5864040, 0xC5AE7878, 0xB9CD2525, 0x4D049696, 0x44557777, 0x080A0E0E, - 0x86135050, 0xE730F7F7, 0xA1D33737, 0x1D40FAFA, 0xAA346161, 0xED8C4E4E, - 0x06B3B0B0, 0x706C5454, 0xB22A7373, 0xD2523B3B, 0x410B9F9F, 0x7B8B0202, - 0xA088D8D8, 0x114FF3F3, 0x3167CBCB, 0xC2462727, 0x27C06767, 0x90B4FCFC, - 0x20283838, 0xF67F0404, 0x60784848, 0xFF2EE5E5, 0x96074C4C, 0x5C4B6565, - 0xB1C72B2B, 
0xAB6F8E8E, 0x9E0D4242, 0x9CBBF5F5, 0x52F2DBDB, 0x1BF34A4A, - 0x5FA63D3D, 0x9359A4A4, 0x0ABCB9B9, 0xEF3AF9F9, 0x91EF1313, 0x85FE0808, - 0x49019191, 0xEE611616, 0x2D7CDEDE, 0x4FB22121, 0x8F42B1B1, 0x3BDB7272, - 0x47B82F2F, 0x8748BFBF, 0x6D2CAEAE, 0x46E3C0C0, 0xD6573C3C, 0x3E859A9A, - 0x6929A9A9, 0x647D4F4F, 0x2A948181, 0xCE492E2E, 0xCB17C6C6, 0x2FCA6969, - 0xFCC3BDBD, 0x975CA3A3, 0x055EE8E8, 0x7AD0EDED, 0xAC87D1D1, 0x7F8E0505, - 0xD5BA6464, 0x1AA8A5A5, 0x4BB72626, 0x0EB9BEBE, 0xA7608787, 0x5AF8D5D5, - 0x28223636, 0x14111B1B, 0x3FDE7575, 0x2979D9D9, 0x88AAEEEE, 0x3C332D2D, - 0x4C5F7979, 0x02B6B7B7, 0xB896CACA, 0xDA583535, 0xB09CC4C4, 0x17FC4343, - 0x551A8484, 0x1FF64D4D, 0x8A1C5959, 0x7D38B2B2, 0x57AC3333, 0xC718CFCF, - 0x8DF40606, 0x74695353, 0xB7749B9B, 0xC4F59797, 0x9F56ADAD, 0x72DAE3E3, - 0x7ED5EAEA, 0x154AF4F4, 0x229E8F8F, 0x12A2ABAB, 0x584E6262, 0x07E85F5F, - 0x99E51D1D, 0x34392323, 0x6EC1F6F6, 0x50446C6C, 0xDE5D3232, 0x68724646, - 0x6526A0A0, 0xBC93CDCD, 0xDB03DADA, 0xF8C6BABA, 0xC8FA9E9E, 0xA882D6D6, - 0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF, - 0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A, - 0x0FE25151, 0x00000000, 0x6F9A1919, 0x9DE01A1A, 0x368F9494, 0x42E6C7C7, - 0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8 }; - -static const uint32 MDS2[256] = { - 0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B, - 0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F, - 0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A, - 0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783, - 0x2430243C, 0x510F51E2, 0xBAF8BAC6, 0x4A1B4AF3, 0xBF87BF48, 0x0DFA0D70, - 0xB006B0B3, 0x753F75DE, 0xD25ED2FD, 0x7DBA7D20, 0x66AE6631, 0x3A5B3AA3, - 0x598A591C, 0x00000000, 0xCDBCCD93, 0x1A9D1AE0, 0xAE6DAE2C, 0x7FC17FAB, - 0x2BB12BC7, 0xBE0EBEB9, 0xE080E0A0, 0x8A5D8A10, 0x3BD23B52, 0x64D564BA, - 0xD8A0D888, 0xE784E7A5, 0x5F075FE8, 0x1B141B11, 0x2CB52CC2, 0xFC90FCB4, 
- 0x312C3127, 0x80A38065, 0x73B2732A, 0x0C730C81, 0x794C795F, 0x6B546B41, - 0x4B924B02, 0x53745369, 0x9436948F, 0x8351831F, 0x2A382A36, 0xC4B0C49C, - 0x22BD22C8, 0xD55AD5F8, 0xBDFCBDC3, 0x48604878, 0xFF62FFCE, 0x4C964C07, - 0x416C4177, 0xC742C7E6, 0xEBF7EB24, 0x1C101C14, 0x5D7C5D63, 0x36283622, - 0x672767C0, 0xE98CE9AF, 0x441344F9, 0x149514EA, 0xF59CF5BB, 0xCFC7CF18, - 0x3F243F2D, 0xC046C0E3, 0x723B72DB, 0x5470546C, 0x29CA294C, 0xF0E3F035, - 0x088508FE, 0xC6CBC617, 0xF311F34F, 0x8CD08CE4, 0xA493A459, 0xCAB8CA96, - 0x68A6683B, 0xB883B84D, 0x38203828, 0xE5FFE52E, 0xAD9FAD56, 0x0B770B84, - 0xC8C3C81D, 0x99CC99FF, 0x580358ED, 0x196F199A, 0x0E080E0A, 0x95BF957E, - 0x70407050, 0xF7E7F730, 0x6E2B6ECF, 0x1FE21F6E, 0xB579B53D, 0x090C090F, - 0x61AA6134, 0x57825716, 0x9F419F0B, 0x9D3A9D80, 0x11EA1164, 0x25B925CD, - 0xAFE4AFDD, 0x459A4508, 0xDFA4DF8D, 0xA397A35C, 0xEA7EEAD5, 0x35DA3558, - 0xED7AEDD0, 0x431743FC, 0xF866F8CB, 0xFB94FBB1, 0x37A137D3, 0xFA1DFA40, - 0xC23DC268, 0xB4F0B4CC, 0x32DE325D, 0x9CB39C71, 0x560B56E7, 0xE372E3DA, - 0x87A78760, 0x151C151B, 0xF9EFF93A, 0x63D163BF, 0x345334A9, 0x9A3E9A85, - 0xB18FB142, 0x7C337CD1, 0x8826889B, 0x3D5F3DA6, 0xA1ECA1D7, 0xE476E4DF, - 0x812A8194, 0x91499101, 0x0F810FFB, 0xEE88EEAA, 0x16EE1661, 0xD721D773, - 0x97C497F5, 0xA51AA5A8, 0xFEEBFE3F, 0x6DD96DB5, 0x78C578AE, 0xC539C56D, - 0x1D991DE5, 0x76CD76A4, 0x3EAD3EDC, 0xCB31CB67, 0xB68BB647, 0xEF01EF5B, - 0x1218121E, 0x602360C5, 0x6ADD6AB0, 0x4D1F4DF6, 0xCE4ECEE9, 0xDE2DDE7C, - 0x55F9559D, 0x7E487E5A, 0x214F21B2, 0x03F2037A, 0xA065A026, 0x5E8E5E19, - 0x5A785A66, 0x655C654B, 0x6258624E, 0xFD19FD45, 0x068D06F4, 0x40E54086, - 0xF298F2BE, 0x335733AC, 0x17671790, 0x057F058E, 0xE805E85E, 0x4F644F7D, - 0x89AF896A, 0x10631095, 0x74B6742F, 0x0AFE0A75, 0x5CF55C92, 0x9BB79B74, - 0x2D3C2D33, 0x30A530D6, 0x2ECE2E49, 0x49E94989, 0x46684672, 0x77447755, - 0xA8E0A8D8, 0x964D9604, 0x284328BD, 0xA969A929, 0xD929D979, 0x862E8691, - 0xD1ACD187, 0xF415F44A, 0x8D598D15, 0xD6A8D682, 0xB90AB9BC, 0x429E420D, - 
0xF66EF6C1, 0x2F472FB8, 0xDDDFDD06, 0x23342339, 0xCC35CC62, 0xF16AF1C4, - 0xC1CFC112, 0x85DC85EB, 0x8F228F9E, 0x71C971A1, 0x90C090F0, 0xAA9BAA53, - 0x018901F1, 0x8BD48BE1, 0x4EED4E8C, 0x8EAB8E6F, 0xAB12ABA2, 0x6FA26F3E, - 0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9, - 0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705, - 0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7, - 0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF }; - -static const uint32 MDS3[256] = { - 0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98, - 0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866, - 0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643, - 0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77, - 0xBD2843BD, 0x32BC7532, 0xD47B37D4, 0x9B88269B, 0x700DFA70, 0xF94413F9, - 0xB1FB94B1, 0x5A7E485A, 0x7A03F27A, 0xE48CD0E4, 0x47B68B47, 0x3C24303C, - 0xA5E784A5, 0x416B5441, 0x06DDDF06, 0xC56023C5, 0x45FD1945, 0xA33A5BA3, - 0x68C23D68, 0x158D5915, 0x21ECF321, 0x3166AE31, 0x3E6FA23E, 0x16578216, - 0x95106395, 0x5BEF015B, 0x4DB8834D, 0x91862E91, 0xB56DD9B5, 0x1F83511F, - 0x53AA9B53, 0x635D7C63, 0x3B68A63B, 0x3FFEEB3F, 0xD630A5D6, 0x257ABE25, - 0xA7AC16A7, 0x0F090C0F, 0x35F0E335, 0x23A76123, 0xF090C0F0, 0xAFE98CAF, - 0x809D3A80, 0x925CF592, 0x810C7381, 0x27312C27, 0x76D02576, 0xE7560BE7, - 0x7B92BB7B, 0xE9CE4EE9, 0xF10189F1, 0x9F1E6B9F, 0xA93453A9, 0xC4F16AC4, - 0x99C3B499, 0x975BF197, 0x8347E183, 0x6B18E66B, 0xC822BDC8, 0x0E98450E, - 0x6E1FE26E, 0xC9B3F4C9, 0x2F74B62F, 0xCBF866CB, 0xFF99CCFF, 0xEA1495EA, - 0xED5803ED, 0xF7DC56F7, 0xE18BD4E1, 0x1B151C1B, 0xADA21EAD, 0x0CD3D70C, - 0x2BE2FB2B, 0x1DC8C31D, 0x195E8E19, 0xC22CB5C2, 0x8949E989, 0x12C1CF12, - 0x7E95BF7E, 0x207DBA20, 0x6411EA64, 0x840B7784, 0x6DC5396D, 0x6A89AF6A, - 0xD17C33D1, 0xA171C9A1, 0xCEFF62CE, 0x37BB7137, 0xFB0F81FB, 0x3DB5793D, - 0x51E10951, 0xDC3EADDC, 0x2D3F242D, 0xA476CDA4, 0x9D55F99D, 
0xEE82D8EE, - 0x8640E586, 0xAE78C5AE, 0xCD25B9CD, 0x04964D04, 0x55774455, 0x0A0E080A, - 0x13508613, 0x30F7E730, 0xD337A1D3, 0x40FA1D40, 0x3461AA34, 0x8C4EED8C, - 0xB3B006B3, 0x6C54706C, 0x2A73B22A, 0x523BD252, 0x0B9F410B, 0x8B027B8B, - 0x88D8A088, 0x4FF3114F, 0x67CB3167, 0x4627C246, 0xC06727C0, 0xB4FC90B4, - 0x28382028, 0x7F04F67F, 0x78486078, 0x2EE5FF2E, 0x074C9607, 0x4B655C4B, - 0xC72BB1C7, 0x6F8EAB6F, 0x0D429E0D, 0xBBF59CBB, 0xF2DB52F2, 0xF34A1BF3, - 0xA63D5FA6, 0x59A49359, 0xBCB90ABC, 0x3AF9EF3A, 0xEF1391EF, 0xFE0885FE, - 0x01914901, 0x6116EE61, 0x7CDE2D7C, 0xB2214FB2, 0x42B18F42, 0xDB723BDB, - 0xB82F47B8, 0x48BF8748, 0x2CAE6D2C, 0xE3C046E3, 0x573CD657, 0x859A3E85, - 0x29A96929, 0x7D4F647D, 0x94812A94, 0x492ECE49, 0x17C6CB17, 0xCA692FCA, - 0xC3BDFCC3, 0x5CA3975C, 0x5EE8055E, 0xD0ED7AD0, 0x87D1AC87, 0x8E057F8E, - 0xBA64D5BA, 0xA8A51AA8, 0xB7264BB7, 0xB9BE0EB9, 0x6087A760, 0xF8D55AF8, - 0x22362822, 0x111B1411, 0xDE753FDE, 0x79D92979, 0xAAEE88AA, 0x332D3C33, - 0x5F794C5F, 0xB6B702B6, 0x96CAB896, 0x5835DA58, 0x9CC4B09C, 0xFC4317FC, - 0x1A84551A, 0xF64D1FF6, 0x1C598A1C, 0x38B27D38, 0xAC3357AC, 0x18CFC718, - 0xF4068DF4, 0x69537469, 0x749BB774, 0xF597C4F5, 0x56AD9F56, 0xDAE372DA, - 0xD5EA7ED5, 0x4AF4154A, 0x9E8F229E, 0xA2AB12A2, 0x4E62584E, 0xE85F07E8, - 0xE51D99E5, 0x39233439, 0xC1F66EC1, 0x446C5044, 0x5D32DE5D, 0x72466872, - 0x26A06526, 0x93CDBC93, 0x03DADB03, 0xC6BAF8C6, 0xFA9EC8FA, 0x82D6A882, - 0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D, - 0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10, - 0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6, - 0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8 }; +#if !defined (_MSC_VER) || defined(_WIN64) +#define UNROLL_TWOFISH +#endif +#if CRYPTOPP_BOOL_X64 +/* these are 64-bit assembly implementation taken from https://github.com/jkivilin/supercop-blockciphers + Copyright © 2011-2013 Jussi Kivilinna + */ +#if defined(__cplusplus) +extern "C" +{ +#endif -/* -* 
Do one column of the RS matrix multiplcation -*/ -static void rs_mul(byte S[4], byte key, size_t offset) +void twofish_enc_blk(TwofishInstance *ks, byte *dst, const byte *src); +void twofish_dec_blk(TwofishInstance *ks, byte *dst, const byte *src); +void twofish_enc_blk2(TwofishInstance *ks, byte *dst, const byte *src); +void twofish_dec_blk2(TwofishInstance *ks, byte *dst, const byte *src); +void twofish_enc_blk3(TwofishInstance *ks, byte *dst, const byte *src); +void twofish_dec_blk3(TwofishInstance *ks, byte *dst, const byte *src); + +#if defined(__cplusplus) +} +#endif +/* twofish_encrypt_blocks / twofish_decrypt_blocks: process blockCount consecutive 16-byte blocks from in_blk to out_blk. Full groups of three go through the *_blk3 routine; a remainder of two goes through *_blk2 and a remainder of one through *_blk. A remainder of zero (blockCount is 0 or a multiple of 3) processes nothing. */ +void twofish_encrypt_blocks(TwofishInstance *instance, const byte* in_blk, byte* out_blk, uint32 blockCount) { - if(key) + while (blockCount >= 3) { - byte X = POLY_TO_EXP[key - 1]; - - byte RS1 = RS[(4*offset ) % 32]; - byte RS2 = RS[(4*offset+1) % 32]; - byte RS3 = RS[(4*offset+2) % 32]; - byte RS4 = RS[(4*offset+3) % 32]; + twofish_enc_blk3 (instance, out_blk, in_blk); + out_blk += 3 * 16; + in_blk += 3 * 16; + blockCount -= 3; + } + + if (blockCount == 2) + { + twofish_enc_blk2 (instance, out_blk, in_blk); + } + else if (blockCount == 1) /* remainder 0: no block is left, so the buffers must not be touched */ + { + twofish_enc_blk (instance, out_blk, in_blk); + } + +} - S[0] ^= EXP_TO_POLY[(X + POLY_TO_EXP[RS1 - 1]) % 255]; - S[1] ^= EXP_TO_POLY[(X + POLY_TO_EXP[RS2 - 1]) % 255]; - S[2] ^= EXP_TO_POLY[(X + POLY_TO_EXP[RS3 - 1]) % 255]; - S[3] ^= EXP_TO_POLY[(X + POLY_TO_EXP[RS4 - 1]) % 255]; +void twofish_decrypt_blocks(TwofishInstance *instance, const byte* in_blk, byte* out_blk, uint32 blockCount) +{ + while (blockCount >= 3) + { + twofish_dec_blk3 (instance, out_blk, in_blk); + out_blk += 3 * 16; + in_blk += 3 * 16; + blockCount -= 3; + } + + if (blockCount == 2) + { + twofish_dec_blk2 (instance, out_blk, in_blk); + } + else if (blockCount == 1) /* remainder 0: no block is left, so the buffers must not be touched */ + { + twofish_dec_blk (instance, out_blk, in_blk); } } -u4byte *twofish_set_key(TwofishInstance *instance, const u4byte in_key[]) + + +#endif + +static const byte Q[2][256] = { + { + 0xa9, 0x67, 0xb3, 0xe8, 0x04, 0xfd, 0xa3, 0x76, 0x9a, 0x92, 0x80, 0x78, 
0xe4, 0xdd, 0xd1, 0x38, + 0x0d, 0xc6, 0x35, 0x98, 0x18, 0xf7, 0xec, 0x6c, 0x43, 0x75, 0x37, 0x26, 0xfa, 0x13, 0x94, 0x48, + 0xf2, 0xd0, 0x8b, 0x30, 0x84, 0x54, 0xdf, 0x23, 0x19, 0x5b, 0x3d, 0x59, 0xf3, 0xae, 0xa2, 0x82, + 0x63, 0x01, 0x83, 0x2e, 0xd9, 0x51, 0x9b, 0x7c, 0xa6, 0xeb, 0xa5, 0xbe, 0x16, 0x0c, 0xe3, 0x61, + 0xc0, 0x8c, 0x3a, 0xf5, 0x73, 0x2c, 0x25, 0x0b, 0xbb, 0x4e, 0x89, 0x6b, 0x53, 0x6a, 0xb4, 0xf1, + 0xe1, 0xe6, 0xbd, 0x45, 0xe2, 0xf4, 0xb6, 0x66, 0xcc, 0x95, 0x03, 0x56, 0xd4, 0x1c, 0x1e, 0xd7, + 0xfb, 0xc3, 0x8e, 0xb5, 0xe9, 0xcf, 0xbf, 0xba, 0xea, 0x77, 0x39, 0xaf, 0x33, 0xc9, 0x62, 0x71, + 0x81, 0x79, 0x09, 0xad, 0x24, 0xcd, 0xf9, 0xd8, 0xe5, 0xc5, 0xb9, 0x4d, 0x44, 0x08, 0x86, 0xe7, + 0xa1, 0x1d, 0xaa, 0xed, 0x06, 0x70, 0xb2, 0xd2, 0x41, 0x7b, 0xa0, 0x11, 0x31, 0xc2, 0x27, 0x90, + 0x20, 0xf6, 0x60, 0xff, 0x96, 0x5c, 0xb1, 0xab, 0x9e, 0x9c, 0x52, 0x1b, 0x5f, 0x93, 0x0a, 0xef, + 0x91, 0x85, 0x49, 0xee, 0x2d, 0x4f, 0x8f, 0x3b, 0x47, 0x87, 0x6d, 0x46, 0xd6, 0x3e, 0x69, 0x64, + 0x2a, 0xce, 0xcb, 0x2f, 0xfc, 0x97, 0x05, 0x7a, 0xac, 0x7f, 0xd5, 0x1a, 0x4b, 0x0e, 0xa7, 0x5a, + 0x28, 0x14, 0x3f, 0x29, 0x88, 0x3c, 0x4c, 0x02, 0xb8, 0xda, 0xb0, 0x17, 0x55, 0x1f, 0x8a, 0x7d, + 0x57, 0xc7, 0x8d, 0x74, 0xb7, 0xc4, 0x9f, 0x72, 0x7e, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34, + 0x6e, 0x50, 0xde, 0x68, 0x65, 0xbc, 0xdb, 0xf8, 0xc8, 0xa8, 0x2b, 0x40, 0xdc, 0xfe, 0x32, 0xa4, + 0xca, 0x10, 0x21, 0xf0, 0xd3, 0x5d, 0x0f, 0x00, 0x6f, 0x9d, 0x36, 0x42, 0x4a, 0x5e, 0xc1, 0xe0 + }, + { + 0x75, 0xf3, 0xc6, 0xf4, 0xdb, 0x7b, 0xfb, 0xc8, 0x4a, 0xd3, 0xe6, 0x6b, 0x45, 0x7d, 0xe8, 0x4b, + 0xd6, 0x32, 0xd8, 0xfd, 0x37, 0x71, 0xf1, 0xe1, 0x30, 0x0f, 0xf8, 0x1b, 0x87, 0xfa, 0x06, 0x3f, + 0x5e, 0xba, 0xae, 0x5b, 0x8a, 0x00, 0xbc, 0x9d, 0x6d, 0xc1, 0xb1, 0x0e, 0x80, 0x5d, 0xd2, 0xd5, + 0xa0, 0x84, 0x07, 0x14, 0xb5, 0x90, 0x2c, 0xa3, 0xb2, 0x73, 0x4c, 0x54, 0x92, 0x74, 0x36, 0x51, + 0x38, 0xb0, 0xbd, 0x5a, 0xfc, 0x60, 0x62, 0x96, 0x6c, 0x42, 0xf7, 0x10, 0x7c, 0x28, 0x27, 0x8c, + 0x13, 
0x95, 0x9c, 0xc7, 0x24, 0x46, 0x3b, 0x70, 0xca, 0xe3, 0x85, 0xcb, 0x11, 0xd0, 0x93, 0xb8, + 0xa6, 0x83, 0x20, 0xff, 0x9f, 0x77, 0xc3, 0xcc, 0x03, 0x6f, 0x08, 0xbf, 0x40, 0xe7, 0x2b, 0xe2, + 0x79, 0x0c, 0xaa, 0x82, 0x41, 0x3a, 0xea, 0xb9, 0xe4, 0x9a, 0xa4, 0x97, 0x7e, 0xda, 0x7a, 0x17, + 0x66, 0x94, 0xa1, 0x1d, 0x3d, 0xf0, 0xde, 0xb3, 0x0b, 0x72, 0xa7, 0x1c, 0xef, 0xd1, 0x53, 0x3e, + 0x8f, 0x33, 0x26, 0x5f, 0xec, 0x76, 0x2a, 0x49, 0x81, 0x88, 0xee, 0x21, 0xc4, 0x1a, 0xeb, 0xd9, + 0xc5, 0x39, 0x99, 0xcd, 0xad, 0x31, 0x8b, 0x01, 0x18, 0x23, 0xdd, 0x1f, 0x4e, 0x2d, 0xf9, 0x48, + 0x4f, 0xf2, 0x65, 0x8e, 0x78, 0x5c, 0x58, 0x19, 0x8d, 0xe5, 0x98, 0x57, 0x67, 0x7f, 0x05, 0x64, + 0xaf, 0x63, 0xb6, 0xfe, 0xf5, 0xb7, 0x3c, 0xa5, 0xce, 0xe9, 0x68, 0x44, 0xe0, 0x4d, 0x43, 0x69, + 0x29, 0x2e, 0xac, 0x15, 0x59, 0xa8, 0x0a, 0x9e, 0x6e, 0x47, 0xdf, 0x34, 0x35, 0x6a, 0xcf, 0xdc, + 0x22, 0xc9, 0xc0, 0x9b, 0x89, 0xd4, 0xed, 0xab, 0x12, 0xa2, 0x0d, 0x52, 0xbb, 0x02, 0x2f, 0xa9, + 0xd7, 0x61, 0x1e, 0xb4, 0x50, 0x04, 0xf6, 0xc2, 0x16, 0x25, 0x86, 0x56, 0x55, 0x09, 0xbe, 0x91 + } +}; + +static const uint32 MDSQ[4][256] = { + { + 0xbcbc3275U, 0xecec21f3U, 0x202043c6U, 0xb3b3c9f4U, 0xdada03dbU, 0x02028b7bU, 0xe2e22bfbU, 0x9e9efac8U, + 0xc9c9ec4aU, 0xd4d409d3U, 0x18186be6U, 0x1e1e9f6bU, 0x98980e45U, 0xb2b2387dU, 0xa6a6d2e8U, 0x2626b74bU, + 0x3c3c57d6U, 0x93938a32U, 0x8282eed8U, 0x525298fdU, 0x7b7bd437U, 0xbbbb3771U, 0x5b5b97f1U, 0x474783e1U, + 0x24243c30U, 0x5151e20fU, 0xbabac6f8U, 0x4a4af31bU, 0xbfbf4887U, 0x0d0d70faU, 0xb0b0b306U, 0x7575de3fU, + 0xd2d2fd5eU, 0x7d7d20baU, 0x666631aeU, 0x3a3aa35bU, 0x59591c8aU, 0x00000000U, 0xcdcd93bcU, 0x1a1ae09dU, + 0xaeae2c6dU, 0x7f7fabc1U, 0x2b2bc7b1U, 0xbebeb90eU, 0xe0e0a080U, 0x8a8a105dU, 0x3b3b52d2U, 0x6464bad5U, + 0xd8d888a0U, 0xe7e7a584U, 0x5f5fe807U, 0x1b1b1114U, 0x2c2cc2b5U, 0xfcfcb490U, 0x3131272cU, 0x808065a3U, + 0x73732ab2U, 0x0c0c8173U, 0x79795f4cU, 0x6b6b4154U, 0x4b4b0292U, 0x53536974U, 0x94948f36U, 0x83831f51U, + 0x2a2a3638U, 0xc4c49cb0U, 
0x2222c8bdU, 0xd5d5f85aU, 0xbdbdc3fcU, 0x48487860U, 0xffffce62U, 0x4c4c0796U, + 0x4141776cU, 0xc7c7e642U, 0xebeb24f7U, 0x1c1c1410U, 0x5d5d637cU, 0x36362228U, 0x6767c027U, 0xe9e9af8cU, + 0x4444f913U, 0x1414ea95U, 0xf5f5bb9cU, 0xcfcf18c7U, 0x3f3f2d24U, 0xc0c0e346U, 0x7272db3bU, 0x54546c70U, + 0x29294ccaU, 0xf0f035e3U, 0x0808fe85U, 0xc6c617cbU, 0xf3f34f11U, 0x8c8ce4d0U, 0xa4a45993U, 0xcaca96b8U, + 0x68683ba6U, 0xb8b84d83U, 0x38382820U, 0xe5e52effU, 0xadad569fU, 0x0b0b8477U, 0xc8c81dc3U, 0x9999ffccU, + 0x5858ed03U, 0x19199a6fU, 0x0e0e0a08U, 0x95957ebfU, 0x70705040U, 0xf7f730e7U, 0x6e6ecf2bU, 0x1f1f6ee2U, + 0xb5b53d79U, 0x09090f0cU, 0x616134aaU, 0x57571682U, 0x9f9f0b41U, 0x9d9d803aU, 0x111164eaU, 0x2525cdb9U, + 0xafafdde4U, 0x4545089aU, 0xdfdf8da4U, 0xa3a35c97U, 0xeaead57eU, 0x353558daU, 0xededd07aU, 0x4343fc17U, + 0xf8f8cb66U, 0xfbfbb194U, 0x3737d3a1U, 0xfafa401dU, 0xc2c2683dU, 0xb4b4ccf0U, 0x32325ddeU, 0x9c9c71b3U, + 0x5656e70bU, 0xe3e3da72U, 0x878760a7U, 0x15151b1cU, 0xf9f93aefU, 0x6363bfd1U, 0x3434a953U, 0x9a9a853eU, + 0xb1b1428fU, 0x7c7cd133U, 0x88889b26U, 0x3d3da65fU, 0xa1a1d7ecU, 0xe4e4df76U, 0x8181942aU, 0x91910149U, + 0x0f0ffb81U, 0xeeeeaa88U, 0x161661eeU, 0xd7d77321U, 0x9797f5c4U, 0xa5a5a81aU, 0xfefe3febU, 0x6d6db5d9U, + 0x7878aec5U, 0xc5c56d39U, 0x1d1de599U, 0x7676a4cdU, 0x3e3edcadU, 0xcbcb6731U, 0xb6b6478bU, 0xefef5b01U, + 0x12121e18U, 0x6060c523U, 0x6a6ab0ddU, 0x4d4df61fU, 0xcecee94eU, 0xdede7c2dU, 0x55559df9U, 0x7e7e5a48U, + 0x2121b24fU, 0x03037af2U, 0xa0a02665U, 0x5e5e198eU, 0x5a5a6678U, 0x65654b5cU, 0x62624e58U, 0xfdfd4519U, + 0x0606f48dU, 0x404086e5U, 0xf2f2be98U, 0x3333ac57U, 0x17179067U, 0x05058e7fU, 0xe8e85e05U, 0x4f4f7d64U, + 0x89896aafU, 0x10109563U, 0x74742fb6U, 0x0a0a75feU, 0x5c5c92f5U, 0x9b9b74b7U, 0x2d2d333cU, 0x3030d6a5U, + 0x2e2e49ceU, 0x494989e9U, 0x46467268U, 0x77775544U, 0xa8a8d8e0U, 0x9696044dU, 0x2828bd43U, 0xa9a92969U, + 0xd9d97929U, 0x8686912eU, 0xd1d187acU, 0xf4f44a15U, 0x8d8d1559U, 0xd6d682a8U, 0xb9b9bc0aU, 0x42420d9eU, + 
0xf6f6c16eU, 0x2f2fb847U, 0xdddd06dfU, 0x23233934U, 0xcccc6235U, 0xf1f1c46aU, 0xc1c112cfU, 0x8585ebdcU, + 0x8f8f9e22U, 0x7171a1c9U, 0x9090f0c0U, 0xaaaa539bU, 0x0101f189U, 0x8b8be1d4U, 0x4e4e8cedU, 0x8e8e6fabU, + 0xababa212U, 0x6f6f3ea2U, 0xe6e6540dU, 0xdbdbf252U, 0x92927bbbU, 0xb7b7b602U, 0x6969ca2fU, 0x3939d9a9U, + 0xd3d30cd7U, 0xa7a72361U, 0xa2a2ad1eU, 0xc3c399b4U, 0x6c6c4450U, 0x07070504U, 0x04047ff6U, 0x272746c2U, + 0xacaca716U, 0xd0d07625U, 0x50501386U, 0xdcdcf756U, 0x84841a55U, 0xe1e15109U, 0x7a7a25beU, 0x1313ef91U + }, + { + 0xa9d93939U, 0x67901717U, 0xb3719c9cU, 0xe8d2a6a6U, 0x04050707U, 0xfd985252U, 0xa3658080U, 0x76dfe4e4U, + 0x9a084545U, 0x92024b4bU, 0x80a0e0e0U, 0x78665a5aU, 0xe4ddafafU, 0xddb06a6aU, 0xd1bf6363U, 0x38362a2aU, + 0x0d54e6e6U, 0xc6432020U, 0x3562ccccU, 0x98bef2f2U, 0x181e1212U, 0xf724ebebU, 0xecd7a1a1U, 0x6c774141U, + 0x43bd2828U, 0x7532bcbcU, 0x37d47b7bU, 0x269b8888U, 0xfa700d0dU, 0x13f94444U, 0x94b1fbfbU, 0x485a7e7eU, + 0xf27a0303U, 0xd0e48c8cU, 0x8b47b6b6U, 0x303c2424U, 0x84a5e7e7U, 0x54416b6bU, 0xdf06ddddU, 0x23c56060U, + 0x1945fdfdU, 0x5ba33a3aU, 0x3d68c2c2U, 0x59158d8dU, 0xf321ececU, 0xae316666U, 0xa23e6f6fU, 0x82165757U, + 0x63951010U, 0x015befefU, 0x834db8b8U, 0x2e918686U, 0xd9b56d6dU, 0x511f8383U, 0x9b53aaaaU, 0x7c635d5dU, + 0xa63b6868U, 0xeb3ffefeU, 0xa5d63030U, 0xbe257a7aU, 0x16a7acacU, 0x0c0f0909U, 0xe335f0f0U, 0x6123a7a7U, + 0xc0f09090U, 0x8cafe9e9U, 0x3a809d9dU, 0xf5925c5cU, 0x73810c0cU, 0x2c273131U, 0x2576d0d0U, 0x0be75656U, + 0xbb7b9292U, 0x4ee9ceceU, 0x89f10101U, 0x6b9f1e1eU, 0x53a93434U, 0x6ac4f1f1U, 0xb499c3c3U, 0xf1975b5bU, + 0xe1834747U, 0xe66b1818U, 0xbdc82222U, 0x450e9898U, 0xe26e1f1fU, 0xf4c9b3b3U, 0xb62f7474U, 0x66cbf8f8U, + 0xccff9999U, 0x95ea1414U, 0x03ed5858U, 0x56f7dcdcU, 0xd4e18b8bU, 0x1c1b1515U, 0x1eada2a2U, 0xd70cd3d3U, + 0xfb2be2e2U, 0xc31dc8c8U, 0x8e195e5eU, 0xb5c22c2cU, 0xe9894949U, 0xcf12c1c1U, 0xbf7e9595U, 0xba207d7dU, + 0xea641111U, 0x77840b0bU, 0x396dc5c5U, 0xaf6a8989U, 0x33d17c7cU, 0xc9a17171U, 
0x62ceffffU, 0x7137bbbbU, + 0x81fb0f0fU, 0x793db5b5U, 0x0951e1e1U, 0xaddc3e3eU, 0x242d3f3fU, 0xcda47676U, 0xf99d5555U, 0xd8ee8282U, + 0xe5864040U, 0xc5ae7878U, 0xb9cd2525U, 0x4d049696U, 0x44557777U, 0x080a0e0eU, 0x86135050U, 0xe730f7f7U, + 0xa1d33737U, 0x1d40fafaU, 0xaa346161U, 0xed8c4e4eU, 0x06b3b0b0U, 0x706c5454U, 0xb22a7373U, 0xd2523b3bU, + 0x410b9f9fU, 0x7b8b0202U, 0xa088d8d8U, 0x114ff3f3U, 0x3167cbcbU, 0xc2462727U, 0x27c06767U, 0x90b4fcfcU, + 0x20283838U, 0xf67f0404U, 0x60784848U, 0xff2ee5e5U, 0x96074c4cU, 0x5c4b6565U, 0xb1c72b2bU, 0xab6f8e8eU, + 0x9e0d4242U, 0x9cbbf5f5U, 0x52f2dbdbU, 0x1bf34a4aU, 0x5fa63d3dU, 0x9359a4a4U, 0x0abcb9b9U, 0xef3af9f9U, + 0x91ef1313U, 0x85fe0808U, 0x49019191U, 0xee611616U, 0x2d7cdedeU, 0x4fb22121U, 0x8f42b1b1U, 0x3bdb7272U, + 0x47b82f2fU, 0x8748bfbfU, 0x6d2caeaeU, 0x46e3c0c0U, 0xd6573c3cU, 0x3e859a9aU, 0x6929a9a9U, 0x647d4f4fU, + 0x2a948181U, 0xce492e2eU, 0xcb17c6c6U, 0x2fca6969U, 0xfcc3bdbdU, 0x975ca3a3U, 0x055ee8e8U, 0x7ad0ededU, + 0xac87d1d1U, 0x7f8e0505U, 0xd5ba6464U, 0x1aa8a5a5U, 0x4bb72626U, 0x0eb9bebeU, 0xa7608787U, 0x5af8d5d5U, + 0x28223636U, 0x14111b1bU, 0x3fde7575U, 0x2979d9d9U, 0x88aaeeeeU, 0x3c332d2dU, 0x4c5f7979U, 0x02b6b7b7U, + 0xb896cacaU, 0xda583535U, 0xb09cc4c4U, 0x17fc4343U, 0x551a8484U, 0x1ff64d4dU, 0x8a1c5959U, 0x7d38b2b2U, + 0x57ac3333U, 0xc718cfcfU, 0x8df40606U, 0x74695353U, 0xb7749b9bU, 0xc4f59797U, 0x9f56adadU, 0x72dae3e3U, + 0x7ed5eaeaU, 0x154af4f4U, 0x229e8f8fU, 0x12a2ababU, 0x584e6262U, 0x07e85f5fU, 0x99e51d1dU, 0x34392323U, + 0x6ec1f6f6U, 0x50446c6cU, 0xde5d3232U, 0x68724646U, 0x6526a0a0U, 0xbc93cdcdU, 0xdb03dadaU, 0xf8c6babaU, + 0xc8fa9e9eU, 0xa882d6d6U, 0x2bcf6e6eU, 0x40507070U, 0xdceb8585U, 0xfe750a0aU, 0x328a9393U, 0xa48ddfdfU, + 0xca4c2929U, 0x10141c1cU, 0x2173d7d7U, 0xf0ccb4b4U, 0xd309d4d4U, 0x5d108a8aU, 0x0fe25151U, 0x00000000U, + 0x6f9a1919U, 0x9de01a1aU, 0x368f9494U, 0x42e6c7c7U, 0x4aecc9c9U, 0x5efdd2d2U, 0xc1ab7f7fU, 0xe0d8a8a8U + }, + { + 0xbc75bc32U, 0xecf3ec21U, 0x20c62043U, 0xb3f4b3c9U, 
0xdadbda03U, 0x027b028bU, 0xe2fbe22bU, 0x9ec89efaU, + 0xc94ac9ecU, 0xd4d3d409U, 0x18e6186bU, 0x1e6b1e9fU, 0x9845980eU, 0xb27db238U, 0xa6e8a6d2U, 0x264b26b7U, + 0x3cd63c57U, 0x9332938aU, 0x82d882eeU, 0x52fd5298U, 0x7b377bd4U, 0xbb71bb37U, 0x5bf15b97U, 0x47e14783U, + 0x2430243cU, 0x510f51e2U, 0xbaf8bac6U, 0x4a1b4af3U, 0xbf87bf48U, 0x0dfa0d70U, 0xb006b0b3U, 0x753f75deU, + 0xd25ed2fdU, 0x7dba7d20U, 0x66ae6631U, 0x3a5b3aa3U, 0x598a591cU, 0x00000000U, 0xcdbccd93U, 0x1a9d1ae0U, + 0xae6dae2cU, 0x7fc17fabU, 0x2bb12bc7U, 0xbe0ebeb9U, 0xe080e0a0U, 0x8a5d8a10U, 0x3bd23b52U, 0x64d564baU, + 0xd8a0d888U, 0xe784e7a5U, 0x5f075fe8U, 0x1b141b11U, 0x2cb52cc2U, 0xfc90fcb4U, 0x312c3127U, 0x80a38065U, + 0x73b2732aU, 0x0c730c81U, 0x794c795fU, 0x6b546b41U, 0x4b924b02U, 0x53745369U, 0x9436948fU, 0x8351831fU, + 0x2a382a36U, 0xc4b0c49cU, 0x22bd22c8U, 0xd55ad5f8U, 0xbdfcbdc3U, 0x48604878U, 0xff62ffceU, 0x4c964c07U, + 0x416c4177U, 0xc742c7e6U, 0xebf7eb24U, 0x1c101c14U, 0x5d7c5d63U, 0x36283622U, 0x672767c0U, 0xe98ce9afU, + 0x441344f9U, 0x149514eaU, 0xf59cf5bbU, 0xcfc7cf18U, 0x3f243f2dU, 0xc046c0e3U, 0x723b72dbU, 0x5470546cU, + 0x29ca294cU, 0xf0e3f035U, 0x088508feU, 0xc6cbc617U, 0xf311f34fU, 0x8cd08ce4U, 0xa493a459U, 0xcab8ca96U, + 0x68a6683bU, 0xb883b84dU, 0x38203828U, 0xe5ffe52eU, 0xad9fad56U, 0x0b770b84U, 0xc8c3c81dU, 0x99cc99ffU, + 0x580358edU, 0x196f199aU, 0x0e080e0aU, 0x95bf957eU, 0x70407050U, 0xf7e7f730U, 0x6e2b6ecfU, 0x1fe21f6eU, + 0xb579b53dU, 0x090c090fU, 0x61aa6134U, 0x57825716U, 0x9f419f0bU, 0x9d3a9d80U, 0x11ea1164U, 0x25b925cdU, + 0xafe4afddU, 0x459a4508U, 0xdfa4df8dU, 0xa397a35cU, 0xea7eead5U, 0x35da3558U, 0xed7aedd0U, 0x431743fcU, + 0xf866f8cbU, 0xfb94fbb1U, 0x37a137d3U, 0xfa1dfa40U, 0xc23dc268U, 0xb4f0b4ccU, 0x32de325dU, 0x9cb39c71U, + 0x560b56e7U, 0xe372e3daU, 0x87a78760U, 0x151c151bU, 0xf9eff93aU, 0x63d163bfU, 0x345334a9U, 0x9a3e9a85U, + 0xb18fb142U, 0x7c337cd1U, 0x8826889bU, 0x3d5f3da6U, 0xa1eca1d7U, 0xe476e4dfU, 0x812a8194U, 0x91499101U, + 0x0f810ffbU, 0xee88eeaaU, 
0x16ee1661U, 0xd721d773U, 0x97c497f5U, 0xa51aa5a8U, 0xfeebfe3fU, 0x6dd96db5U, + 0x78c578aeU, 0xc539c56dU, 0x1d991de5U, 0x76cd76a4U, 0x3ead3edcU, 0xcb31cb67U, 0xb68bb647U, 0xef01ef5bU, + 0x1218121eU, 0x602360c5U, 0x6add6ab0U, 0x4d1f4df6U, 0xce4ecee9U, 0xde2dde7cU, 0x55f9559dU, 0x7e487e5aU, + 0x214f21b2U, 0x03f2037aU, 0xa065a026U, 0x5e8e5e19U, 0x5a785a66U, 0x655c654bU, 0x6258624eU, 0xfd19fd45U, + 0x068d06f4U, 0x40e54086U, 0xf298f2beU, 0x335733acU, 0x17671790U, 0x057f058eU, 0xe805e85eU, 0x4f644f7dU, + 0x89af896aU, 0x10631095U, 0x74b6742fU, 0x0afe0a75U, 0x5cf55c92U, 0x9bb79b74U, 0x2d3c2d33U, 0x30a530d6U, + 0x2ece2e49U, 0x49e94989U, 0x46684672U, 0x77447755U, 0xa8e0a8d8U, 0x964d9604U, 0x284328bdU, 0xa969a929U, + 0xd929d979U, 0x862e8691U, 0xd1acd187U, 0xf415f44aU, 0x8d598d15U, 0xd6a8d682U, 0xb90ab9bcU, 0x429e420dU, + 0xf66ef6c1U, 0x2f472fb8U, 0xdddfdd06U, 0x23342339U, 0xcc35cc62U, 0xf16af1c4U, 0xc1cfc112U, 0x85dc85ebU, + 0x8f228f9eU, 0x71c971a1U, 0x90c090f0U, 0xaa9baa53U, 0x018901f1U, 0x8bd48be1U, 0x4eed4e8cU, 0x8eab8e6fU, + 0xab12aba2U, 0x6fa26f3eU, 0xe60de654U, 0xdb52dbf2U, 0x92bb927bU, 0xb702b7b6U, 0x692f69caU, 0x39a939d9U, + 0xd3d7d30cU, 0xa761a723U, 0xa21ea2adU, 0xc3b4c399U, 0x6c506c44U, 0x07040705U, 0x04f6047fU, 0x27c22746U, + 0xac16aca7U, 0xd025d076U, 0x50865013U, 0xdc56dcf7U, 0x8455841aU, 0xe109e151U, 0x7abe7a25U, 0x139113efU + }, + { + 0xd939a9d9U, 0x90176790U, 0x719cb371U, 0xd2a6e8d2U, 0x05070405U, 0x9852fd98U, 0x6580a365U, 0xdfe476dfU, + 0x08459a08U, 0x024b9202U, 0xa0e080a0U, 0x665a7866U, 0xddafe4ddU, 0xb06addb0U, 0xbf63d1bfU, 0x362a3836U, + 0x54e60d54U, 0x4320c643U, 0x62cc3562U, 0xbef298beU, 0x1e12181eU, 0x24ebf724U, 0xd7a1ecd7U, 0x77416c77U, + 0xbd2843bdU, 0x32bc7532U, 0xd47b37d4U, 0x9b88269bU, 0x700dfa70U, 0xf94413f9U, 0xb1fb94b1U, 0x5a7e485aU, + 0x7a03f27aU, 0xe48cd0e4U, 0x47b68b47U, 0x3c24303cU, 0xa5e784a5U, 0x416b5441U, 0x06dddf06U, 0xc56023c5U, + 0x45fd1945U, 0xa33a5ba3U, 0x68c23d68U, 0x158d5915U, 0x21ecf321U, 0x3166ae31U, 0x3e6fa23eU, 0x16578216U, + 
0x95106395U, 0x5bef015bU, 0x4db8834dU, 0x91862e91U, 0xb56dd9b5U, 0x1f83511fU, 0x53aa9b53U, 0x635d7c63U, + 0x3b68a63bU, 0x3ffeeb3fU, 0xd630a5d6U, 0x257abe25U, 0xa7ac16a7U, 0x0f090c0fU, 0x35f0e335U, 0x23a76123U, + 0xf090c0f0U, 0xafe98cafU, 0x809d3a80U, 0x925cf592U, 0x810c7381U, 0x27312c27U, 0x76d02576U, 0xe7560be7U, + 0x7b92bb7bU, 0xe9ce4ee9U, 0xf10189f1U, 0x9f1e6b9fU, 0xa93453a9U, 0xc4f16ac4U, 0x99c3b499U, 0x975bf197U, + 0x8347e183U, 0x6b18e66bU, 0xc822bdc8U, 0x0e98450eU, 0x6e1fe26eU, 0xc9b3f4c9U, 0x2f74b62fU, 0xcbf866cbU, + 0xff99ccffU, 0xea1495eaU, 0xed5803edU, 0xf7dc56f7U, 0xe18bd4e1U, 0x1b151c1bU, 0xada21eadU, 0x0cd3d70cU, + 0x2be2fb2bU, 0x1dc8c31dU, 0x195e8e19U, 0xc22cb5c2U, 0x8949e989U, 0x12c1cf12U, 0x7e95bf7eU, 0x207dba20U, + 0x6411ea64U, 0x840b7784U, 0x6dc5396dU, 0x6a89af6aU, 0xd17c33d1U, 0xa171c9a1U, 0xceff62ceU, 0x37bb7137U, + 0xfb0f81fbU, 0x3db5793dU, 0x51e10951U, 0xdc3eaddcU, 0x2d3f242dU, 0xa476cda4U, 0x9d55f99dU, 0xee82d8eeU, + 0x8640e586U, 0xae78c5aeU, 0xcd25b9cdU, 0x04964d04U, 0x55774455U, 0x0a0e080aU, 0x13508613U, 0x30f7e730U, + 0xd337a1d3U, 0x40fa1d40U, 0x3461aa34U, 0x8c4eed8cU, 0xb3b006b3U, 0x6c54706cU, 0x2a73b22aU, 0x523bd252U, + 0x0b9f410bU, 0x8b027b8bU, 0x88d8a088U, 0x4ff3114fU, 0x67cb3167U, 0x4627c246U, 0xc06727c0U, 0xb4fc90b4U, + 0x28382028U, 0x7f04f67fU, 0x78486078U, 0x2ee5ff2eU, 0x074c9607U, 0x4b655c4bU, 0xc72bb1c7U, 0x6f8eab6fU, + 0x0d429e0dU, 0xbbf59cbbU, 0xf2db52f2U, 0xf34a1bf3U, 0xa63d5fa6U, 0x59a49359U, 0xbcb90abcU, 0x3af9ef3aU, + 0xef1391efU, 0xfe0885feU, 0x01914901U, 0x6116ee61U, 0x7cde2d7cU, 0xb2214fb2U, 0x42b18f42U, 0xdb723bdbU, + 0xb82f47b8U, 0x48bf8748U, 0x2cae6d2cU, 0xe3c046e3U, 0x573cd657U, 0x859a3e85U, 0x29a96929U, 0x7d4f647dU, + 0x94812a94U, 0x492ece49U, 0x17c6cb17U, 0xca692fcaU, 0xc3bdfcc3U, 0x5ca3975cU, 0x5ee8055eU, 0xd0ed7ad0U, + 0x87d1ac87U, 0x8e057f8eU, 0xba64d5baU, 0xa8a51aa8U, 0xb7264bb7U, 0xb9be0eb9U, 0x6087a760U, 0xf8d55af8U, + 0x22362822U, 0x111b1411U, 0xde753fdeU, 0x79d92979U, 0xaaee88aaU, 0x332d3c33U, 0x5f794c5fU, 
0xb6b702b6U, + 0x96cab896U, 0x5835da58U, 0x9cc4b09cU, 0xfc4317fcU, 0x1a84551aU, 0xf64d1ff6U, 0x1c598a1cU, 0x38b27d38U, + 0xac3357acU, 0x18cfc718U, 0xf4068df4U, 0x69537469U, 0x749bb774U, 0xf597c4f5U, 0x56ad9f56U, 0xdae372daU, + 0xd5ea7ed5U, 0x4af4154aU, 0x9e8f229eU, 0xa2ab12a2U, 0x4e62584eU, 0xe85f07e8U, 0xe51d99e5U, 0x39233439U, + 0xc1f66ec1U, 0x446c5044U, 0x5d32de5dU, 0x72466872U, 0x26a06526U, 0x93cdbc93U, 0x03dadb03U, 0xc6baf8c6U, + 0xfa9ec8faU, 0x82d6a882U, 0xcf6e2bcfU, 0x50704050U, 0xeb85dcebU, 0x750afe75U, 0x8a93328aU, 0x8ddfa48dU, + 0x4c29ca4cU, 0x141c1014U, 0x73d72173U, 0xccb4f0ccU, 0x09d4d309U, 0x108a5d10U, 0xe2510fe2U, 0x00000000U, + 0x9a196f9aU, 0xe01a9de0U, 0x8f94368fU, 0xe6c742e6U, 0xecc94aecU, 0xfdd25efdU, 0xab7fc1abU, 0xd8a8e0d8U + }, +}; + +static const uint32 RS[8][256] = { + { + 0x00000000U, 0xa402a401U, 0x05040502U, 0xa106a103U, 0x0a080a04U, 0xae0aae05U, 0x0f0c0f06U, 0xab0eab07U, + 0x14101408U, 0xb012b009U, 0x1114110aU, 0xb516b50bU, 0x1e181e0cU, 0xba1aba0dU, 0x1b1c1b0eU, 0xbf1ebf0fU, + 0x28202810U, 0x8c228c11U, 0x2d242d12U, 0x89268913U, 0x22282214U, 0x862a8615U, 0x272c2716U, 0x832e8317U, + 0x3c303c18U, 0x98329819U, 0x3934391aU, 0x9d369d1bU, 0x3638361cU, 0x923a921dU, 0x333c331eU, 0x973e971fU, + 0x50405020U, 0xf442f421U, 0x55445522U, 0xf146f123U, 0x5a485a24U, 0xfe4afe25U, 0x5f4c5f26U, 0xfb4efb27U, + 0x44504428U, 0xe052e029U, 0x4154412aU, 0xe556e52bU, 0x4e584e2cU, 0xea5aea2dU, 0x4b5c4b2eU, 0xef5eef2fU, + 0x78607830U, 0xdc62dc31U, 0x7d647d32U, 0xd966d933U, 0x72687234U, 0xd66ad635U, 0x776c7736U, 0xd36ed337U, + 0x6c706c38U, 0xc872c839U, 0x6974693aU, 0xcd76cd3bU, 0x6678663cU, 0xc27ac23dU, 0x637c633eU, 0xc77ec73fU, + 0xa080a040U, 0x04820441U, 0xa584a542U, 0x01860143U, 0xaa88aa44U, 0x0e8a0e45U, 0xaf8caf46U, 0x0b8e0b47U, + 0xb490b448U, 0x10921049U, 0xb194b14aU, 0x1596154bU, 0xbe98be4cU, 0x1a9a1a4dU, 0xbb9cbb4eU, 0x1f9e1f4fU, + 0x88a08850U, 0x2ca22c51U, 0x8da48d52U, 0x29a62953U, 0x82a88254U, 0x26aa2655U, 0x87ac8756U, 0x23ae2357U, + 0x9cb09c58U, 0x38b23859U, 
0x99b4995aU, 0x3db63d5bU, 0x96b8965cU, 0x32ba325dU, 0x93bc935eU, 0x37be375fU, + 0xf0c0f060U, 0x54c25461U, 0xf5c4f562U, 0x51c65163U, 0xfac8fa64U, 0x5eca5e65U, 0xffccff66U, 0x5bce5b67U, + 0xe4d0e468U, 0x40d24069U, 0xe1d4e16aU, 0x45d6456bU, 0xeed8ee6cU, 0x4ada4a6dU, 0xebdceb6eU, 0x4fde4f6fU, + 0xd8e0d870U, 0x7ce27c71U, 0xdde4dd72U, 0x79e67973U, 0xd2e8d274U, 0x76ea7675U, 0xd7ecd776U, 0x73ee7377U, + 0xccf0cc78U, 0x68f26879U, 0xc9f4c97aU, 0x6df66d7bU, 0xc6f8c67cU, 0x62fa627dU, 0xc3fcc37eU, 0x67fe677fU, + 0x0d4d0d80U, 0xa94fa981U, 0x08490882U, 0xac4bac83U, 0x07450784U, 0xa347a385U, 0x02410286U, 0xa643a687U, + 0x195d1988U, 0xbd5fbd89U, 0x1c591c8aU, 0xb85bb88bU, 0x1355138cU, 0xb757b78dU, 0x1651168eU, 0xb253b28fU, + 0x256d2590U, 0x816f8191U, 0x20692092U, 0x846b8493U, 0x2f652f94U, 0x8b678b95U, 0x2a612a96U, 0x8e638e97U, + 0x317d3198U, 0x957f9599U, 0x3479349aU, 0x907b909bU, 0x3b753b9cU, 0x9f779f9dU, 0x3e713e9eU, 0x9a739a9fU, + 0x5d0d5da0U, 0xf90ff9a1U, 0x580958a2U, 0xfc0bfca3U, 0x570557a4U, 0xf307f3a5U, 0x520152a6U, 0xf603f6a7U, + 0x491d49a8U, 0xed1feda9U, 0x4c194caaU, 0xe81be8abU, 0x431543acU, 0xe717e7adU, 0x461146aeU, 0xe213e2afU, + 0x752d75b0U, 0xd12fd1b1U, 0x702970b2U, 0xd42bd4b3U, 0x7f257fb4U, 0xdb27dbb5U, 0x7a217ab6U, 0xde23deb7U, + 0x613d61b8U, 0xc53fc5b9U, 0x643964baU, 0xc03bc0bbU, 0x6b356bbcU, 0xcf37cfbdU, 0x6e316ebeU, 0xca33cabfU, + 0xadcdadc0U, 0x09cf09c1U, 0xa8c9a8c2U, 0x0ccb0cc3U, 0xa7c5a7c4U, 0x03c703c5U, 0xa2c1a2c6U, 0x06c306c7U, + 0xb9ddb9c8U, 0x1ddf1dc9U, 0xbcd9bccaU, 0x18db18cbU, 0xb3d5b3ccU, 0x17d717cdU, 0xb6d1b6ceU, 0x12d312cfU, + 0x85ed85d0U, 0x21ef21d1U, 0x80e980d2U, 0x24eb24d3U, 0x8fe58fd4U, 0x2be72bd5U, 0x8ae18ad6U, 0x2ee32ed7U, + 0x91fd91d8U, 0x35ff35d9U, 0x94f994daU, 0x30fb30dbU, 0x9bf59bdcU, 0x3ff73fddU, 0x9ef19edeU, 0x3af33adfU, + 0xfd8dfde0U, 0x598f59e1U, 0xf889f8e2U, 0x5c8b5ce3U, 0xf785f7e4U, 0x538753e5U, 0xf281f2e6U, 0x568356e7U, + 0xe99de9e8U, 0x4d9f4de9U, 0xec99eceaU, 0x489b48ebU, 0xe395e3ecU, 0x479747edU, 0xe691e6eeU, 0x429342efU, + 
0xd5add5f0U, 0x71af71f1U, 0xd0a9d0f2U, 0x74ab74f3U, 0xdfa5dff4U, 0x7ba77bf5U, 0xdaa1daf6U, 0x7ea37ef7U, + 0xc1bdc1f8U, 0x65bf65f9U, 0xc4b9c4faU, 0x60bb60fbU, 0xcbb5cbfcU, 0x6fb76ffdU, 0xceb1cefeU, 0x6ab36affU + }, + { + 0x00000000U, 0x55a156a4U, 0xaa0fac05U, 0xffaefaa1U, 0x191e150aU, 0x4cbf43aeU, 0xb311b90fU, 0xe6b0efabU, + 0x323c2a14U, 0x679d7cb0U, 0x98338611U, 0xcd92d0b5U, 0x2b223f1eU, 0x7e8369baU, 0x812d931bU, 0xd48cc5bfU, + 0x64785428U, 0x31d9028cU, 0xce77f82dU, 0x9bd6ae89U, 0x7d664122U, 0x28c71786U, 0xd769ed27U, 0x82c8bb83U, + 0x56447e3cU, 0x03e52898U, 0xfc4bd239U, 0xa9ea849dU, 0x4f5a6b36U, 0x1afb3d92U, 0xe555c733U, 0xb0f49197U, + 0xc8f0a850U, 0x9d51fef4U, 0x62ff0455U, 0x375e52f1U, 0xd1eebd5aU, 0x844febfeU, 0x7be1115fU, 0x2e4047fbU, + 0xfacc8244U, 0xaf6dd4e0U, 0x50c32e41U, 0x056278e5U, 0xe3d2974eU, 0xb673c1eaU, 0x49dd3b4bU, 0x1c7c6defU, + 0xac88fc78U, 0xf929aadcU, 0x0687507dU, 0x532606d9U, 0xb596e972U, 0xe037bfd6U, 0x1f994577U, 0x4a3813d3U, + 0x9eb4d66cU, 0xcb1580c8U, 0x34bb7a69U, 0x611a2ccdU, 0x87aac366U, 0xd20b95c2U, 0x2da56f63U, 0x780439c7U, + 0xddad1da0U, 0x880c4b04U, 0x77a2b1a5U, 0x2203e701U, 0xc4b308aaU, 0x91125e0eU, 0x6ebca4afU, 0x3b1df20bU, + 0xef9137b4U, 0xba306110U, 0x459e9bb1U, 0x103fcd15U, 0xf68f22beU, 0xa32e741aU, 0x5c808ebbU, 0x0921d81fU, + 0xb9d54988U, 0xec741f2cU, 0x13dae58dU, 0x467bb329U, 0xa0cb5c82U, 0xf56a0a26U, 0x0ac4f087U, 0x5f65a623U, + 0x8be9639cU, 0xde483538U, 0x21e6cf99U, 0x7447993dU, 0x92f77696U, 0xc7562032U, 0x38f8da93U, 0x6d598c37U, + 0x155db5f0U, 0x40fce354U, 0xbf5219f5U, 0xeaf34f51U, 0x0c43a0faU, 0x59e2f65eU, 0xa64c0cffU, 0xf3ed5a5bU, + 0x27619fe4U, 0x72c0c940U, 0x8d6e33e1U, 0xd8cf6545U, 0x3e7f8aeeU, 0x6bdedc4aU, 0x947026ebU, 0xc1d1704fU, + 0x7125e1d8U, 0x2484b77cU, 0xdb2a4dddU, 0x8e8b1b79U, 0x683bf4d2U, 0x3d9aa276U, 0xc23458d7U, 0x97950e73U, + 0x4319cbccU, 0x16b89d68U, 0xe91667c9U, 0xbcb7316dU, 0x5a07dec6U, 0x0fa68862U, 0xf00872c3U, 0xa5a92467U, + 0xf7173a0dU, 0xa2b66ca9U, 0x5d189608U, 0x08b9c0acU, 0xee092f07U, 0xbba879a3U, 
0x44068302U, 0x11a7d5a6U, + 0xc52b1019U, 0x908a46bdU, 0x6f24bc1cU, 0x3a85eab8U, 0xdc350513U, 0x899453b7U, 0x763aa916U, 0x239bffb2U, + 0x936f6e25U, 0xc6ce3881U, 0x3960c220U, 0x6cc19484U, 0x8a717b2fU, 0xdfd02d8bU, 0x207ed72aU, 0x75df818eU, + 0xa1534431U, 0xf4f21295U, 0x0b5ce834U, 0x5efdbe90U, 0xb84d513bU, 0xedec079fU, 0x1242fd3eU, 0x47e3ab9aU, + 0x3fe7925dU, 0x6a46c4f9U, 0x95e83e58U, 0xc04968fcU, 0x26f98757U, 0x7358d1f3U, 0x8cf62b52U, 0xd9577df6U, + 0x0ddbb849U, 0x587aeeedU, 0xa7d4144cU, 0xf27542e8U, 0x14c5ad43U, 0x4164fbe7U, 0xbeca0146U, 0xeb6b57e2U, + 0x5b9fc675U, 0x0e3e90d1U, 0xf1906a70U, 0xa4313cd4U, 0x4281d37fU, 0x172085dbU, 0xe88e7f7aU, 0xbd2f29deU, + 0x69a3ec61U, 0x3c02bac5U, 0xc3ac4064U, 0x960d16c0U, 0x70bdf96bU, 0x251cafcfU, 0xdab2556eU, 0x8f1303caU, + 0x2aba27adU, 0x7f1b7109U, 0x80b58ba8U, 0xd514dd0cU, 0x33a432a7U, 0x66056403U, 0x99ab9ea2U, 0xcc0ac806U, + 0x18860db9U, 0x4d275b1dU, 0xb289a1bcU, 0xe728f718U, 0x019818b3U, 0x54394e17U, 0xab97b4b6U, 0xfe36e212U, + 0x4ec27385U, 0x1b632521U, 0xe4cddf80U, 0xb16c8924U, 0x57dc668fU, 0x027d302bU, 0xfdd3ca8aU, 0xa8729c2eU, + 0x7cfe5991U, 0x295f0f35U, 0xd6f1f594U, 0x8350a330U, 0x65e04c9bU, 0x30411a3fU, 0xcfefe09eU, 0x9a4eb63aU, + 0xe24a8ffdU, 0xb7ebd959U, 0x484523f8U, 0x1de4755cU, 0xfb549af7U, 0xaef5cc53U, 0x515b36f2U, 0x04fa6056U, + 0xd076a5e9U, 0x85d7f34dU, 0x7a7909ecU, 0x2fd85f48U, 0xc968b0e3U, 0x9cc9e647U, 0x63671ce6U, 0x36c64a42U, + 0x8632dbd5U, 0xd3938d71U, 0x2c3d77d0U, 0x799c2174U, 0x9f2ccedfU, 0xca8d987bU, 0x352362daU, 0x6082347eU, + 0xb40ef1c1U, 0xe1afa765U, 0x1e015dc4U, 0x4ba00b60U, 0xad10e4cbU, 0xf8b1b26fU, 0x071f48ceU, 0x52be1e6aU + }, + { + 0x00000000U, 0x87fc8255U, 0x43b549aaU, 0xc449cbffU, 0x86279219U, 0x01db104cU, 0xc592dbb3U, 0x426e59e6U, + 0x414e6932U, 0xc6b2eb67U, 0x02fb2098U, 0x8507a2cdU, 0xc769fb2bU, 0x4095797eU, 0x84dcb281U, 0x032030d4U, + 0x829cd264U, 0x05605031U, 0xc1299bceU, 0x46d5199bU, 0x04bb407dU, 0x8347c228U, 0x470e09d7U, 0xc0f28b82U, + 0xc3d2bb56U, 0x442e3903U, 0x8067f2fcU, 0x079b70a9U, 
0x45f5294fU, 0xc209ab1aU, 0x064060e5U, 0x81bce2b0U, + 0x4975e9c8U, 0xce896b9dU, 0x0ac0a062U, 0x8d3c2237U, 0xcf527bd1U, 0x48aef984U, 0x8ce7327bU, 0x0b1bb02eU, + 0x083b80faU, 0x8fc702afU, 0x4b8ec950U, 0xcc724b05U, 0x8e1c12e3U, 0x09e090b6U, 0xcda95b49U, 0x4a55d91cU, + 0xcbe93bacU, 0x4c15b9f9U, 0x885c7206U, 0x0fa0f053U, 0x4dcea9b5U, 0xca322be0U, 0x0e7be01fU, 0x8987624aU, + 0x8aa7529eU, 0x0d5bd0cbU, 0xc9121b34U, 0x4eee9961U, 0x0c80c087U, 0x8b7c42d2U, 0x4f35892dU, 0xc8c90b78U, + 0x92ea9fddU, 0x15161d88U, 0xd15fd677U, 0x56a35422U, 0x14cd0dc4U, 0x93318f91U, 0x5778446eU, 0xd084c63bU, + 0xd3a4f6efU, 0x545874baU, 0x9011bf45U, 0x17ed3d10U, 0x558364f6U, 0xd27fe6a3U, 0x16362d5cU, 0x91caaf09U, + 0x10764db9U, 0x978acfecU, 0x53c30413U, 0xd43f8646U, 0x9651dfa0U, 0x11ad5df5U, 0xd5e4960aU, 0x5218145fU, + 0x5138248bU, 0xd6c4a6deU, 0x128d6d21U, 0x9571ef74U, 0xd71fb692U, 0x50e334c7U, 0x94aaff38U, 0x13567d6dU, + 0xdb9f7615U, 0x5c63f440U, 0x982a3fbfU, 0x1fd6bdeaU, 0x5db8e40cU, 0xda446659U, 0x1e0dada6U, 0x99f12ff3U, + 0x9ad11f27U, 0x1d2d9d72U, 0xd964568dU, 0x5e98d4d8U, 0x1cf68d3eU, 0x9b0a0f6bU, 0x5f43c494U, 0xd8bf46c1U, + 0x5903a471U, 0xdeff2624U, 0x1ab6eddbU, 0x9d4a6f8eU, 0xdf243668U, 0x58d8b43dU, 0x9c917fc2U, 0x1b6dfd97U, + 0x184dcd43U, 0x9fb14f16U, 0x5bf884e9U, 0xdc0406bcU, 0x9e6a5f5aU, 0x1996dd0fU, 0xdddf16f0U, 0x5a2394a5U, + 0x699973f7U, 0xee65f1a2U, 0x2a2c3a5dU, 0xadd0b808U, 0xefbee1eeU, 0x684263bbU, 0xac0ba844U, 0x2bf72a11U, + 0x28d71ac5U, 0xaf2b9890U, 0x6b62536fU, 0xec9ed13aU, 0xaef088dcU, 0x290c0a89U, 0xed45c176U, 0x6ab94323U, + 0xeb05a193U, 0x6cf923c6U, 0xa8b0e839U, 0x2f4c6a6cU, 0x6d22338aU, 0xeadeb1dfU, 0x2e977a20U, 0xa96bf875U, + 0xaa4bc8a1U, 0x2db74af4U, 0xe9fe810bU, 0x6e02035eU, 0x2c6c5ab8U, 0xab90d8edU, 0x6fd91312U, 0xe8259147U, + 0x20ec9a3fU, 0xa710186aU, 0x6359d395U, 0xe4a551c0U, 0xa6cb0826U, 0x21378a73U, 0xe57e418cU, 0x6282c3d9U, + 0x61a2f30dU, 0xe65e7158U, 0x2217baa7U, 0xa5eb38f2U, 0xe7856114U, 0x6079e341U, 0xa43028beU, 0x23ccaaebU, + 0xa270485bU, 0x258cca0eU, 
0xe1c501f1U, 0x663983a4U, 0x2457da42U, 0xa3ab5817U, 0x67e293e8U, 0xe01e11bdU, + 0xe33e2169U, 0x64c2a33cU, 0xa08b68c3U, 0x2777ea96U, 0x6519b370U, 0xe2e53125U, 0x26acfadaU, 0xa150788fU, + 0xfb73ec2aU, 0x7c8f6e7fU, 0xb8c6a580U, 0x3f3a27d5U, 0x7d547e33U, 0xfaa8fc66U, 0x3ee13799U, 0xb91db5ccU, + 0xba3d8518U, 0x3dc1074dU, 0xf988ccb2U, 0x7e744ee7U, 0x3c1a1701U, 0xbbe69554U, 0x7faf5eabU, 0xf853dcfeU, + 0x79ef3e4eU, 0xfe13bc1bU, 0x3a5a77e4U, 0xbda6f5b1U, 0xffc8ac57U, 0x78342e02U, 0xbc7de5fdU, 0x3b8167a8U, + 0x38a1577cU, 0xbf5dd529U, 0x7b141ed6U, 0xfce89c83U, 0xbe86c565U, 0x397a4730U, 0xfd338ccfU, 0x7acf0e9aU, + 0xb20605e2U, 0x35fa87b7U, 0xf1b34c48U, 0x764fce1dU, 0x342197fbU, 0xb3dd15aeU, 0x7794de51U, 0xf0685c04U, + 0xf3486cd0U, 0x74b4ee85U, 0xb0fd257aU, 0x3701a72fU, 0x756ffec9U, 0xf2937c9cU, 0x36dab763U, 0xb1263536U, + 0x309ad786U, 0xb76655d3U, 0x732f9e2cU, 0xf4d31c79U, 0xb6bd459fU, 0x3141c7caU, 0xf5080c35U, 0x72f48e60U, + 0x71d4beb4U, 0xf6283ce1U, 0x3261f71eU, 0xb59d754bU, 0xf7f32cadU, 0x700faef8U, 0xb4466507U, 0x33bae752U + }, + { + 0x00000000U, 0x5ac1f387U, 0xb4cfab43U, 0xee0e58c4U, 0x25d31b86U, 0x7f12e801U, 0x911cb0c5U, 0xcbdd4342U, + 0x4aeb3641U, 0x102ac5c6U, 0xfe249d02U, 0xa4e56e85U, 0x6f382dc7U, 0x35f9de40U, 0xdbf78684U, 0x81367503U, + 0x949b6c82U, 0xce5a9f05U, 0x2054c7c1U, 0x7a953446U, 0xb1487704U, 0xeb898483U, 0x0587dc47U, 0x5f462fc0U, + 0xde705ac3U, 0x84b1a944U, 0x6abff180U, 0x307e0207U, 0xfba34145U, 0xa162b2c2U, 0x4f6cea06U, 0x15ad1981U, + 0x657bd849U, 0x3fba2bceU, 0xd1b4730aU, 0x8b75808dU, 0x40a8c3cfU, 0x1a693048U, 0xf467688cU, 0xaea69b0bU, + 0x2f90ee08U, 0x75511d8fU, 0x9b5f454bU, 0xc19eb6ccU, 0x0a43f58eU, 0x50820609U, 0xbe8c5ecdU, 0xe44dad4aU, + 0xf1e0b4cbU, 0xab21474cU, 0x452f1f88U, 0x1feeec0fU, 0xd433af4dU, 0x8ef25ccaU, 0x60fc040eU, 0x3a3df789U, + 0xbb0b828aU, 0xe1ca710dU, 0x0fc429c9U, 0x5505da4eU, 0x9ed8990cU, 0xc4196a8bU, 0x2a17324fU, 0x70d6c1c8U, + 0xcaf6fd92U, 0x90370e15U, 0x7e3956d1U, 0x24f8a556U, 0xef25e614U, 0xb5e41593U, 0x5bea4d57U, 0x012bbed0U, + 
0x801dcbd3U, 0xdadc3854U, 0x34d26090U, 0x6e139317U, 0xa5ced055U, 0xff0f23d2U, 0x11017b16U, 0x4bc08891U, + 0x5e6d9110U, 0x04ac6297U, 0xeaa23a53U, 0xb063c9d4U, 0x7bbe8a96U, 0x217f7911U, 0xcf7121d5U, 0x95b0d252U, + 0x1486a751U, 0x4e4754d6U, 0xa0490c12U, 0xfa88ff95U, 0x3155bcd7U, 0x6b944f50U, 0x859a1794U, 0xdf5be413U, + 0xaf8d25dbU, 0xf54cd65cU, 0x1b428e98U, 0x41837d1fU, 0x8a5e3e5dU, 0xd09fcddaU, 0x3e91951eU, 0x64506699U, + 0xe566139aU, 0xbfa7e01dU, 0x51a9b8d9U, 0x0b684b5eU, 0xc0b5081cU, 0x9a74fb9bU, 0x747aa35fU, 0x2ebb50d8U, + 0x3b164959U, 0x61d7badeU, 0x8fd9e21aU, 0xd518119dU, 0x1ec552dfU, 0x4404a158U, 0xaa0af99cU, 0xf0cb0a1bU, + 0x71fd7f18U, 0x2b3c8c9fU, 0xc532d45bU, 0x9ff327dcU, 0x542e649eU, 0x0eef9719U, 0xe0e1cfddU, 0xba203c5aU, + 0xd9a1b769U, 0x836044eeU, 0x6d6e1c2aU, 0x37afefadU, 0xfc72acefU, 0xa6b35f68U, 0x48bd07acU, 0x127cf42bU, + 0x934a8128U, 0xc98b72afU, 0x27852a6bU, 0x7d44d9ecU, 0xb6999aaeU, 0xec586929U, 0x025631edU, 0x5897c26aU, + 0x4d3adbebU, 0x17fb286cU, 0xf9f570a8U, 0xa334832fU, 0x68e9c06dU, 0x322833eaU, 0xdc266b2eU, 0x86e798a9U, + 0x07d1edaaU, 0x5d101e2dU, 0xb31e46e9U, 0xe9dfb56eU, 0x2202f62cU, 0x78c305abU, 0x96cd5d6fU, 0xcc0caee8U, + 0xbcda6f20U, 0xe61b9ca7U, 0x0815c463U, 0x52d437e4U, 0x990974a6U, 0xc3c88721U, 0x2dc6dfe5U, 0x77072c62U, + 0xf6315961U, 0xacf0aae6U, 0x42fef222U, 0x183f01a5U, 0xd3e242e7U, 0x8923b160U, 0x672de9a4U, 0x3dec1a23U, + 0x284103a2U, 0x7280f025U, 0x9c8ea8e1U, 0xc64f5b66U, 0x0d921824U, 0x5753eba3U, 0xb95db367U, 0xe39c40e0U, + 0x62aa35e3U, 0x386bc664U, 0xd6659ea0U, 0x8ca46d27U, 0x47792e65U, 0x1db8dde2U, 0xf3b68526U, 0xa97776a1U, + 0x13574afbU, 0x4996b97cU, 0xa798e1b8U, 0xfd59123fU, 0x3684517dU, 0x6c45a2faU, 0x824bfa3eU, 0xd88a09b9U, + 0x59bc7cbaU, 0x037d8f3dU, 0xed73d7f9U, 0xb7b2247eU, 0x7c6f673cU, 0x26ae94bbU, 0xc8a0cc7fU, 0x92613ff8U, + 0x87cc2679U, 0xdd0dd5feU, 0x33038d3aU, 0x69c27ebdU, 0xa21f3dffU, 0xf8dece78U, 0x16d096bcU, 0x4c11653bU, + 0xcd271038U, 0x97e6e3bfU, 0x79e8bb7bU, 0x232948fcU, 0xe8f40bbeU, 0xb235f839U, 0x5c3ba0fdU, 
0x06fa537aU, + 0x762c92b2U, 0x2ced6135U, 0xc2e339f1U, 0x9822ca76U, 0x53ff8934U, 0x093e7ab3U, 0xe7302277U, 0xbdf1d1f0U, + 0x3cc7a4f3U, 0x66065774U, 0x88080fb0U, 0xd2c9fc37U, 0x1914bf75U, 0x43d54cf2U, 0xaddb1436U, 0xf71ae7b1U, + 0xe2b7fe30U, 0xb8760db7U, 0x56785573U, 0x0cb9a6f4U, 0xc764e5b6U, 0x9da51631U, 0x73ab4ef5U, 0x296abd72U, + 0xa85cc871U, 0xf29d3bf6U, 0x1c936332U, 0x465290b5U, 0x8d8fd3f7U, 0xd74e2070U, 0x394078b4U, 0x63818b33U + }, + { + 0x00000000U, 0x58471e5aU, 0xb08e3cb4U, 0xe8c922eeU, 0x2d517825U, 0x7516667fU, 0x9ddf4491U, 0xc5985acbU, + 0x5aa2f04aU, 0x02e5ee10U, 0xea2cccfeU, 0xb26bd2a4U, 0x77f3886fU, 0x2fb49635U, 0xc77db4dbU, 0x9f3aaa81U, + 0xb409ad94U, 0xec4eb3ceU, 0x04879120U, 0x5cc08f7aU, 0x9958d5b1U, 0xc11fcbebU, 0x29d6e905U, 0x7191f75fU, + 0xeeab5ddeU, 0xb6ec4384U, 0x5e25616aU, 0x06627f30U, 0xc3fa25fbU, 0x9bbd3ba1U, 0x7374194fU, 0x2b330715U, + 0x25121765U, 0x7d55093fU, 0x959c2bd1U, 0xcddb358bU, 0x08436f40U, 0x5004711aU, 0xb8cd53f4U, 0xe08a4daeU, + 0x7fb0e72fU, 0x27f7f975U, 0xcf3edb9bU, 0x9779c5c1U, 0x52e19f0aU, 0x0aa68150U, 0xe26fa3beU, 0xba28bde4U, + 0x911bbaf1U, 0xc95ca4abU, 0x21958645U, 0x79d2981fU, 0xbc4ac2d4U, 0xe40ddc8eU, 0x0cc4fe60U, 0x5483e03aU, + 0xcbb94abbU, 0x93fe54e1U, 0x7b37760fU, 0x23706855U, 0xe6e8329eU, 0xbeaf2cc4U, 0x56660e2aU, 0x0e211070U, + 0x4a242ecaU, 0x12633090U, 0xfaaa127eU, 0xa2ed0c24U, 0x677556efU, 0x3f3248b5U, 0xd7fb6a5bU, 0x8fbc7401U, + 0x1086de80U, 0x48c1c0daU, 0xa008e234U, 0xf84ffc6eU, 0x3dd7a6a5U, 0x6590b8ffU, 0x8d599a11U, 0xd51e844bU, + 0xfe2d835eU, 0xa66a9d04U, 0x4ea3bfeaU, 0x16e4a1b0U, 0xd37cfb7bU, 0x8b3be521U, 0x63f2c7cfU, 0x3bb5d995U, + 0xa48f7314U, 0xfcc86d4eU, 0x14014fa0U, 0x4c4651faU, 0x89de0b31U, 0xd199156bU, 0x39503785U, 0x611729dfU, + 0x6f3639afU, 0x377127f5U, 0xdfb8051bU, 0x87ff1b41U, 0x4267418aU, 0x1a205fd0U, 0xf2e97d3eU, 0xaaae6364U, + 0x3594c9e5U, 0x6dd3d7bfU, 0x851af551U, 0xdd5deb0bU, 0x18c5b1c0U, 0x4082af9aU, 0xa84b8d74U, 0xf00c932eU, + 0xdb3f943bU, 0x83788a61U, 0x6bb1a88fU, 0x33f6b6d5U, 0xf66eec1eU, 
0xae29f244U, 0x46e0d0aaU, 0x1ea7cef0U, + 0x819d6471U, 0xd9da7a2bU, 0x311358c5U, 0x6954469fU, 0xaccc1c54U, 0xf48b020eU, 0x1c4220e0U, 0x44053ebaU, + 0x94485cd9U, 0xcc0f4283U, 0x24c6606dU, 0x7c817e37U, 0xb91924fcU, 0xe15e3aa6U, 0x09971848U, 0x51d00612U, + 0xceeaac93U, 0x96adb2c9U, 0x7e649027U, 0x26238e7dU, 0xe3bbd4b6U, 0xbbfccaecU, 0x5335e802U, 0x0b72f658U, + 0x2041f14dU, 0x7806ef17U, 0x90cfcdf9U, 0xc888d3a3U, 0x0d108968U, 0x55579732U, 0xbd9eb5dcU, 0xe5d9ab86U, + 0x7ae30107U, 0x22a41f5dU, 0xca6d3db3U, 0x922a23e9U, 0x57b27922U, 0x0ff56778U, 0xe73c4596U, 0xbf7b5bccU, + 0xb15a4bbcU, 0xe91d55e6U, 0x01d47708U, 0x59936952U, 0x9c0b3399U, 0xc44c2dc3U, 0x2c850f2dU, 0x74c21177U, + 0xebf8bbf6U, 0xb3bfa5acU, 0x5b768742U, 0x03319918U, 0xc6a9c3d3U, 0x9eeedd89U, 0x7627ff67U, 0x2e60e13dU, + 0x0553e628U, 0x5d14f872U, 0xb5ddda9cU, 0xed9ac4c6U, 0x28029e0dU, 0x70458057U, 0x988ca2b9U, 0xc0cbbce3U, + 0x5ff11662U, 0x07b60838U, 0xef7f2ad6U, 0xb738348cU, 0x72a06e47U, 0x2ae7701dU, 0xc22e52f3U, 0x9a694ca9U, + 0xde6c7213U, 0x862b6c49U, 0x6ee24ea7U, 0x36a550fdU, 0xf33d0a36U, 0xab7a146cU, 0x43b33682U, 0x1bf428d8U, + 0x84ce8259U, 0xdc899c03U, 0x3440beedU, 0x6c07a0b7U, 0xa99ffa7cU, 0xf1d8e426U, 0x1911c6c8U, 0x4156d892U, + 0x6a65df87U, 0x3222c1ddU, 0xdaebe333U, 0x82acfd69U, 0x4734a7a2U, 0x1f73b9f8U, 0xf7ba9b16U, 0xaffd854cU, + 0x30c72fcdU, 0x68803197U, 0x80491379U, 0xd80e0d23U, 0x1d9657e8U, 0x45d149b2U, 0xad186b5cU, 0xf55f7506U, + 0xfb7e6576U, 0xa3397b2cU, 0x4bf059c2U, 0x13b74798U, 0xd62f1d53U, 0x8e680309U, 0x66a121e7U, 0x3ee63fbdU, + 0xa1dc953cU, 0xf99b8b66U, 0x1152a988U, 0x4915b7d2U, 0x8c8ded19U, 0xd4caf343U, 0x3c03d1adU, 0x6444cff7U, + 0x4f77c8e2U, 0x1730d6b8U, 0xfff9f456U, 0xa7beea0cU, 0x6226b0c7U, 0x3a61ae9dU, 0xd2a88c73U, 0x8aef9229U, + 0x15d538a8U, 0x4d9226f2U, 0xa55b041cU, 0xfd1c1a46U, 0x3884408dU, 0x60c35ed7U, 0x880a7c39U, 0xd04d6263U + }, + { + 0x00000000U, 0xdbaec658U, 0xfb11c1b0U, 0x20bf07e8U, 0xbb22cf2dU, 0x608c0975U, 0x40330e9dU, 0x9b9dc8c5U, + 0x3b44d35aU, 0xe0ea1502U, 0xc05512eaU, 
0x1bfbd4b2U, 0x80661c77U, 0x5bc8da2fU, 0x7b77ddc7U, 0xa0d91b9fU, + 0x7688ebb4U, 0xad262decU, 0x8d992a04U, 0x5637ec5cU, 0xcdaa2499U, 0x1604e2c1U, 0x36bbe529U, 0xed152371U, + 0x4dcc38eeU, 0x9662feb6U, 0xb6ddf95eU, 0x6d733f06U, 0xf6eef7c3U, 0x2d40319bU, 0x0dff3673U, 0xd651f02bU, + 0xec5d9b25U, 0x37f35d7dU, 0x174c5a95U, 0xcce29ccdU, 0x577f5408U, 0x8cd19250U, 0xac6e95b8U, 0x77c053e0U, + 0xd719487fU, 0x0cb78e27U, 0x2c0889cfU, 0xf7a64f97U, 0x6c3b8752U, 0xb795410aU, 0x972a46e2U, 0x4c8480baU, + 0x9ad57091U, 0x417bb6c9U, 0x61c4b121U, 0xba6a7779U, 0x21f7bfbcU, 0xfa5979e4U, 0xdae67e0cU, 0x0148b854U, + 0xa191a3cbU, 0x7a3f6593U, 0x5a80627bU, 0x812ea423U, 0x1ab36ce6U, 0xc11daabeU, 0xe1a2ad56U, 0x3a0c6b0eU, + 0x95ba7b4aU, 0x4e14bd12U, 0x6eabbafaU, 0xb5057ca2U, 0x2e98b467U, 0xf536723fU, 0xd58975d7U, 0x0e27b38fU, + 0xaefea810U, 0x75506e48U, 0x55ef69a0U, 0x8e41aff8U, 0x15dc673dU, 0xce72a165U, 0xeecda68dU, 0x356360d5U, + 0xe33290feU, 0x389c56a6U, 0x1823514eU, 0xc38d9716U, 0x58105fd3U, 0x83be998bU, 0xa3019e63U, 0x78af583bU, + 0xd87643a4U, 0x03d885fcU, 0x23678214U, 0xf8c9444cU, 0x63548c89U, 0xb8fa4ad1U, 0x98454d39U, 0x43eb8b61U, + 0x79e7e06fU, 0xa2492637U, 0x82f621dfU, 0x5958e787U, 0xc2c52f42U, 0x196be91aU, 0x39d4eef2U, 0xe27a28aaU, + 0x42a33335U, 0x990df56dU, 0xb9b2f285U, 0x621c34ddU, 0xf981fc18U, 0x222f3a40U, 0x02903da8U, 0xd93efbf0U, + 0x0f6f0bdbU, 0xd4c1cd83U, 0xf47eca6bU, 0x2fd00c33U, 0xb44dc4f6U, 0x6fe302aeU, 0x4f5c0546U, 0x94f2c31eU, + 0x342bd881U, 0xef851ed9U, 0xcf3a1931U, 0x1494df69U, 0x8f0917acU, 0x54a7d1f4U, 0x7418d61cU, 0xafb61044U, + 0x6739f694U, 0xbc9730ccU, 0x9c283724U, 0x4786f17cU, 0xdc1b39b9U, 0x07b5ffe1U, 0x270af809U, 0xfca43e51U, + 0x5c7d25ceU, 0x87d3e396U, 0xa76ce47eU, 0x7cc22226U, 0xe75feae3U, 0x3cf12cbbU, 0x1c4e2b53U, 0xc7e0ed0bU, + 0x11b11d20U, 0xca1fdb78U, 0xeaa0dc90U, 0x310e1ac8U, 0xaa93d20dU, 0x713d1455U, 0x518213bdU, 0x8a2cd5e5U, + 0x2af5ce7aU, 0xf15b0822U, 0xd1e40fcaU, 0x0a4ac992U, 0x91d70157U, 0x4a79c70fU, 0x6ac6c0e7U, 0xb16806bfU, + 0x8b646db1U, 
0x50caabe9U, 0x7075ac01U, 0xabdb6a59U, 0x3046a29cU, 0xebe864c4U, 0xcb57632cU, 0x10f9a574U, + 0xb020beebU, 0x6b8e78b3U, 0x4b317f5bU, 0x909fb903U, 0x0b0271c6U, 0xd0acb79eU, 0xf013b076U, 0x2bbd762eU, + 0xfdec8605U, 0x2642405dU, 0x06fd47b5U, 0xdd5381edU, 0x46ce4928U, 0x9d608f70U, 0xbddf8898U, 0x66714ec0U, + 0xc6a8555fU, 0x1d069307U, 0x3db994efU, 0xe61752b7U, 0x7d8a9a72U, 0xa6245c2aU, 0x869b5bc2U, 0x5d359d9aU, + 0xf2838ddeU, 0x292d4b86U, 0x09924c6eU, 0xd23c8a36U, 0x49a142f3U, 0x920f84abU, 0xb2b08343U, 0x691e451bU, + 0xc9c75e84U, 0x126998dcU, 0x32d69f34U, 0xe978596cU, 0x72e591a9U, 0xa94b57f1U, 0x89f45019U, 0x525a9641U, + 0x840b666aU, 0x5fa5a032U, 0x7f1aa7daU, 0xa4b46182U, 0x3f29a947U, 0xe4876f1fU, 0xc43868f7U, 0x1f96aeafU, + 0xbf4fb530U, 0x64e17368U, 0x445e7480U, 0x9ff0b2d8U, 0x046d7a1dU, 0xdfc3bc45U, 0xff7cbbadU, 0x24d27df5U, + 0x1ede16fbU, 0xc570d0a3U, 0xe5cfd74bU, 0x3e611113U, 0xa5fcd9d6U, 0x7e521f8eU, 0x5eed1866U, 0x8543de3eU, + 0x259ac5a1U, 0xfe3403f9U, 0xde8b0411U, 0x0525c249U, 0x9eb80a8cU, 0x4516ccd4U, 0x65a9cb3cU, 0xbe070d64U, + 0x6856fd4fU, 0xb3f83b17U, 0x93473cffU, 0x48e9faa7U, 0xd3743262U, 0x08daf43aU, 0x2865f3d2U, 0xf3cb358aU, + 0x53122e15U, 0x88bce84dU, 0xa803efa5U, 0x73ad29fdU, 0xe830e138U, 0x339e2760U, 0x13212088U, 0xc88fe6d0U + }, + { + 0x00000000U, 0x9e3d68dbU, 0x717ad0fbU, 0xef47b820U, 0xe2f4edbbU, 0x7cc98560U, 0x938e3d40U, 0x0db3559bU, + 0x89a5973bU, 0x1798ffe0U, 0xf8df47c0U, 0x66e22f1bU, 0x6b517a80U, 0xf56c125bU, 0x1a2baa7bU, 0x8416c2a0U, + 0x5f076376U, 0xc13a0badU, 0x2e7db38dU, 0xb040db56U, 0xbdf38ecdU, 0x23cee616U, 0xcc895e36U, 0x52b436edU, + 0xd6a2f44dU, 0x489f9c96U, 0xa7d824b6U, 0x39e54c6dU, 0x345619f6U, 0xaa6b712dU, 0x452cc90dU, 0xdb11a1d6U, + 0xbe0ec6ecU, 0x2033ae37U, 0xcf741617U, 0x51497eccU, 0x5cfa2b57U, 0xc2c7438cU, 0x2d80fbacU, 0xb3bd9377U, + 0x37ab51d7U, 0xa996390cU, 0x46d1812cU, 0xd8ece9f7U, 0xd55fbc6cU, 0x4b62d4b7U, 0xa4256c97U, 0x3a18044cU, + 0xe109a59aU, 0x7f34cd41U, 0x90737561U, 0x0e4e1dbaU, 0x03fd4821U, 0x9dc020faU, 0x728798daU, 
0xecbaf001U, + 0x68ac32a1U, 0xf6915a7aU, 0x19d6e25aU, 0x87eb8a81U, 0x8a58df1aU, 0x1465b7c1U, 0xfb220fe1U, 0x651f673aU, + 0x311cc195U, 0xaf21a94eU, 0x4066116eU, 0xde5b79b5U, 0xd3e82c2eU, 0x4dd544f5U, 0xa292fcd5U, 0x3caf940eU, + 0xb8b956aeU, 0x26843e75U, 0xc9c38655U, 0x57feee8eU, 0x5a4dbb15U, 0xc470d3ceU, 0x2b376beeU, 0xb50a0335U, + 0x6e1ba2e3U, 0xf026ca38U, 0x1f617218U, 0x815c1ac3U, 0x8cef4f58U, 0x12d22783U, 0xfd959fa3U, 0x63a8f778U, + 0xe7be35d8U, 0x79835d03U, 0x96c4e523U, 0x08f98df8U, 0x054ad863U, 0x9b77b0b8U, 0x74300898U, 0xea0d6043U, + 0x8f120779U, 0x112f6fa2U, 0xfe68d782U, 0x6055bf59U, 0x6de6eac2U, 0xf3db8219U, 0x1c9c3a39U, 0x82a152e2U, + 0x06b79042U, 0x988af899U, 0x77cd40b9U, 0xe9f02862U, 0xe4437df9U, 0x7a7e1522U, 0x9539ad02U, 0x0b04c5d9U, + 0xd015640fU, 0x4e280cd4U, 0xa16fb4f4U, 0x3f52dc2fU, 0x32e189b4U, 0xacdce16fU, 0x439b594fU, 0xdda63194U, + 0x59b0f334U, 0xc78d9befU, 0x28ca23cfU, 0xb6f74b14U, 0xbb441e8fU, 0x25797654U, 0xca3ece74U, 0x5403a6afU, + 0x6238cf67U, 0xfc05a7bcU, 0x13421f9cU, 0x8d7f7747U, 0x80cc22dcU, 0x1ef14a07U, 0xf1b6f227U, 0x6f8b9afcU, + 0xeb9d585cU, 0x75a03087U, 0x9ae788a7U, 0x04dae07cU, 0x0969b5e7U, 0x9754dd3cU, 0x7813651cU, 0xe62e0dc7U, + 0x3d3fac11U, 0xa302c4caU, 0x4c457ceaU, 0xd2781431U, 0xdfcb41aaU, 0x41f62971U, 0xaeb19151U, 0x308cf98aU, + 0xb49a3b2aU, 0x2aa753f1U, 0xc5e0ebd1U, 0x5bdd830aU, 0x566ed691U, 0xc853be4aU, 0x2714066aU, 0xb9296eb1U, + 0xdc36098bU, 0x420b6150U, 0xad4cd970U, 0x3371b1abU, 0x3ec2e430U, 0xa0ff8cebU, 0x4fb834cbU, 0xd1855c10U, + 0x55939eb0U, 0xcbaef66bU, 0x24e94e4bU, 0xbad42690U, 0xb767730bU, 0x295a1bd0U, 0xc61da3f0U, 0x5820cb2bU, + 0x83316afdU, 0x1d0c0226U, 0xf24bba06U, 0x6c76d2ddU, 0x61c58746U, 0xfff8ef9dU, 0x10bf57bdU, 0x8e823f66U, + 0x0a94fdc6U, 0x94a9951dU, 0x7bee2d3dU, 0xe5d345e6U, 0xe860107dU, 0x765d78a6U, 0x991ac086U, 0x0727a85dU, + 0x53240ef2U, 0xcd196629U, 0x225ede09U, 0xbc63b6d2U, 0xb1d0e349U, 0x2fed8b92U, 0xc0aa33b2U, 0x5e975b69U, + 0xda8199c9U, 0x44bcf112U, 0xabfb4932U, 0x35c621e9U, 0x38757472U, 
0xa6481ca9U, 0x490fa489U, 0xd732cc52U, + 0x0c236d84U, 0x921e055fU, 0x7d59bd7fU, 0xe364d5a4U, 0xeed7803fU, 0x70eae8e4U, 0x9fad50c4U, 0x0190381fU, + 0x8586fabfU, 0x1bbb9264U, 0xf4fc2a44U, 0x6ac1429fU, 0x67721704U, 0xf94f7fdfU, 0x1608c7ffU, 0x8835af24U, + 0xed2ac81eU, 0x7317a0c5U, 0x9c5018e5U, 0x026d703eU, 0x0fde25a5U, 0x91e34d7eU, 0x7ea4f55eU, 0xe0999d85U, + 0x648f5f25U, 0xfab237feU, 0x15f58fdeU, 0x8bc8e705U, 0x867bb29eU, 0x1846da45U, 0xf7016265U, 0x693c0abeU, + 0xb22dab68U, 0x2c10c3b3U, 0xc3577b93U, 0x5d6a1348U, 0x50d946d3U, 0xcee42e08U, 0x21a39628U, 0xbf9efef3U, + 0x3b883c53U, 0xa5b55488U, 0x4af2eca8U, 0xd4cf8473U, 0xd97cd1e8U, 0x4741b933U, 0xa8060113U, 0x363b69c8U + }, + { + 0x00000000U, 0x0319e59eU, 0x06328771U, 0x052b62efU, 0x0c6443e2U, 0x0f7da67cU, 0x0a56c493U, 0x094f210dU, + 0x18c88689U, 0x1bd16317U, 0x1efa01f8U, 0x1de3e466U, 0x14acc56bU, 0x17b520f5U, 0x129e421aU, 0x1187a784U, + 0x30dd415fU, 0x33c4a4c1U, 0x36efc62eU, 0x35f623b0U, 0x3cb902bdU, 0x3fa0e723U, 0x3a8b85ccU, 0x39926052U, + 0x2815c7d6U, 0x2b0c2248U, 0x2e2740a7U, 0x2d3ea539U, 0x24718434U, 0x276861aaU, 0x22430345U, 0x215ae6dbU, + 0x60f782beU, 0x63ee6720U, 0x66c505cfU, 0x65dce051U, 0x6c93c15cU, 0x6f8a24c2U, 0x6aa1462dU, 0x69b8a3b3U, + 0x783f0437U, 0x7b26e1a9U, 0x7e0d8346U, 0x7d1466d8U, 0x745b47d5U, 0x7742a24bU, 0x7269c0a4U, 0x7170253aU, + 0x502ac3e1U, 0x5333267fU, 0x56184490U, 0x5501a10eU, 0x5c4e8003U, 0x5f57659dU, 0x5a7c0772U, 0x5965e2ecU, + 0x48e24568U, 0x4bfba0f6U, 0x4ed0c219U, 0x4dc92787U, 0x4486068aU, 0x479fe314U, 0x42b481fbU, 0x41ad6465U, + 0xc0a34931U, 0xc3baacafU, 0xc691ce40U, 0xc5882bdeU, 0xccc70ad3U, 0xcfdeef4dU, 0xcaf58da2U, 0xc9ec683cU, + 0xd86bcfb8U, 0xdb722a26U, 0xde5948c9U, 0xdd40ad57U, 0xd40f8c5aU, 0xd71669c4U, 0xd23d0b2bU, 0xd124eeb5U, + 0xf07e086eU, 0xf367edf0U, 0xf64c8f1fU, 0xf5556a81U, 0xfc1a4b8cU, 0xff03ae12U, 0xfa28ccfdU, 0xf9312963U, + 0xe8b68ee7U, 0xebaf6b79U, 0xee840996U, 0xed9dec08U, 0xe4d2cd05U, 0xe7cb289bU, 0xe2e04a74U, 0xe1f9afeaU, + 0xa054cb8fU, 0xa34d2e11U, 0xa6664cfeU, 
0xa57fa960U, 0xac30886dU, 0xaf296df3U, 0xaa020f1cU, 0xa91bea82U, + 0xb89c4d06U, 0xbb85a898U, 0xbeaeca77U, 0xbdb72fe9U, 0xb4f80ee4U, 0xb7e1eb7aU, 0xb2ca8995U, 0xb1d36c0bU, + 0x90898ad0U, 0x93906f4eU, 0x96bb0da1U, 0x95a2e83fU, 0x9cedc932U, 0x9ff42cacU, 0x9adf4e43U, 0x99c6abddU, + 0x88410c59U, 0x8b58e9c7U, 0x8e738b28U, 0x8d6a6eb6U, 0x84254fbbU, 0x873caa25U, 0x8217c8caU, 0x810e2d54U, + 0xcd0b9262U, 0xce1277fcU, 0xcb391513U, 0xc820f08dU, 0xc16fd180U, 0xc276341eU, 0xc75d56f1U, 0xc444b36fU, + 0xd5c314ebU, 0xd6daf175U, 0xd3f1939aU, 0xd0e87604U, 0xd9a75709U, 0xdabeb297U, 0xdf95d078U, 0xdc8c35e6U, + 0xfdd6d33dU, 0xfecf36a3U, 0xfbe4544cU, 0xf8fdb1d2U, 0xf1b290dfU, 0xf2ab7541U, 0xf78017aeU, 0xf499f230U, + 0xe51e55b4U, 0xe607b02aU, 0xe32cd2c5U, 0xe035375bU, 0xe97a1656U, 0xea63f3c8U, 0xef489127U, 0xec5174b9U, + 0xadfc10dcU, 0xaee5f542U, 0xabce97adU, 0xa8d77233U, 0xa198533eU, 0xa281b6a0U, 0xa7aad44fU, 0xa4b331d1U, + 0xb5349655U, 0xb62d73cbU, 0xb3061124U, 0xb01ff4baU, 0xb950d5b7U, 0xba493029U, 0xbf6252c6U, 0xbc7bb758U, + 0x9d215183U, 0x9e38b41dU, 0x9b13d6f2U, 0x980a336cU, 0x91451261U, 0x925cf7ffU, 0x97779510U, 0x946e708eU, + 0x85e9d70aU, 0x86f03294U, 0x83db507bU, 0x80c2b5e5U, 0x898d94e8U, 0x8a947176U, 0x8fbf1399U, 0x8ca6f607U, + 0x0da8db53U, 0x0eb13ecdU, 0x0b9a5c22U, 0x0883b9bcU, 0x01cc98b1U, 0x02d57d2fU, 0x07fe1fc0U, 0x04e7fa5eU, + 0x15605ddaU, 0x1679b844U, 0x1352daabU, 0x104b3f35U, 0x19041e38U, 0x1a1dfba6U, 0x1f369949U, 0x1c2f7cd7U, + 0x3d759a0cU, 0x3e6c7f92U, 0x3b471d7dU, 0x385ef8e3U, 0x3111d9eeU, 0x32083c70U, 0x37235e9fU, 0x343abb01U, + 0x25bd1c85U, 0x26a4f91bU, 0x238f9bf4U, 0x20967e6aU, 0x29d95f67U, 0x2ac0baf9U, 0x2febd816U, 0x2cf23d88U, + 0x6d5f59edU, 0x6e46bc73U, 0x6b6dde9cU, 0x68743b02U, 0x613b1a0fU, 0x6222ff91U, 0x67099d7eU, 0x641078e0U, + 0x7597df64U, 0x768e3afaU, 0x73a55815U, 0x70bcbd8bU, 0x79f39c86U, 0x7aea7918U, 0x7fc11bf7U, 0x7cd8fe69U, + 0x5d8218b2U, 0x5e9bfd2cU, 0x5bb09fc3U, 0x58a97a5dU, 0x51e65b50U, 0x52ffbeceU, 0x57d4dc21U, 0x54cd39bfU, + 0x454a9e3bU, 
0x46537ba5U, 0x4378194aU, 0x4061fcd4U, 0x492eddd9U, 0x4a373847U, 0x4f1c5aa8U, 0x4c05bf36U + }, +}; + +#define ROUNDT(x0, x1, r) \ + f0 = ks->mk_tab[0][x0 & 0xFF] ^ ks->mk_tab[1][(x0 >> 8) & 0xFF] ^ ks->mk_tab[2][(x0 >> 16) & 0xFF] ^ ks->mk_tab[3][(x0 >> 24) & 0xFF]; \ + f1 = ks->mk_tab[0][(x1 >> 24) & 0xFF] ^ ks->mk_tab[1][x1 & 0xFF] ^ ks->mk_tab[2][(x1 >> 8) & 0xFF] ^ ks->mk_tab[3][(x1 >> 16) & 0xFF]; \ + f0 += f1; \ + f1 += f0 + rk[2 * (r) + 9]; \ + f0 += rk[2 * (r) + 8]; + +#define ROUNDA(r) \ + ROUNDT(x0, x1, r) \ + x2 = rotr32(x2 ^ f0, 1); \ + x3 = rotl32(x3, 1) ^ f1; + +#define ROUNDB(r) \ + ROUNDT(x2, x3, r) \ + x0 = rotr32(x0 ^ f0, 1); \ + x1 = rotl32(x1, 1) ^ f1; + +#define RROUNDA(r) \ + ROUNDT(x0, x1, r) \ + x2 = rotl32(x2, 1) ^ f0; \ + x3 = rotr32(x3 ^ f1, 1); + +#define RROUNDB(r) \ + ROUNDT(x2, x3, r) \ + x0 = rotl32(x0, 1) ^ f0; \ + x1 = rotr32(x1 ^ f1, 1); + + +void twofish_set_key(TwofishInstance *instance, const u4byte in_key[]) { + union { + byte S8[16]; + uint32 S32[4]; + } us; int i; - byte S[16] = {0}; - const byte* key = (const byte*) in_key; - u4byte *l_key = instance->l_key; - u4byte *mk_tab = instance->mk_tab; + const byte* key = (const byte*) in_key; - for(i = 0; i != 32; ++i) - rs_mul(&S[4*(i/8)], key[i], i); + us.S32[0] = RS[0][key[0]] ^ RS[1][key[1]] ^ RS[2][key[2]] ^ RS[3][key[3]] ^ RS[4][key[4]] ^ RS[5][key[5]] ^ RS[6][key[6]] ^ RS[7][key[7]]; + us.S32[1] = RS[0][key[8]] ^ RS[1][key[9]] ^ RS[2][key[10]] ^ RS[3][key[11]] ^ RS[4][key[12]] ^ RS[5][key[13]] ^ RS[6][key[14]] ^ RS[7][key[15]]; + us.S32[2] = RS[0][key[16]] ^ RS[1][key[17]] ^ RS[2][key[18]] ^ RS[3][key[19]] ^ RS[4][key[20]] ^ RS[5][key[21]] ^ RS[6][key[22]] ^ RS[7][key[23]]; + us.S32[3] = RS[0][key[24]] ^ RS[1][key[25]] ^ RS[2][key[26]] ^ RS[3][key[27]] ^ RS[4][key[28]] ^ RS[5][key[29]] ^ RS[6][key[30]] ^ RS[7][key[31]]; - for(i = 0; i != 256; ++i) + for (i = 0; i < 256; ++i) { - mk_tab[ i] = MDS0[Q0[Q0[Q1[Q1[i]^S[ 0]]^S[ 4]]^S[ 8]]^S[12]]; - mk_tab[256+i] = 
MDS1[Q0[Q1[Q1[Q0[i]^S[ 1]]^S[ 5]]^S[ 9]]^S[13]]; - mk_tab[512+i] = MDS2[Q1[Q0[Q0[Q0[i]^S[ 2]]^S[ 6]]^S[10]]^S[14]]; - mk_tab[768+i] = MDS3[Q1[Q1[Q0[Q1[i]^S[ 3]]^S[ 7]]^S[11]]^S[15]]; + instance->mk_tab[0][i] = MDSQ[0][Q[0][Q[0][Q[1][Q[1][i] ^ us.S8[0]] ^ us.S8[4]] ^ us.S8[8]] ^ us.S8[12]]; + instance->mk_tab[1][i] = MDSQ[1][Q[0][Q[1][Q[1][Q[0][i] ^ us.S8[1]] ^ us.S8[5]] ^ us.S8[9]] ^ us.S8[13]]; + instance->mk_tab[2][i] = MDSQ[2][Q[1][Q[0][Q[0][Q[0][i] ^ us.S8[2]] ^ us.S8[6]] ^ us.S8[10]] ^ us.S8[14]]; + instance->mk_tab[3][i] = MDSQ[3][Q[1][Q[1][Q[0][Q[1][i] ^ us.S8[3]] ^ us.S8[7]] ^ us.S8[11]] ^ us.S8[15]]; } - for(i = 0; i != 40; i += 2) + for (i = 0; i != 40; i += 2) { - uint32 X = MDS0[Q0[Q0[Q1[Q1[i ]^key[24]]^key[16]]^key[ 8]]^key[ 0]] ^ - MDS1[Q0[Q1[Q1[Q0[i ]^key[25]]^key[17]]^key[ 9]]^key[ 1]] ^ - MDS2[Q1[Q0[Q0[Q0[i ]^key[26]]^key[18]]^key[10]]^key[ 2]] ^ - MDS3[Q1[Q1[Q0[Q1[i ]^key[27]]^key[19]]^key[11]]^key[ 3]]; - uint32 Y = MDS0[Q0[Q0[Q1[Q1[i+1]^key[28]]^key[20]]^key[12]]^key[ 4]] ^ - MDS1[Q0[Q1[Q1[Q0[i+1]^key[29]]^key[21]]^key[13]]^key[ 5]] ^ - MDS2[Q1[Q0[Q0[Q0[i+1]^key[30]]^key[22]]^key[14]]^key[ 6]] ^ - MDS3[Q1[Q1[Q0[Q1[i+1]^key[31]]^key[23]]^key[15]]^key[ 7]]; - Y = rotl32(Y, 8); - X += Y; Y += X; - - l_key[i] = X; - l_key[i+1] = rotl32(Y, 9); + uint32 a = MDSQ[0][Q[0][Q[0][Q[1][Q[1][i] ^ key[24]] ^ key[16]] ^ key[8]] ^ key[0]] ^ MDSQ[1][Q[0][Q[1][Q[1][Q[0][i] ^ key[25]] ^ key[17]] ^ key[9]] ^ key[1]] + ^ MDSQ[2][Q[1][Q[0][Q[0][Q[0][i] ^ key[26]] ^ key[18]] ^ key[10]] ^ key[2]] ^ MDSQ[3][Q[1][Q[1][Q[0][Q[1][i] ^ key[27]] ^ key[19]] ^ key[11]] ^ key[3]]; + uint32 b = rotl32(MDSQ[0][Q[0][Q[0][Q[1][Q[1][i + 1] ^ key[28]] ^ key[20]] ^ key[12]] ^ key[4]] ^ MDSQ[1][Q[0][Q[1][Q[1][Q[0][i + 1] ^ key[29]] ^ key[21]] ^ key[13]] ^ key[5]] + ^ MDSQ[2][Q[1][Q[0][Q[0][Q[0][i + 1] ^ key[30]] ^ key[22]] ^ key[14]] ^ key[6]] ^ MDSQ[3][Q[1][Q[1][Q[0][Q[1][i + 1] ^ key[31]] ^ key[23]] ^ key[15]] ^ key[7]], 8); + a += b; +#if CRYPTOPP_BOOL_X64 + if (i < 8) + { + 
instance->w[i] = a; + instance->w[i + 1] = rotl32(a + b, 9); + } + else + { + instance->k[i - 8] = a; + instance->k[i + 1 - 8] = rotl32(a + b, 9); + } +#else + instance->l_key[i] = a; + instance->l_key[i + 1] = rotl32(a + b, 9); +#endif } - - return l_key; } #else @@ -746,55 +1000,41 @@ u4byte *twofish_set_key(TwofishInstance *instance, const u4byte in_key[]) #ifndef TC_MINIMIZE_CODE_SIZE -void twofish_encrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[]) +#if CRYPTOPP_BOOL_X64 == 0 +void twofish_encrypt(TwofishInstance *ks, const u4byte in_blk[4], u4byte out_blk[4]) { - int j; + uint32* rk = ks->l_key; - u4byte *l_key = instance->l_key; - u4byte *mk_tab = instance->mk_tab; + uint32 x0 = in_blk[0] ^ rk[0]; + uint32 x1 = in_blk[1] ^ rk[1]; + uint32 x2 = in_blk[2] ^ rk[2]; + uint32 x3 = in_blk[3] ^ rk[3]; + uint32 f0, f1; - u4byte A = LE32(in_blk[0]) ^ l_key[0]; - u4byte B = LE32(in_blk[1]) ^ l_key[1]; - u4byte C = LE32(in_blk[2]) ^ l_key[2]; - u4byte D = LE32(in_blk[3]) ^ l_key[3]; - - for(j = 0; j != 16; j += 2) - { - u4byte X, Y; - X = mk_tab[ get_byte(3, A)] ^ mk_tab[256+get_byte(2, A)] ^ - mk_tab[512+get_byte(1, A)] ^ mk_tab[768+get_byte(0, A)]; - Y = mk_tab[ get_byte(0, B)] ^ mk_tab[256+get_byte(3, B)] ^ - mk_tab[512+get_byte(2, B)] ^ mk_tab[768+get_byte(1, B)]; - X += Y; - Y += X + l_key[2*j + 9]; - X += l_key[2*j + 8]; - - C = rotr32(C ^ X, 1); - D = rotl32(D, 1) ^ Y; - - X = mk_tab[ get_byte(3, C)] ^ mk_tab[256+get_byte(2, C)] ^ - mk_tab[512+get_byte(1, C)] ^ mk_tab[768+get_byte(0, C)]; - Y = mk_tab[ get_byte(0, D)] ^ mk_tab[256+get_byte(3, D)] ^ - mk_tab[512+get_byte(2, D)] ^ mk_tab[768+get_byte(1, D)]; - X += Y; - Y += X + l_key[2*j + 11]; - X += l_key[2*j + 10]; - - A = rotr32(A ^ X, 1); - B = rotl32(B, 1) ^ Y; - } +#ifdef UNROLL_TWOFISH + ROUNDA(0); ROUNDB(1); ROUNDA(2); ROUNDB(3); ROUNDA(4); ROUNDB(5); ROUNDA(6); ROUNDB(7); ROUNDA(8); ROUNDB(9); ROUNDA(10); ROUNDB(11); ROUNDA(12); ROUNDB(13); ROUNDA(14); ROUNDB(15); +#else + 
size_t j; + for(j = 0; j != 16; j += 2) + { + + ROUNDA (j); + ROUNDB (j + 1); + } +#endif - C ^= l_key[4]; - D ^= l_key[5]; - A ^= l_key[6]; - B ^= l_key[7]; + x2 ^= rk[4]; + x3 ^= rk[5]; + x0 ^= rk[6]; + x1 ^= rk[7]; - out_blk[0] = LE32(C); - out_blk[1] = LE32(D); - out_blk[2] = LE32(A); - out_blk[3] = LE32(B); -}; + out_blk[0] = x2; + out_blk[1] = x3; + out_blk[2] = x0; + out_blk[3] = x1; +} +#endif #else // TC_MINIMIZE_CODE_SIZE void twofish_encrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[]) @@ -833,54 +1073,38 @@ void twofish_encrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte o #ifndef TC_MINIMIZE_CODE_SIZE -void twofish_decrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[4]) +#if CRYPTOPP_BOOL_X64 == 0 +void twofish_decrypt(TwofishInstance *ks, const u4byte in_blk[4], u4byte out_blk[4]) { - int j; - u4byte *l_key = instance->l_key; - u4byte *mk_tab = instance->mk_tab; - - u4byte A = LE32(in_blk[0]) ^ l_key[4]; - u4byte B = LE32(in_blk[1]) ^ l_key[5]; - u4byte C = LE32(in_blk[2]) ^ l_key[6]; - u4byte D = LE32(in_blk[3]) ^ l_key[7]; - - for(j = 0; j != 16; j += 2) - { - u4byte X, Y; - X = mk_tab[ get_byte(3, A)] ^ mk_tab[256+get_byte(2, A)] ^ - mk_tab[512+get_byte(1, A)] ^ mk_tab[768+get_byte(0, A)]; - Y = mk_tab[ get_byte(0, B)] ^ mk_tab[256+get_byte(3, B)] ^ - mk_tab[512+get_byte(2, B)] ^ mk_tab[768+get_byte(1, B)]; - X += Y; - Y += X + l_key[39 - 2*j]; - X += l_key[38 - 2*j]; - - C = rotl32(C, 1) ^ X; - D = rotr32(D ^ Y, 1); - - X = mk_tab[ get_byte(3, C)] ^ mk_tab[256+get_byte(2, C)] ^ - mk_tab[512+get_byte(1, C)] ^ mk_tab[768+get_byte(0, C)]; - Y = mk_tab[ get_byte(0, D)] ^ mk_tab[256+get_byte(3, D)] ^ - mk_tab[512+get_byte(2, D)] ^ mk_tab[768+get_byte(1, D)]; - X += Y; - Y += X + l_key[37 - 2*j]; - X += l_key[36 - 2*j]; - - A = rotl32(A, 1) ^ X; - B = rotr32(B ^ Y, 1); - } - - C ^= l_key[0]; - D ^= l_key[1]; - A ^= l_key[2]; - B ^= l_key[3]; - - out_blk[0] = LE32(C); - out_blk[1] = LE32(D); - 
out_blk[2] = LE32(A); - out_blk[3] = LE32(B); + uint32* rk = ks->l_key; + uint32 x0 = in_blk[0] ^ rk[4]; + uint32 x1 = in_blk[1] ^ rk[5]; + uint32 x2 = in_blk[2] ^ rk[6]; + uint32 x3 = in_blk[3] ^ rk[7]; + uint32 f0, f1; + +#ifdef UNROLL_TWOFISH + RROUNDA(15); RROUNDB(14); RROUNDA(13); RROUNDB(12); RROUNDA(11); RROUNDB(10); RROUNDA(9); RROUNDB(8); RROUNDA(7); RROUNDB(6); RROUNDA(5); RROUNDB(4); RROUNDA(3); RROUNDB(2); RROUNDA(1); RROUNDB(0); +#else + int j; + for(j = 15; j != -1; j -= 2) + { + + RROUNDA (j); + RROUNDB (j - 1); + } +#endif + x2 ^= rk[0]; + x3 ^= rk[1]; + x0 ^= rk[2]; + x1 ^= rk[3]; + + out_blk[0] = x2; + out_blk[1] = x3; + out_blk[2] = x0; + out_blk[3] = x1; }; - +#endif #else // TC_MINIMIZE_CODE_SIZE void twofish_decrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[4]) diff --git a/src/Crypto/Twofish.h b/src/Crypto/Twofish.h index aebb6ea0..b2d44ddb 100644 --- a/src/Crypto/Twofish.h +++ b/src/Crypto/Twofish.h @@ -2,6 +2,7 @@ #define TWOFISH_H #include "Common/Tcdefs.h" +#include "config.h" #if defined(__cplusplus) extern "C" @@ -34,21 +35,32 @@ extern "C" #endif typedef struct { +#if CRYPTOPP_BOOL_X64 + u4byte mk_tab[4][256], w[8], k[32]; +#else u4byte l_key[40]; #ifdef TC_MINIMIZE_CODE_SIZE u4byte s_key[4]; -#endif -#if !defined (TC_MINIMIZE_CODE_SIZE) || defined (TC_WINDOWS_BOOT_TWOFISH) u4byte mk_tab[4 * 256]; +#else + u4byte mk_tab[4][256]; +#endif #endif } TwofishInstance; #define TWOFISH_KS sizeof(TwofishInstance) /* in_key must be 32-bytes long */ -u4byte * twofish_set_key(TwofishInstance *instance, const u4byte in_key[]); -void twofish_encrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[]); +void twofish_set_key(TwofishInstance *instance, const u4byte in_key[]); +#if CRYPTOPP_BOOL_X64 +void twofish_encrypt_blocks(TwofishInstance *instance, const byte* in_blk, byte* out_blk, uint32 blockCount); +void twofish_decrypt_blocks(TwofishInstance *instance, const byte* in_blk, byte* out_blk, uint32 
blockCount); +#define twofish_encrypt(instance,in_blk,out_blk) twofish_encrypt_blocks(instance, (const byte*) in_blk, (byte*) out_blk, 1) +#define twofish_decrypt(instance,in_blk,out_blk) twofish_decrypt_blocks(instance, (const byte*) in_blk, (byte*) out_blk, 1) +#else +void twofish_encrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[4]); void twofish_decrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[4]); +#endif #if defined(__cplusplus) } diff --git a/src/Crypto/Twofish_x64.S b/src/Crypto/Twofish_x64.S new file mode 100644 index 00000000..1e271691 --- /dev/null +++ b/src/Crypto/Twofish_x64.S @@ -0,0 +1,314 @@ +/* twofish_asm_3way.S + * + * Copyright © 2011-2013 Jussi Kivilinna + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. 
+ */ + + /* Adapted to VeraCrypt */ + +.text + +.align 8 +.global twofish_enc_blk +.global _twofish_enc_blk + +twofish_enc_blk: +_twofish_enc_blk: + +.ifdef WINABI +pushq %rsi +pushq %rdi +movq %rcx, %rdi; +movq %rdx, %rsi; +movq %r8, %rdx; +.endif + + pushq %rbp + pushq %rbx + + pushq %rsi; + movq %rdx, %rsi; + + movq 4*(0)(%rsi), %rax; xorq 4096 +4*0(%rdi), %rax;; movq 4*(2)(%rsi), %rbx; xorq 4096 +4*2(%rdi), %rbx;;; + + movzbl %al, %r8d; movzbl %ah, %esi; rorq $(32), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(48), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*(0*2)+1)(%rdi), %esi; addl 4128 +4*(2*(0*2))(%rdi), %r8d; xorl %ebx, %r8d; shrq $32, %rbx; roll $1, %ebx; xorl %esi, %ebx; shlq $32, %rbx; rorl $1, %r8d; orq %r8, %rbx;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(32), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(48), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((0*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((0*2)+1))(%rdi), %r8d; xorl %eax, %r8d; shrq $32, %rax; roll $1, %eax; xorl %esi, %eax; shlq $32, %rax; rorl $1, %r8d; orq %r8, %rax;;; + movzbl %al, %r8d; movzbl %ah, %esi; rorq $(32), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %r10d; movzbl %ah, 
%esi; rorq $(48), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*(1*2)+1)(%rdi), %esi; addl 4128 +4*(2*(1*2))(%rdi), %r8d; xorl %ebx, %r8d; shrq $32, %rbx; roll $1, %ebx; xorl %esi, %ebx; shlq $32, %rbx; rorl $1, %r8d; orq %r8, %rbx;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(32), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(48), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((1*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((1*2)+1))(%rdi), %r8d; xorl %eax, %r8d; shrq $32, %rax; roll $1, %eax; xorl %esi, %eax; shlq $32, %rax; rorl $1, %r8d; orq %r8, %rax;;; + movzbl %al, %r8d; movzbl %ah, %esi; rorq $(32), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(48), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*(2*2)+1)(%rdi), %esi; addl 4128 +4*(2*(2*2))(%rdi), %r8d; xorl %ebx, %r8d; shrq $32, %rbx; roll $1, %ebx; xorl %esi, %ebx; shlq $32, %rbx; rorl $1, %r8d; orq %r8, %rbx;; movzbl 
%bl, %r8d; movzbl %bh, %esi; rorq $(32), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(48), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((2*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((2*2)+1))(%rdi), %r8d; xorl %eax, %r8d; shrq $32, %rax; roll $1, %eax; xorl %esi, %eax; shlq $32, %rax; rorl $1, %r8d; orq %r8, %rax;;; + movzbl %al, %r8d; movzbl %ah, %esi; rorq $(32), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(48), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*(3*2)+1)(%rdi), %esi; addl 4128 +4*(2*(3*2))(%rdi), %r8d; xorl %ebx, %r8d; shrq $32, %rbx; roll $1, %ebx; xorl %esi, %ebx; shlq $32, %rbx; rorl $1, %r8d; orq %r8, %rbx;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(32), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(48), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((3*2)+1)+1)(%rdi), %esi; addl 4128 
+4*(2*((3*2)+1))(%rdi), %r8d; xorl %eax, %r8d; shrq $32, %rax; roll $1, %eax; xorl %esi, %eax; shlq $32, %rax; rorl $1, %r8d; orq %r8, %rax;;; + movzbl %al, %r8d; movzbl %ah, %esi; rorq $(32), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(48), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*(4*2)+1)(%rdi), %esi; addl 4128 +4*(2*(4*2))(%rdi), %r8d; xorl %ebx, %r8d; shrq $32, %rbx; roll $1, %ebx; xorl %esi, %ebx; shlq $32, %rbx; rorl $1, %r8d; orq %r8, %rbx;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(32), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(48), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((4*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((4*2)+1))(%rdi), %r8d; xorl %eax, %r8d; shrq $32, %rax; roll $1, %eax; xorl %esi, %eax; shlq $32, %rax; rorl $1, %r8d; orq %r8, %rax;;; + movzbl %al, %r8d; movzbl %ah, %esi; rorq $(32), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(48), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 3072(%rdi, 
%rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*(5*2)+1)(%rdi), %esi; addl 4128 +4*(2*(5*2))(%rdi), %r8d; xorl %ebx, %r8d; shrq $32, %rbx; roll $1, %ebx; xorl %esi, %ebx; shlq $32, %rbx; rorl $1, %r8d; orq %r8, %rbx;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(32), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(48), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((5*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((5*2)+1))(%rdi), %r8d; xorl %eax, %r8d; shrq $32, %rax; roll $1, %eax; xorl %esi, %eax; shlq $32, %rax; rorl $1, %r8d; orq %r8, %rax;;; + movzbl %al, %r8d; movzbl %ah, %esi; rorq $(32), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(48), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*(6*2)+1)(%rdi), %esi; addl 4128 +4*(2*(6*2))(%rdi), %r8d; xorl %ebx, %r8d; shrq $32, %rbx; roll $1, %ebx; xorl %esi, %ebx; shlq $32, %rbx; rorl $1, %r8d; orq %r8, %rbx;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(32), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(48), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; 
xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((6*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((6*2)+1))(%rdi), %r8d; xorl %eax, %r8d; shrq $32, %rax; roll $1, %eax; xorl %esi, %eax; shlq $32, %rax; rorl $1, %r8d; orq %r8, %rax;;; + movzbl %al, %r8d; movzbl %ah, %esi; rorq $(32), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(48), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*(7*2)+1)(%rdi), %esi; addl 4128 +4*(2*(7*2))(%rdi), %r8d; xorl %ebx, %r8d; shrq $32, %rbx; roll $1, %ebx; xorl %esi, %ebx; shlq $32, %rbx; rorl $1, %r8d; orq %r8, %rbx;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(32), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(48), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((7*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((7*2)+1))(%rdi), %r8d; xorl %eax, %r8d; shrq $32, %rax; roll $1, %eax; xorl %esi, %eax; shlq $32, %rax; rorl $1, %r8d; orq %r8, %rax;;; + + popq %rsi; + + xorq 4096 +4*4(%rdi), %rbx; movq %rbx, 4*(0)(%rsi);; xorq 4096 +4*6(%rdi), %rax; movq %rax, 4*(2)(%rsi);;; + + popq %rbx + popq 
%rbp +.ifdef WINABI +popq %rdi +popq %rsi +.endif + ret; + +.global twofish_dec_blk +.global _twofish_dec_blk + +twofish_dec_blk: +_twofish_dec_blk: + +.ifdef WINABI +pushq %rsi +pushq %rdi +movq %rcx, %rdi; +movq %rdx, %rsi; +movq %r8, %rdx; +.endif + + + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + + pushq %rsi; + movq %rdx, %rsi; + + movq 4*(0)(%rsi), %rbx; xorq 4096 +4*4(%rdi), %rbx;; movq 4*(2)(%rsi), %rax; xorq 4096 +4*6(%rdi), %rax;; rorq $32, %rbx; rorq $32, %rax;; + + movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(32), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(48), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((7*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((7*2)+1))(%rdi), %r8d; xorl %eax, %esi; shrq $32, %rax; roll $1, %eax; xorl %r8d, %eax; shlq $32, %rax; rorl $1, %esi; orq %rsi, %rax;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(32), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %r8d; movzbl %ah, %esi; rorq $(48), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((7*2))+1)(%rdi), %esi; addl 4128 +4*(2*((7*2)))(%rdi), %r8d; xorl %ebx, %esi; shrq $32, %rbx; roll $1, %ebx; xorl %r8d, %ebx; shlq $32, %rbx; rorl $1, %esi; orq %rsi, %rbx;;; + movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(32), %rbx; movl 1024(%rdi, %r10, 
4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(48), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((6*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((6*2)+1))(%rdi), %r8d; xorl %eax, %esi; shrq $32, %rax; roll $1, %eax; xorl %r8d, %eax; shlq $32, %rax; rorl $1, %esi; orq %rsi, %rax;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(32), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %r8d; movzbl %ah, %esi; rorq $(48), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((6*2))+1)(%rdi), %esi; addl 4128 +4*(2*((6*2)))(%rdi), %r8d; xorl %ebx, %esi; shrq $32, %rbx; roll $1, %ebx; xorl %r8d, %ebx; shlq $32, %rbx; rorl $1, %esi; orq %rsi, %rbx;;; + movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(32), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(48), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((5*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((5*2)+1))(%rdi), %r8d; xorl %eax, %esi; shrq $32, %rax; roll 
$1, %eax; xorl %r8d, %eax; shlq $32, %rax; rorl $1, %esi; orq %rsi, %rax;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(32), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %r8d; movzbl %ah, %esi; rorq $(48), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((5*2))+1)(%rdi), %esi; addl 4128 +4*(2*((5*2)))(%rdi), %r8d; xorl %ebx, %esi; shrq $32, %rbx; roll $1, %ebx; xorl %r8d, %ebx; shlq $32, %rbx; rorl $1, %esi; orq %rsi, %rbx;;; + movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(32), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(48), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((4*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((4*2)+1))(%rdi), %r8d; xorl %eax, %esi; shrq $32, %rax; roll $1, %eax; xorl %r8d, %eax; shlq $32, %rax; rorl $1, %esi; orq %rsi, %rax;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(32), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %r8d; movzbl %ah, %esi; rorq $(48), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;;; leal (%r8d, 
%r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((4*2))+1)(%rdi), %esi; addl 4128 +4*(2*((4*2)))(%rdi), %r8d; xorl %ebx, %esi; shrq $32, %rbx; roll $1, %ebx; xorl %r8d, %ebx; shlq $32, %rbx; rorl $1, %esi; orq %rsi, %rbx;;; + movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(32), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(48), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((3*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((3*2)+1))(%rdi), %r8d; xorl %eax, %esi; shrq $32, %rax; roll $1, %eax; xorl %r8d, %eax; shlq $32, %rax; rorl $1, %esi; orq %rsi, %rax;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(32), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %r8d; movzbl %ah, %esi; rorq $(48), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((3*2))+1)(%rdi), %esi; addl 4128 +4*(2*((3*2)))(%rdi), %r8d; xorl %ebx, %esi; shrq $32, %rbx; roll $1, %ebx; xorl %r8d, %ebx; shlq $32, %rbx; rorl $1, %esi; orq %rsi, %rbx;;; + movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(32), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(48), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, 
%rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((2*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((2*2)+1))(%rdi), %r8d; xorl %eax, %esi; shrq $32, %rax; roll $1, %eax; xorl %r8d, %eax; shlq $32, %rax; rorl $1, %esi; orq %rsi, %rax;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(32), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %r8d; movzbl %ah, %esi; rorq $(48), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((2*2))+1)(%rdi), %esi; addl 4128 +4*(2*((2*2)))(%rdi), %r8d; xorl %ebx, %esi; shrq $32, %rbx; roll $1, %ebx; xorl %r8d, %ebx; shlq $32, %rbx; rorl $1, %esi; orq %rsi, %rbx;;; + movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(32), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(48), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((1*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((1*2)+1))(%rdi), %r8d; xorl %eax, %esi; shrq $32, %rax; roll $1, %eax; xorl %r8d, %eax; shlq $32, %rax; rorl $1, %esi; orq %rsi, %rax;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(32), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %r8d; movzbl %ah, %esi; rorq $(48), %rax; movl 
0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((1*2))+1)(%rdi), %esi; addl 4128 +4*(2*((1*2)))(%rdi), %r8d; xorl %ebx, %esi; shrq $32, %rbx; roll $1, %ebx; xorl %r8d, %ebx; shlq $32, %rbx; rorl $1, %esi; orq %rsi, %rbx;;; + movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(32), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(48), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((0*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((0*2)+1))(%rdi), %r8d; xorl %eax, %esi; shrq $32, %rax; roll $1, %eax; xorl %r8d, %eax; shlq $32, %rax; rorl $1, %esi; orq %rsi, %rax;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(32), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %r8d; movzbl %ah, %esi; rorq $(48), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((0*2))+1)(%rdi), %esi; addl 4128 +4*(2*((0*2)))(%rdi), %r8d; xorl %ebx, %esi; shrq $32, %rbx; roll $1, %ebx; xorl %r8d, %ebx; shlq $32, %rbx; rorl $1, %esi; orq %rsi, %rbx;;; + + popq %rsi; + + rorq $32, 
%rax; rorq $32, %rbx; xorq 4096 +4*0(%rdi), %rax; movq %rax, 4*(0)(%rsi);; xorq 4096 +4*2(%rdi), %rbx; movq %rbx, 4*(2)(%rsi);;; + + popq %r13 + popq %r12 + popq %rbx + popq %rbp + +.ifdef WINABI +popq %rdi +popq %rsi +.endif + + ret; + /* twofish_enc_blk2: Twofish encryption of two 16-byte blocks processed side by side. Arguments after the optional WINABI remapping: rdi = pointer to the key/table context, rsi = destination (32 bytes written), rdx = source (32 bytes read). Context offsets used here: four 256-entry dword lookup tables at 0, 1024, 2048 and 3072; whitening dwords at 4096..4127 (first four applied to the input, last four to the output); round-key dwords from 4128. The block at source offset 0 is held in rax/rbx and the block at offset 16 in rcx/rdx; r8-r11, rsi and rbp serve as scratch. */ +.global twofish_enc_blk2 +.global _twofish_enc_blk2 + +twofish_enc_blk2: +_twofish_enc_blk2: + /* With WINABI defined: save rsi and rdi (callee-saved under the Microsoft x64 convention), then copy the incoming rcx, rdx, r8 into rdi, rsi, rdx so the body below sees the same register assignment in both builds. */ +.ifdef WINABI +pushq %rsi +pushq %rdi +movq %rcx, %rdi; +movq %rdx, %rsi; +movq %r8, %rdx; +.endif + /* rbp and rbx are overwritten below, so they are saved here and restored before ret. */ + + pushq %rbp + pushq %rbx + /* Keep the destination pointer on the stack for the final stores; rsi becomes the source pointer for the loads. */ + pushq %rsi; + movq %rdx, %rsi; + /* Load both blocks as 64-bit halves and XOR each half with the matching input-whitening qword (same key material for both blocks). */ + movq 4*(0)(%rsi), %rax; xorq 4096 +4*0(%rdi), %rax; movq 4*(4+(0))(%rsi), %rcx; xorq 4096 +4*0(%rdi), %rcx;; movq 4*(2)(%rsi), %rbx; xorq 4096 +4*2(%rdi), %rbx; movq 4*(4+(2))(%rsi), %rdx; xorq 4096 +4*2(%rdi), %rdx;;; + /* Iteration 0 of 8: round-key dwords 0..3. Each iteration is two half-steps: the first derives two 32-bit values from the bytes of rax (and rcx) through the four tables, combines them with each other and two round-key dwords, and mixes them into rbx (and rdx) with 1-bit rotates; the second does the same from rbx/rdx into rax/rcx. The rorq amounts within a half-step sum to 128, so the register being read ends up unchanged. */ + movzbl %al, %r8d; movzbl %ah, %esi; rorq $(32), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(48), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %cl, %r9d; movzbl %ch, %esi; rorq $(32), %rcx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %cl, %r11d; movzbl %ch, %esi; rorq $(48), %rcx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(32), %rcx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(16), %rcx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*(0*2)+1)(%rdi), %esi; addl 4128 +4*(2*(0*2))(%rdi), %r8d; xorl %ebx, %r8d; shrq $32, %rbx; roll $1, %ebx; xorl %esi, %ebx; shlq $32, %rbx; rorl $1, %r8d; orq %r8, %rbx; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*(0*2)+1)(%rdi), %esi; addl
4128 +4*(2*(0*2))(%rdi), %r9d; xorl %edx, %r9d; shrq $32, %rdx; roll $1, %edx; xorl %esi, %edx; shlq $32, %rdx; rorl $1, %r9d; orq %r9, %rdx;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(32), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(48), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %dl, %r9d; movzbl %dh, %esi; rorq $(32), %rdx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %dl, %r11d; movzbl %dh, %esi; rorq $(48), %rdx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(32), %rdx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(16), %rdx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((0*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((0*2)+1))(%rdi), %r8d; xorl %eax, %r8d; shrq $32, %rax; roll $1, %eax; xorl %esi, %eax; shlq $32, %rax; rorl $1, %r8d; orq %r8, %rax; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((0*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((0*2)+1))(%rdi), %r9d; xorl %ecx, %r9d; shrq $32, %rcx; roll $1, %ecx; xorl %esi, %ecx; shlq $32, %rcx; rorl $1, %r9d; orq %r9, %rcx;;; + /* Iteration 1 of 8: round-key dwords 4..7. */ movzbl %al, %r8d; movzbl %ah, %esi; rorq $(32), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(48), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %cl, %r9d; movzbl %ch, %esi; rorq $(32), %rcx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %cl, %r11d; movzbl %ch, %esi; rorq $(48), %rcx; movl 1024(%rdi,
%r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(32), %rcx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(16), %rcx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*(1*2)+1)(%rdi), %esi; addl 4128 +4*(2*(1*2))(%rdi), %r8d; xorl %ebx, %r8d; shrq $32, %rbx; roll $1, %ebx; xorl %esi, %ebx; shlq $32, %rbx; rorl $1, %r8d; orq %r8, %rbx; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*(1*2)+1)(%rdi), %esi; addl 4128 +4*(2*(1*2))(%rdi), %r9d; xorl %edx, %r9d; shrq $32, %rdx; roll $1, %edx; xorl %esi, %edx; shlq $32, %rdx; rorl $1, %r9d; orq %r9, %rdx;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(32), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(48), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %dl, %r9d; movzbl %dh, %esi; rorq $(32), %rdx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %dl, %r11d; movzbl %dh, %esi; rorq $(48), %rdx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(32), %rdx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(16), %rdx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128
+4*(2*((1*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((1*2)+1))(%rdi), %r8d; xorl %eax, %r8d; shrq $32, %rax; roll $1, %eax; xorl %esi, %eax; shlq $32, %rax; rorl $1, %r8d; orq %r8, %rax; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((1*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((1*2)+1))(%rdi), %r9d; xorl %ecx, %r9d; shrq $32, %rcx; roll $1, %ecx; xorl %esi, %ecx; shlq $32, %rcx; rorl $1, %r9d; orq %r9, %rcx;;; + /* Iteration 2 of 8: round-key dwords 8..11. */ movzbl %al, %r8d; movzbl %ah, %esi; rorq $(32), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(48), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %cl, %r9d; movzbl %ch, %esi; rorq $(32), %rcx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %cl, %r11d; movzbl %ch, %esi; rorq $(48), %rcx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(32), %rcx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(16), %rcx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*(2*2)+1)(%rdi), %esi; addl 4128 +4*(2*(2*2))(%rdi), %r8d; xorl %ebx, %r8d; shrq $32, %rbx; roll $1, %ebx; xorl %esi, %ebx; shlq $32, %rbx; rorl $1, %r8d; orq %r8, %rbx; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*(2*2)+1)(%rdi), %esi; addl 4128 +4*(2*(2*2))(%rdi), %r9d; xorl %edx, %r9d; shrq $32, %rdx; roll $1, %edx; xorl %esi, %edx; shlq $32, %rdx; rorl $1, %r9d; orq %r9, %rdx;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(32), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %r10d; movzbl %bh, %esi; rorq
$(48), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %dl, %r9d; movzbl %dh, %esi; rorq $(32), %rdx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %dl, %r11d; movzbl %dh, %esi; rorq $(48), %rdx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(32), %rdx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(16), %rdx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((2*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((2*2)+1))(%rdi), %r8d; xorl %eax, %r8d; shrq $32, %rax; roll $1, %eax; xorl %esi, %eax; shlq $32, %rax; rorl $1, %r8d; orq %r8, %rax; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((2*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((2*2)+1))(%rdi), %r9d; xorl %ecx, %r9d; shrq $32, %rcx; roll $1, %ecx; xorl %esi, %ecx; shlq $32, %rcx; rorl $1, %r9d; orq %r9, %rcx;;; + /* Iteration 3 of 8: round-key dwords 12..15. */ movzbl %al, %r8d; movzbl %ah, %esi; rorq $(32), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(48), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %cl, %r9d; movzbl %ch, %esi; rorq $(32), %rcx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %cl, %r11d; movzbl %ch, %esi; rorq $(48), %rcx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %cl,
%ebp; movzbl %ch, %esi; rorq $(32), %rcx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(16), %rcx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*(3*2)+1)(%rdi), %esi; addl 4128 +4*(2*(3*2))(%rdi), %r8d; xorl %ebx, %r8d; shrq $32, %rbx; roll $1, %ebx; xorl %esi, %ebx; shlq $32, %rbx; rorl $1, %r8d; orq %r8, %rbx; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*(3*2)+1)(%rdi), %esi; addl 4128 +4*(2*(3*2))(%rdi), %r9d; xorl %edx, %r9d; shrq $32, %rdx; roll $1, %edx; xorl %esi, %edx; shlq $32, %rdx; rorl $1, %r9d; orq %r9, %rdx;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(32), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(48), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %dl, %r9d; movzbl %dh, %esi; rorq $(32), %rdx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %dl, %r11d; movzbl %dh, %esi; rorq $(48), %rdx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(32), %rdx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(16), %rdx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((3*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((3*2)+1))(%rdi), %r8d; xorl %eax, %r8d; shrq $32, %rax; roll $1, %eax; xorl %esi, %eax; shlq $32, %rax; rorl $1, %r8d; orq %r8, %rax; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((3*2)+1)+1)(%rdi), %esi; addl 4128
+4*(2*((3*2)+1))(%rdi), %r9d; xorl %ecx, %r9d; shrq $32, %rcx; roll $1, %ecx; xorl %esi, %ecx; shlq $32, %rcx; rorl $1, %r9d; orq %r9, %rcx;;; + /* Iteration 4 of 8: round-key dwords 16..19. */ movzbl %al, %r8d; movzbl %ah, %esi; rorq $(32), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(48), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %cl, %r9d; movzbl %ch, %esi; rorq $(32), %rcx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %cl, %r11d; movzbl %ch, %esi; rorq $(48), %rcx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(32), %rcx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(16), %rcx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*(4*2)+1)(%rdi), %esi; addl 4128 +4*(2*(4*2))(%rdi), %r8d; xorl %ebx, %r8d; shrq $32, %rbx; roll $1, %ebx; xorl %esi, %ebx; shlq $32, %rbx; rorl $1, %r8d; orq %r8, %rbx; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*(4*2)+1)(%rdi), %esi; addl 4128 +4*(2*(4*2))(%rdi), %r9d; xorl %edx, %r9d; shrq $32, %rdx; roll $1, %edx; xorl %esi, %edx; shlq $32, %rdx; rorl $1, %r9d; orq %r9, %rdx;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(32), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(48), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %dl, %r9d; movzbl %dh, %esi; rorq $(32), %rdx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %dl, %r11d; movzbl %dh, %esi; rorq $(48), %rdx; movl 1024(%rdi, %r11, 4), %r11d;
xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(32), %rdx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(16), %rdx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((4*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((4*2)+1))(%rdi), %r8d; xorl %eax, %r8d; shrq $32, %rax; roll $1, %eax; xorl %esi, %eax; shlq $32, %rax; rorl $1, %r8d; orq %r8, %rax; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((4*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((4*2)+1))(%rdi), %r9d; xorl %ecx, %r9d; shrq $32, %rcx; roll $1, %ecx; xorl %esi, %ecx; shlq $32, %rcx; rorl $1, %r9d; orq %r9, %rcx;;; + /* Iteration 5 of 8: round-key dwords 20..23. */ movzbl %al, %r8d; movzbl %ah, %esi; rorq $(32), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(48), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %cl, %r9d; movzbl %ch, %esi; rorq $(32), %rcx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %cl, %r11d; movzbl %ch, %esi; rorq $(48), %rcx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(32), %rcx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(16), %rcx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128
+4*(2*(5*2)+1)(%rdi), %esi; addl 4128 +4*(2*(5*2))(%rdi), %r8d; xorl %ebx, %r8d; shrq $32, %rbx; roll $1, %ebx; xorl %esi, %ebx; shlq $32, %rbx; rorl $1, %r8d; orq %r8, %rbx; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*(5*2)+1)(%rdi), %esi; addl 4128 +4*(2*(5*2))(%rdi), %r9d; xorl %edx, %r9d; shrq $32, %rdx; roll $1, %edx; xorl %esi, %edx; shlq $32, %rdx; rorl $1, %r9d; orq %r9, %rdx;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(32), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(48), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %dl, %r9d; movzbl %dh, %esi; rorq $(32), %rdx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %dl, %r11d; movzbl %dh, %esi; rorq $(48), %rdx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(32), %rdx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(16), %rdx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((5*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((5*2)+1))(%rdi), %r8d; xorl %eax, %r8d; shrq $32, %rax; roll $1, %eax; xorl %esi, %eax; shlq $32, %rax; rorl $1, %r8d; orq %r8, %rax; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((5*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((5*2)+1))(%rdi), %r9d; xorl %ecx, %r9d; shrq $32, %rcx; roll $1, %ecx; xorl %esi, %ecx; shlq $32, %rcx; rorl $1, %r9d; orq %r9, %rcx;;; + /* Iteration 6 of 8: round-key dwords 24..27. */ movzbl %al, %r8d; movzbl %ah, %esi; rorq $(32), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %r10d; movzbl %ah, %esi; rorq
$(48), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %cl, %r9d; movzbl %ch, %esi; rorq $(32), %rcx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %cl, %r11d; movzbl %ch, %esi; rorq $(48), %rcx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(32), %rcx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(16), %rcx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*(6*2)+1)(%rdi), %esi; addl 4128 +4*(2*(6*2))(%rdi), %r8d; xorl %ebx, %r8d; shrq $32, %rbx; roll $1, %ebx; xorl %esi, %ebx; shlq $32, %rbx; rorl $1, %r8d; orq %r8, %rbx; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*(6*2)+1)(%rdi), %esi; addl 4128 +4*(2*(6*2))(%rdi), %r9d; xorl %edx, %r9d; shrq $32, %rdx; roll $1, %edx; xorl %esi, %edx; shlq $32, %rdx; rorl $1, %r9d; orq %r9, %rdx;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(32), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(48), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %dl, %r9d; movzbl %dh, %esi; rorq $(32), %rdx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %dl, %r11d; movzbl %dh, %esi; rorq $(48), %rdx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %dl, %ebp; movzbl %dh,
%esi; rorq $(32), %rdx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(16), %rdx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((6*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((6*2)+1))(%rdi), %r8d; xorl %eax, %r8d; shrq $32, %rax; roll $1, %eax; xorl %esi, %eax; shlq $32, %rax; rorl $1, %r8d; orq %r8, %rax; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((6*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((6*2)+1))(%rdi), %r9d; xorl %ecx, %r9d; shrq $32, %rcx; roll $1, %ecx; xorl %esi, %ecx; shlq $32, %rcx; rorl $1, %r9d; orq %r9, %rcx;;; + /* Iteration 7 of 8: round-key dwords 28..31. */ movzbl %al, %r8d; movzbl %ah, %esi; rorq $(32), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(48), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %cl, %r9d; movzbl %ch, %esi; rorq $(32), %rcx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %cl, %r11d; movzbl %ch, %esi; rorq $(48), %rcx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(32), %rcx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(16), %rcx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*(7*2)+1)(%rdi), %esi; addl 4128 +4*(2*(7*2))(%rdi), %r8d; xorl %ebx, %r8d; shrq $32, %rbx; roll $1, %ebx; xorl %esi, %ebx; shlq $32, %rbx; rorl $1, %r8d; orq %r8, %rbx; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*(7*2)+1)(%rdi), %esi; addl 4128 +4*(2*(7*2))(%rdi), %r9d; xorl
%edx, %r9d; shrq $32, %rdx; roll $1, %edx; xorl %esi, %edx; shlq $32, %rdx; rorl $1, %r9d; orq %r9, %rdx;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(32), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(48), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %dl, %r9d; movzbl %dh, %esi; rorq $(32), %rdx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %dl, %r11d; movzbl %dh, %esi; rorq $(48), %rdx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(32), %rdx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(16), %rdx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((7*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((7*2)+1))(%rdi), %r8d; xorl %eax, %r8d; shrq $32, %rax; roll $1, %eax; xorl %esi, %eax; shlq $32, %rax; rorl $1, %r8d; orq %r8, %rax; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((7*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((7*2)+1))(%rdi), %r9d; xorl %ecx, %r9d; shrq $32, %rcx; roll $1, %ecx; xorl %esi, %ecx; shlq $32, %rcx; rorl $1, %r9d; orq %r9, %rcx;;; + /* Recover the destination pointer that was pushed at entry. */ + popq %rsi; + /* Output whitening and store for both blocks. The halves are written swapped relative to how they were loaded: rbx/rdx go to block offset 0 and rax/rcx to block offset 8. */ + xorq 4096 +4*4(%rdi), %rbx; movq %rbx, 4*(0)(%rsi); xorq 4096 +4*4(%rdi), %rdx; movq %rdx, 4*(4+(0))(%rsi);; xorq 4096 +4*6(%rdi), %rax; movq %rax, 4*(2)(%rsi); xorq 4096 +4*6(%rdi), %rcx; movq %rcx, 4*(4+(2))(%rsi);;; + /* Restore saved registers in reverse push order. */ + popq %rbx + popq %rbp +.ifdef WINABI +popq %rdi +popq %rsi +.endif + ret; + +.global twofish_dec_blk2 +.global _twofish_dec_blk2 + +twofish_dec_blk2: +_twofish_dec_blk2: + + +.ifdef WINABI +pushq %rsi +pushq %rdi +movq
%rcx, %rdi; +movq %rdx, %rsi; +movq %r8, %rdx; +.endif + + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + + pushq %rsi; + movq %rdx, %rsi; + + movq 4*(0)(%rsi), %rbx; xorq 4096 +4*4(%rdi), %rbx; movq 4*(4+(0))(%rsi), %rdx; xorq 4096 +4*4(%rdi), %rdx;; movq 4*(2)(%rsi), %rax; xorq 4096 +4*6(%rdi), %rax; movq 4*(4+(2))(%rsi), %rcx; xorq 4096 +4*6(%rdi), %rcx;; rorq $32, %rbx; rorq $32, %rdx; rorq $32, %rax; rorq $32, %rcx;; + + movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(32), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(48), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %dl, %r11d; movzbl %dh, %esi; rorq $(32), %rdx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %dl, %r9d; movzbl %dh, %esi; rorq $(48), %rdx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(32), %rdx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(16), %rdx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((7*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((7*2)+1))(%rdi), %r8d; xorl %eax, %esi; shrq $32, %rax; roll $1, %eax; xorl %r8d, %eax; shlq $32, %rax; rorl $1, %esi; orq %rsi, %rax; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((7*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((7*2)+1))(%rdi), %r9d; xorl %ecx, %esi; shrq $32, %rcx; roll $1, %ecx; xorl %r9d, %ecx; shlq $32, %rcx; rorl $1, %esi; orq %rsi, %rcx;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(32), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl 
%al, %r8d; movzbl %ah, %esi; rorq $(48), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %cl, %r11d; movzbl %ch, %esi; rorq $(32), %rcx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %cl, %r9d; movzbl %ch, %esi; rorq $(48), %rcx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(32), %rcx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(16), %rcx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((7*2))+1)(%rdi), %esi; addl 4128 +4*(2*((7*2)))(%rdi), %r8d; xorl %ebx, %esi; shrq $32, %rbx; roll $1, %ebx; xorl %r8d, %ebx; shlq $32, %rbx; rorl $1, %esi; orq %rsi, %rbx; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((7*2))+1)(%rdi), %esi; addl 4128 +4*(2*((7*2)))(%rdi), %r9d; xorl %edx, %esi; shrq $32, %rdx; roll $1, %edx; xorl %r9d, %edx; shlq $32, %rdx; rorl $1, %esi; orq %rsi, %rdx;;; + movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(32), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(48), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %dl, %r11d; movzbl %dh, %esi; rorq $(32), %rdx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %dl, %r9d; movzbl %dh, %esi; rorq $(48), %rdx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 
4), %r8d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(32), %rdx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(16), %rdx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((6*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((6*2)+1))(%rdi), %r8d; xorl %eax, %esi; shrq $32, %rax; roll $1, %eax; xorl %r8d, %eax; shlq $32, %rax; rorl $1, %esi; orq %rsi, %rax; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((6*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((6*2)+1))(%rdi), %r9d; xorl %ecx, %esi; shrq $32, %rcx; roll $1, %ecx; xorl %r9d, %ecx; shlq $32, %rcx; rorl $1, %esi; orq %rsi, %rcx;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(32), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %r8d; movzbl %ah, %esi; rorq $(48), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %cl, %r11d; movzbl %ch, %esi; rorq $(32), %rcx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %cl, %r9d; movzbl %ch, %esi; rorq $(48), %rcx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(32), %rcx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(16), %rcx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((6*2))+1)(%rdi), %esi; addl 4128 +4*(2*((6*2)))(%rdi), %r8d; xorl %ebx, %esi; shrq $32, %rbx; roll $1, %ebx; xorl %r8d, %ebx; shlq $32, %rbx; rorl $1, %esi; orq %rsi, %rbx; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((6*2))+1)(%rdi), 
%esi; addl 4128 +4*(2*((6*2)))(%rdi), %r9d; xorl %edx, %esi; shrq $32, %rdx; roll $1, %edx; xorl %r9d, %edx; shlq $32, %rdx; rorl $1, %esi; orq %rsi, %rdx;;; + movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(32), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(48), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %dl, %r11d; movzbl %dh, %esi; rorq $(32), %rdx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %dl, %r9d; movzbl %dh, %esi; rorq $(48), %rdx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(32), %rdx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(16), %rdx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((5*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((5*2)+1))(%rdi), %r8d; xorl %eax, %esi; shrq $32, %rax; roll $1, %eax; xorl %r8d, %eax; shlq $32, %rax; rorl $1, %esi; orq %rsi, %rax; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((5*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((5*2)+1))(%rdi), %r9d; xorl %ecx, %esi; shrq $32, %rcx; roll $1, %ecx; xorl %r9d, %ecx; shlq $32, %rcx; rorl $1, %esi; orq %rsi, %rcx;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(32), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %r8d; movzbl %ah, %esi; rorq $(48), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %cl, %r11d; movzbl %ch, %esi; rorq $(32), %rcx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %cl, %r9d; movzbl %ch, %esi; rorq $(48), 
%rcx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(32), %rcx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(16), %rcx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((5*2))+1)(%rdi), %esi; addl 4128 +4*(2*((5*2)))(%rdi), %r8d; xorl %ebx, %esi; shrq $32, %rbx; roll $1, %ebx; xorl %r8d, %ebx; shlq $32, %rbx; rorl $1, %esi; orq %rsi, %rbx; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((5*2))+1)(%rdi), %esi; addl 4128 +4*(2*((5*2)))(%rdi), %r9d; xorl %edx, %esi; shrq $32, %rdx; roll $1, %edx; xorl %r9d, %edx; shlq $32, %rdx; rorl $1, %esi; orq %rsi, %rdx;;; + movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(32), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(48), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %dl, %r11d; movzbl %dh, %esi; rorq $(32), %rdx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %dl, %r9d; movzbl %dh, %esi; rorq $(48), %rdx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(32), %rdx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(16), %rdx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;;; leal (%r8d, %r10d, 2), %esi; 
addl %r10d, %r8d; addl 4128 +4*(2*((4*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((4*2)+1))(%rdi), %r8d; xorl %eax, %esi; shrq $32, %rax; roll $1, %eax; xorl %r8d, %eax; shlq $32, %rax; rorl $1, %esi; orq %rsi, %rax; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((4*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((4*2)+1))(%rdi), %r9d; xorl %ecx, %esi; shrq $32, %rcx; roll $1, %ecx; xorl %r9d, %ecx; shlq $32, %rcx; rorl $1, %esi; orq %rsi, %rcx;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(32), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %r8d; movzbl %ah, %esi; rorq $(48), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %cl, %r11d; movzbl %ch, %esi; rorq $(32), %rcx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %cl, %r9d; movzbl %ch, %esi; rorq $(48), %rcx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(32), %rcx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(16), %rcx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((4*2))+1)(%rdi), %esi; addl 4128 +4*(2*((4*2)))(%rdi), %r8d; xorl %ebx, %esi; shrq $32, %rbx; roll $1, %ebx; xorl %r8d, %ebx; shlq $32, %rbx; rorl $1, %esi; orq %rsi, %rbx; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((4*2))+1)(%rdi), %esi; addl 4128 +4*(2*((4*2)))(%rdi), %r9d; xorl %edx, %esi; shrq $32, %rdx; roll $1, %edx; xorl %r9d, %edx; shlq $32, %rdx; rorl $1, %esi; orq %rsi, %rdx;;; + movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(32), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), 
%r10d;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(48), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %dl, %r11d; movzbl %dh, %esi; rorq $(32), %rdx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %dl, %r9d; movzbl %dh, %esi; rorq $(48), %rdx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(32), %rdx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(16), %rdx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((3*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((3*2)+1))(%rdi), %r8d; xorl %eax, %esi; shrq $32, %rax; roll $1, %eax; xorl %r8d, %eax; shlq $32, %rax; rorl $1, %esi; orq %rsi, %rax; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((3*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((3*2)+1))(%rdi), %r9d; xorl %ecx, %esi; shrq $32, %rcx; roll $1, %ecx; xorl %r9d, %ecx; shlq $32, %rcx; rorl $1, %esi; orq %rsi, %rcx;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(32), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %r8d; movzbl %ah, %esi; rorq $(48), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %cl, %r11d; movzbl %ch, %esi; rorq $(32), %rcx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %cl, %r9d; movzbl %ch, %esi; rorq $(48), %rcx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; 
xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(32), %rcx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(16), %rcx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((3*2))+1)(%rdi), %esi; addl 4128 +4*(2*((3*2)))(%rdi), %r8d; xorl %ebx, %esi; shrq $32, %rbx; roll $1, %ebx; xorl %r8d, %ebx; shlq $32, %rbx; rorl $1, %esi; orq %rsi, %rbx; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((3*2))+1)(%rdi), %esi; addl 4128 +4*(2*((3*2)))(%rdi), %r9d; xorl %edx, %esi; shrq $32, %rdx; roll $1, %edx; xorl %r9d, %edx; shlq $32, %rdx; rorl $1, %esi; orq %rsi, %rdx;;; + movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(32), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(48), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %dl, %r11d; movzbl %dh, %esi; rorq $(32), %rdx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %dl, %r9d; movzbl %dh, %esi; rorq $(48), %rdx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(32), %rdx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(16), %rdx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((2*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((2*2)+1))(%rdi), %r8d; xorl %eax, %esi; shrq $32, %rax; roll $1, %eax; xorl %r8d, %eax; shlq $32, %rax; rorl $1, %esi; orq %rsi, %rax; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 
+4*(2*((2*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((2*2)+1))(%rdi), %r9d; xorl %ecx, %esi; shrq $32, %rcx; roll $1, %ecx; xorl %r9d, %ecx; shlq $32, %rcx; rorl $1, %esi; orq %rsi, %rcx;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(32), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %r8d; movzbl %ah, %esi; rorq $(48), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %cl, %r11d; movzbl %ch, %esi; rorq $(32), %rcx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %cl, %r9d; movzbl %ch, %esi; rorq $(48), %rcx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(32), %rcx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(16), %rcx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((2*2))+1)(%rdi), %esi; addl 4128 +4*(2*((2*2)))(%rdi), %r8d; xorl %ebx, %esi; shrq $32, %rbx; roll $1, %ebx; xorl %r8d, %ebx; shlq $32, %rbx; rorl $1, %esi; orq %rsi, %rbx; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((2*2))+1)(%rdi), %esi; addl 4128 +4*(2*((2*2)))(%rdi), %r9d; xorl %edx, %esi; shrq $32, %rdx; roll $1, %edx; xorl %r9d, %edx; shlq $32, %rdx; rorl $1, %esi; orq %rsi, %rdx;;; + movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(32), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(48), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %dl, %r11d; movzbl %dh, %esi; rorq $(32), %rdx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %dl, %r9d; movzbl 
%dh, %esi; rorq $(48), %rdx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(32), %rdx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(16), %rdx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((1*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((1*2)+1))(%rdi), %r8d; xorl %eax, %esi; shrq $32, %rax; roll $1, %eax; xorl %r8d, %eax; shlq $32, %rax; rorl $1, %esi; orq %rsi, %rax; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((1*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((1*2)+1))(%rdi), %r9d; xorl %ecx, %esi; shrq $32, %rcx; roll $1, %ecx; xorl %r9d, %ecx; shlq $32, %rcx; rorl $1, %esi; orq %rsi, %rcx;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(32), %rax; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %r8d; movzbl %ah, %esi; rorq $(48), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %cl, %r11d; movzbl %ch, %esi; rorq $(32), %rcx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %cl, %r9d; movzbl %ch, %esi; rorq $(48), %rcx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(32), %rcx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(16), %rcx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;;; 
leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((1*2))+1)(%rdi), %esi; addl 4128 +4*(2*((1*2)))(%rdi), %r8d; xorl %ebx, %esi; shrq $32, %rbx; roll $1, %ebx; xorl %r8d, %ebx; shlq $32, %rbx; rorl $1, %esi; orq %rsi, %rbx; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((1*2))+1)(%rdi), %esi; addl 4128 +4*(2*((1*2)))(%rdi), %r9d; xorl %edx, %esi; shrq $32, %rdx; roll $1, %edx; xorl %r9d, %edx; shlq $32, %rdx; rorl $1, %esi; orq %rsi, %rdx;;; + movzbl %bl, %r10d; movzbl %bh, %esi; rorq $(32), %rbx; movl 1024(%rdi, %r10, 4), %r10d; xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %bl, %r8d; movzbl %bh, %esi; rorq $(48), %rbx; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %dl, %r11d; movzbl %dh, %esi; rorq $(32), %rdx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %dl, %r9d; movzbl %dh, %esi; rorq $(48), %rdx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(32), %rbx; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %bl, %ebp; movzbl %bh, %esi; rorq $(16), %rbx; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(32), %rdx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;; movzbl %dl, %ebp; movzbl %dh, %esi; rorq $(16), %rdx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((0*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((0*2)+1))(%rdi), %r8d; xorl %eax, %esi; shrq $32, %rax; roll $1, %eax; xorl %r8d, %eax; shlq $32, %rax; rorl $1, %esi; orq %rsi, %rax; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((0*2)+1)+1)(%rdi), %esi; addl 4128 +4*(2*((0*2)+1))(%rdi), %r9d; xorl %ecx, %esi; shrq $32, %rcx; roll $1, %ecx; xorl %r9d, %ecx; shlq $32, %rcx; rorl $1, %esi; orq %rsi, %rcx;; movzbl %al, %r10d; movzbl %ah, %esi; rorq $(32), %rax; movl 1024(%rdi, %r10, 4), %r10d; 
xorl 2048(%rdi, %rsi, 4), %r10d;; movzbl %al, %r8d; movzbl %ah, %esi; rorq $(48), %rax; movl 0(%rdi, %r8, 4), %r8d; xorl 1024(%rdi, %rsi, 4), %r8d;; movzbl %cl, %r11d; movzbl %ch, %esi; rorq $(32), %rcx; movl 1024(%rdi, %r11, 4), %r11d; xorl 2048(%rdi, %rsi, 4), %r11d;; movzbl %cl, %r9d; movzbl %ch, %esi; rorq $(48), %rcx; movl 0(%rdi, %r9, 4), %r9d; xorl 1024(%rdi, %rsi, 4), %r9d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(32), %rax; xorl 3072(%rdi, %rbp, 4), %r10d; xorl 0(%rdi, %rsi, 4), %r10d;; movzbl %al, %ebp; movzbl %ah, %esi; rorq $(16), %rax; xorl 2048(%rdi, %rbp, 4), %r8d; xorl 3072(%rdi, %rsi, 4), %r8d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(32), %rcx; xorl 3072(%rdi, %rbp, 4), %r11d; xorl 0(%rdi, %rsi, 4), %r11d;; movzbl %cl, %ebp; movzbl %ch, %esi; rorq $(16), %rcx; xorl 2048(%rdi, %rbp, 4), %r9d; xorl 3072(%rdi, %rsi, 4), %r9d;;; leal (%r8d, %r10d, 2), %esi; addl %r10d, %r8d; addl 4128 +4*(2*((0*2))+1)(%rdi), %esi; addl 4128 +4*(2*((0*2)))(%rdi), %r8d; xorl %ebx, %esi; shrq $32, %rbx; roll $1, %ebx; xorl %r8d, %ebx; shlq $32, %rbx; rorl $1, %esi; orq %rsi, %rbx; leal (%r9d, %r11d, 2), %esi; addl %r11d, %r9d; addl 4128 +4*(2*((0*2))+1)(%rdi), %esi; addl 4128 +4*(2*((0*2)))(%rdi), %r9d; xorl %edx, %esi; shrq $32, %rdx; roll $1, %edx; xorl %r9d, %edx; shlq $32, %rdx; rorl $1, %esi; orq %rsi, %rdx;;; + + popq %rsi; + + rorq $32, %rax; rorq $32, %rcx; rorq $32, %rbx; rorq $32, %rdx; xorq 4096 +4*0(%rdi), %rax; movq %rax, 4*(0)(%rsi); xorq 4096 +4*0(%rdi), %rcx; movq %rcx, 4*(4+(0))(%rsi);; xorq 4096 +4*2(%rdi), %rbx; movq %rbx, 4*(2)(%rsi); xorq 4096 +4*2(%rdi), %rdx; movq %rdx, 4*(4+(2))(%rsi);;; + + popq %r13 + popq %r12 + popq %rbx + popq %rbp + +.ifdef WINABI +popq %rdi +popq %rsi +.endif + ret; + +.global twofish_enc_blk3 +.global _twofish_enc_blk3 + +twofish_enc_blk3: +_twofish_enc_blk3: + +.ifdef WINABI +pushq %rsi +pushq %rdi +movq %rcx, %rdi; +movq %rdx, %rsi; +movq %r8, %rdx; +.endif + + + + + pushq %r15; + pushq %r14; + pushq %r13; + pushq 
%r12; + pushq %rbp; + pushq %rbx; + + pushq %rsi; + + movq 4*(0)(%rdx), %rax; xorq 4096 +4*0(%rdi), %rax; movq 4*(4+(0))(%rdx), %rbx; xorq 4096 +4*0(%rdi), %rbx; movq 4*(8+(0))(%rdx), %rcx; xorq 4096 +4*0(%rdi), %rcx;; movq 4*(2)(%rdx), %r8; xorq 4096 +4*2(%rdi), %r8; movq 4*(4+(2))(%rdx), %r9; xorq 4096 +4*2(%rdi), %r9; movq 4*(8+(2))(%rdx), %r10; xorq 4096 +4*2(%rdi), %r10;;; + + movzbl %al, %ebp; movzbl %ah, %edx; rorq $(32), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(48), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(32), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(48), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(32), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(48), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*(0*2))(%rdi), %ebp; xorl %eax, %ebp; addl 4128 +4*(2*(0*2)+1)(%rdi), 
%r13d; shrq $32, %rax; roll $1, %eax; xorl %r13d, %eax; shlq $32, %rax; rorl $1, %ebp; orq %rbp, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*(0*2))(%rdi), %r11d; xorl %ebx, %r11d; addl 4128 +4*(2*(0*2)+1)(%rdi), %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r14d, %ebx; shlq $32, %rbx; rorl $1, %r11d; orq %r11, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*(0*2))(%rdi), %r12d; xorl %ecx, %r12d; addl 4128 +4*(2*(0*2)+1)(%rdi), %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r15d, %ecx; shlq $32, %rcx; rorl $1, %r12d; orq %r12, %rcx;;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(32), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(48), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(32), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(48), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(32), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(48), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 3072(%rdi, %rsi, 4), 
%r15d; xorl 0(%rdi, %rdx, 4), %r15d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((0*2)+1))(%rdi), %ebp; xorl %eax, %ebp; addl 4128 +4*(2*((0*2)+1)+1)(%rdi), %r13d; shrq $32, %rax; roll $1, %eax; xorl %r13d, %eax; shlq $32, %rax; rorl $1, %ebp; orq %rbp, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((0*2)+1))(%rdi), %r11d; xorl %ebx, %r11d; addl 4128 +4*(2*((0*2)+1)+1)(%rdi), %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r14d, %ebx; shlq $32, %rbx; rorl $1, %r11d; orq %r11, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((0*2)+1))(%rdi), %r12d; xorl %ecx, %r12d; addl 4128 +4*(2*((0*2)+1)+1)(%rdi), %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r15d, %ecx; shlq $32, %rcx; rorl $1, %r12d; orq %r12, %rcx;;;; + movzbl %al, %ebp; movzbl %ah, %edx; rorq $(32), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(48), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(32), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(48), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(32), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(48), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; xchgq %r9, %rbx; 
movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*(1*2))(%rdi), %ebp; xorl %eax, %ebp; addl 4128 +4*(2*(1*2)+1)(%rdi), %r13d; shrq $32, %rax; roll $1, %eax; xorl %r13d, %eax; shlq $32, %rax; rorl $1, %ebp; orq %rbp, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*(1*2))(%rdi), %r11d; xorl %ebx, %r11d; addl 4128 +4*(2*(1*2)+1)(%rdi), %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r14d, %ebx; shlq $32, %rbx; rorl $1, %r11d; orq %r11, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*(1*2))(%rdi), %r12d; xorl %ecx, %r12d; addl 4128 +4*(2*(1*2)+1)(%rdi), %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r15d, %ecx; shlq $32, %rcx; rorl $1, %r12d; orq %r12, %rcx;;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(32), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(48), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(32), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(48), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(32), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(48), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 2048(%rdi, %rsi, 4), 
%r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((1*2)+1))(%rdi), %ebp; xorl %eax, %ebp; addl 4128 +4*(2*((1*2)+1)+1)(%rdi), %r13d; shrq $32, %rax; roll $1, %eax; xorl %r13d, %eax; shlq $32, %rax; rorl $1, %ebp; orq %rbp, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((1*2)+1))(%rdi), %r11d; xorl %ebx, %r11d; addl 4128 +4*(2*((1*2)+1)+1)(%rdi), %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r14d, %ebx; shlq $32, %rbx; rorl $1, %r11d; orq %r11, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((1*2)+1))(%rdi), %r12d; xorl %ecx, %r12d; addl 4128 +4*(2*((1*2)+1)+1)(%rdi), %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r15d, %ecx; shlq $32, %rcx; rorl $1, %r12d; orq %r12, %rcx;;;; + movzbl %al, %ebp; movzbl %ah, %edx; rorq $(32), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(48), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(32), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(48), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(32), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(48), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; movzbl %al, 
%esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*(2*2))(%rdi), %ebp; xorl %eax, %ebp; addl 4128 +4*(2*(2*2)+1)(%rdi), %r13d; shrq $32, %rax; roll $1, %eax; xorl %r13d, %eax; shlq $32, %rax; rorl $1, %ebp; orq %rbp, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*(2*2))(%rdi), %r11d; xorl %ebx, %r11d; addl 4128 +4*(2*(2*2)+1)(%rdi), %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r14d, %ebx; shlq $32, %rbx; rorl $1, %r11d; orq %r11, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*(2*2))(%rdi), %r12d; xorl %ecx, %r12d; addl 4128 +4*(2*(2*2)+1)(%rdi), %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r15d, %ecx; shlq $32, %rcx; rorl $1, %r12d; orq %r12, %rcx;;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(32), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(48), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(32), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(48), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(32), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(48), %rcx; movl 1024(%rdi, %r15, 
4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((2*2)+1))(%rdi), %ebp; xorl %eax, %ebp; addl 4128 +4*(2*((2*2)+1)+1)(%rdi), %r13d; shrq $32, %rax; roll $1, %eax; xorl %r13d, %eax; shlq $32, %rax; rorl $1, %ebp; orq %rbp, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((2*2)+1))(%rdi), %r11d; xorl %ebx, %r11d; addl 4128 +4*(2*((2*2)+1)+1)(%rdi), %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r14d, %ebx; shlq $32, %rbx; rorl $1, %r11d; orq %r11, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((2*2)+1))(%rdi), %r12d; xorl %ecx, %r12d; addl 4128 +4*(2*((2*2)+1)+1)(%rdi), %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r15d, %ecx; shlq $32, %rcx; rorl $1, %r12d; orq %r12, %rcx;;;; + movzbl %al, %ebp; movzbl %ah, %edx; rorq $(32), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(48), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(32), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(48), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), 
%r14d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(32), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(48), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*(3*2))(%rdi), %ebp; xorl %eax, %ebp; addl 4128 +4*(2*(3*2)+1)(%rdi), %r13d; shrq $32, %rax; roll $1, %eax; xorl %r13d, %eax; shlq $32, %rax; rorl $1, %ebp; orq %rbp, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*(3*2))(%rdi), %r11d; xorl %ebx, %r11d; addl 4128 +4*(2*(3*2)+1)(%rdi), %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r14d, %ebx; shlq $32, %rbx; rorl $1, %r11d; orq %r11, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*(3*2))(%rdi), %r12d; xorl %ecx, %r12d; addl 4128 +4*(2*(3*2)+1)(%rdi), %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r15d, %ecx; shlq $32, %rcx; rorl $1, %r12d; orq %r12, %rcx;;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(32), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(48), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(32), %rbx; 
movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(48), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(32), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(48), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((3*2)+1))(%rdi), %ebp; xorl %eax, %ebp; addl 4128 +4*(2*((3*2)+1)+1)(%rdi), %r13d; shrq $32, %rax; roll $1, %eax; xorl %r13d, %eax; shlq $32, %rax; rorl $1, %ebp; orq %rbp, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((3*2)+1))(%rdi), %r11d; xorl %ebx, %r11d; addl 4128 +4*(2*((3*2)+1)+1)(%rdi), %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r14d, %ebx; shlq $32, %rbx; rorl $1, %r11d; orq %r11, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((3*2)+1))(%rdi), %r12d; xorl %ecx, %r12d; addl 4128 +4*(2*((3*2)+1)+1)(%rdi), %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r15d, %ecx; shlq $32, %rcx; rorl $1, %r12d; orq %r12, %rcx;;;; + movzbl %al, %ebp; movzbl %ah, %edx; rorq $(32), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 
1024(%rdi, %rdx, 4), %ebp;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(48), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(32), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(48), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(32), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(48), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*(4*2))(%rdi), %ebp; xorl %eax, %ebp; addl 4128 +4*(2*(4*2)+1)(%rdi), %r13d; shrq $32, %rax; roll $1, %eax; xorl %r13d, %eax; shlq $32, %rax; rorl $1, %ebp; orq %rbp, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*(4*2))(%rdi), %r11d; xorl %ebx, %r11d; addl 4128 +4*(2*(4*2)+1)(%rdi), %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r14d, %ebx; shlq $32, %rbx; rorl $1, %r11d; orq %r11, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*(4*2))(%rdi), %r12d; xorl %ecx, %r12d; addl 4128 +4*(2*(4*2)+1)(%rdi), %r15d; shrq $32, 
%rcx; roll $1, %ecx; xorl %r15d, %ecx; shlq $32, %rcx; rorl $1, %r12d; orq %r12, %rcx;;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(32), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(48), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(32), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(48), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(32), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(48), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((4*2)+1))(%rdi), %ebp; xorl %eax, %ebp; addl 4128 +4*(2*((4*2)+1)+1)(%rdi), %r13d; shrq $32, %rax; roll $1, %eax; xorl %r13d, %eax; shlq $32, %rax; rorl $1, %ebp; orq %rbp, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((4*2)+1))(%rdi), %r11d; xorl %ebx, %r11d; addl 4128 +4*(2*((4*2)+1)+1)(%rdi), %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r14d, %ebx; 
shlq $32, %rbx; rorl $1, %r11d; orq %r11, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((4*2)+1))(%rdi), %r12d; xorl %ecx, %r12d; addl 4128 +4*(2*((4*2)+1)+1)(%rdi), %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r15d, %ecx; shlq $32, %rcx; rorl $1, %r12d; orq %r12, %rcx;;;; + movzbl %al, %ebp; movzbl %ah, %edx; rorq $(32), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(48), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(32), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(48), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(32), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(48), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*(5*2))(%rdi), %ebp; xorl %eax, %ebp; addl 4128 +4*(2*(5*2)+1)(%rdi), %r13d; shrq $32, %rax; roll $1, %eax; xorl %r13d, %eax; shlq $32, %rax; rorl $1, %ebp; orq %rbp, 
%rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*(5*2))(%rdi), %r11d; xorl %ebx, %r11d; addl 4128 +4*(2*(5*2)+1)(%rdi), %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r14d, %ebx; shlq $32, %rbx; rorl $1, %r11d; orq %r11, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*(5*2))(%rdi), %r12d; xorl %ecx, %r12d; addl 4128 +4*(2*(5*2)+1)(%rdi), %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r15d, %ecx; shlq $32, %rcx; rorl $1, %r12d; orq %r12, %rcx;;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(32), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(48), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(32), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(48), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(32), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(48), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 
4128 +4*(2*((5*2)+1))(%rdi), %ebp; xorl %eax, %ebp; addl 4128 +4*(2*((5*2)+1)+1)(%rdi), %r13d; shrq $32, %rax; roll $1, %eax; xorl %r13d, %eax; shlq $32, %rax; rorl $1, %ebp; orq %rbp, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((5*2)+1))(%rdi), %r11d; xorl %ebx, %r11d; addl 4128 +4*(2*((5*2)+1)+1)(%rdi), %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r14d, %ebx; shlq $32, %rbx; rorl $1, %r11d; orq %r11, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((5*2)+1))(%rdi), %r12d; xorl %ecx, %r12d; addl 4128 +4*(2*((5*2)+1)+1)(%rdi), %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r15d, %ecx; shlq $32, %rcx; rorl $1, %r12d; orq %r12, %rcx;;;; + movzbl %al, %ebp; movzbl %ah, %edx; rorq $(32), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(48), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(32), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(48), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(32), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(48), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 
3072(%rdi, %rdx, 4), %r12d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*(6*2))(%rdi), %ebp; xorl %eax, %ebp; addl 4128 +4*(2*(6*2)+1)(%rdi), %r13d; shrq $32, %rax; roll $1, %eax; xorl %r13d, %eax; shlq $32, %rax; rorl $1, %ebp; orq %rbp, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*(6*2))(%rdi), %r11d; xorl %ebx, %r11d; addl 4128 +4*(2*(6*2)+1)(%rdi), %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r14d, %ebx; shlq $32, %rbx; rorl $1, %r11d; orq %r11, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*(6*2))(%rdi), %r12d; xorl %ecx, %r12d; addl 4128 +4*(2*(6*2)+1)(%rdi), %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r15d, %ecx; shlq $32, %rcx; rorl $1, %r12d; orq %r12, %rcx;;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(32), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(48), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(32), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(48), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(32), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(48), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), 
%rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((6*2)+1))(%rdi), %ebp; xorl %eax, %ebp; addl 4128 +4*(2*((6*2)+1)+1)(%rdi), %r13d; shrq $32, %rax; roll $1, %eax; xorl %r13d, %eax; shlq $32, %rax; rorl $1, %ebp; orq %rbp, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((6*2)+1))(%rdi), %r11d; xorl %ebx, %r11d; addl 4128 +4*(2*((6*2)+1)+1)(%rdi), %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r14d, %ebx; shlq $32, %rbx; rorl $1, %r11d; orq %r11, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((6*2)+1))(%rdi), %r12d; xorl %ecx, %r12d; addl 4128 +4*(2*((6*2)+1)+1)(%rdi), %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r15d, %ecx; shlq $32, %rcx; rorl $1, %r12d; orq %r12, %rcx;;;; + movzbl %al, %ebp; movzbl %ah, %edx; rorq $(32), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(48), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(32), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(48), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(32), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(48), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, 
%rdx, 4), %r13d;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*(7*2))(%rdi), %ebp; xorl %eax, %ebp; addl 4128 +4*(2*(7*2)+1)(%rdi), %r13d; shrq $32, %rax; roll $1, %eax; xorl %r13d, %eax; shlq $32, %rax; rorl $1, %ebp; orq %rbp, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*(7*2))(%rdi), %r11d; xorl %ebx, %r11d; addl 4128 +4*(2*(7*2)+1)(%rdi), %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r14d, %ebx; shlq $32, %rbx; rorl $1, %r11d; orq %r11, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*(7*2))(%rdi), %r12d; xorl %ecx, %r12d; addl 4128 +4*(2*(7*2)+1)(%rdi), %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r15d, %ecx; shlq $32, %rcx; rorl $1, %r12d; orq %r12, %rcx;;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(32), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(48), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(32), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(48), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(32), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(48), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %al, %esi; movzbl %ah, %edx; rorq 
$(32), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((7*2)+1))(%rdi), %ebp; xorl %eax, %ebp; addl 4128 +4*(2*((7*2)+1)+1)(%rdi), %r13d; shrq $32, %rax; roll $1, %eax; xorl %r13d, %eax; shlq $32, %rax; rorl $1, %ebp; orq %rbp, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((7*2)+1))(%rdi), %r11d; xorl %ebx, %r11d; addl 4128 +4*(2*((7*2)+1)+1)(%rdi), %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r14d, %ebx; shlq $32, %rbx; rorl $1, %r11d; orq %r11, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((7*2)+1))(%rdi), %r12d; xorl %ecx, %r12d; addl 4128 +4*(2*((7*2)+1)+1)(%rdi), %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r15d, %ecx; shlq $32, %rcx; rorl $1, %r12d; orq %r12, %rcx;;;; + + popq %rdx; + + xorq 4096 +4*6(%rdi), %rax; movq %rax, 4*(2)(%rdx); xorq 4096 +4*6(%rdi), %rbx; movq %rbx, 4*(4+(2))(%rdx); xorq 4096 +4*6(%rdi), %rcx; movq %rcx, 4*(8+(2))(%rdx);; xorq 4096 +4*4(%rdi), %r8; movq %r8, 4*(0)(%rdx); xorq 4096 +4*4(%rdi), %r9; movq %r9, 4*(4+(0))(%rdx); xorq 4096 +4*4(%rdi), %r10; movq %r10, 4*(8+(0))(%rdx);;; + + popq %rbx; + popq %rbp; + popq %r12; + popq %r13; + popq %r14; + popq %r15; +.ifdef WINABI +popq %rdi +popq %rsi +.endif + ret; + +.global twofish_dec_blk3 +.global _twofish_dec_blk3 + +twofish_dec_blk3: 
+_twofish_dec_blk3: + + +.ifdef WINABI +pushq %rsi +pushq %rdi +movq %rcx, %rdi; +movq %rdx, %rsi; +movq %r8, %rdx; +.endif + + + pushq %r15; + pushq %r14; + pushq %r13; + pushq %r12; + pushq %rbp; + pushq %rbx; + + pushq %rsi; + + movq 4*(0)(%rdx), %rax; xorq 4096 +4*4(%rdi), %rax; movq 4*(4+(0))(%rdx), %rbx; xorq 4096 +4*4(%rdi), %rbx; movq 4*(8+(0))(%rdx), %rcx; xorq 4096 +4*4(%rdi), %rcx;; rorq $32, %rax; rorq $32, %rbx; rorq $32, %rcx; movq 4*(2)(%rdx), %r8; xorq 4096 +4*6(%rdi), %r8; movq 4*(4+(2))(%rdx), %r9; xorq 4096 +4*6(%rdi), %r9; movq 4*(8+(2))(%rdx), %r10; xorq 4096 +4*6(%rdi), %r10;; rorq $32, %r8; rorq $32, %r9; rorq $32, %r10;; + + movzbl %al, %r13d; movzbl %ah, %edx; rorq $(32), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(48), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(32), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(48), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(32), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(48), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), 
%r15d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((7*2)+1))(%rdi), %ebp; addl 4128 +4*(2*((7*2)+1)+1)(%rdi), %r13d; xorl %eax, %r13d; shrq $32, %rax; roll $1, %eax; xorl %ebp, %eax; shlq $32, %rax; rorl $1, %r13d; orq %r13, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((7*2)+1))(%rdi), %r11d; addl 4128 +4*(2*((7*2)+1)+1)(%rdi), %r14d; xorl %ebx, %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r11d, %ebx; shlq $32, %rbx; rorl $1, %r14d; orq %r14, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((7*2)+1))(%rdi), %r12d; addl 4128 +4*(2*((7*2)+1)+1)(%rdi), %r15d; xorl %ecx, %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r12d, %ecx; shlq $32, %rcx; rorl $1, %r15d; orq %r15, %rcx;;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(32), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(48), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(32), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(48), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(32), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(48), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), 
%rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((7*2)))(%rdi), %ebp; addl 4128 +4*(2*((7*2))+1)(%rdi), %r13d; xorl %eax, %r13d; shrq $32, %rax; roll $1, %eax; xorl %ebp, %eax; shlq $32, %rax; rorl $1, %r13d; orq %r13, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((7*2)))(%rdi), %r11d; addl 4128 +4*(2*((7*2))+1)(%rdi), %r14d; xorl %ebx, %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r11d, %ebx; shlq $32, %rbx; rorl $1, %r14d; orq %r14, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((7*2)))(%rdi), %r12d; addl 4128 +4*(2*((7*2))+1)(%rdi), %r15d; xorl %ecx, %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r12d, %ecx; shlq $32, %rcx; rorl $1, %r15d; orq %r15, %rcx;;;; + movzbl %al, %r13d; movzbl %ah, %edx; rorq $(32), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(48), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(32), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(48), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(32), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(48), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), 
%ebp;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((6*2)+1))(%rdi), %ebp; addl 4128 +4*(2*((6*2)+1)+1)(%rdi), %r13d; xorl %eax, %r13d; shrq $32, %rax; roll $1, %eax; xorl %ebp, %eax; shlq $32, %rax; rorl $1, %r13d; orq %r13, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((6*2)+1))(%rdi), %r11d; addl 4128 +4*(2*((6*2)+1)+1)(%rdi), %r14d; xorl %ebx, %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r11d, %ebx; shlq $32, %rbx; rorl $1, %r14d; orq %r14, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((6*2)+1))(%rdi), %r12d; addl 4128 +4*(2*((6*2)+1)+1)(%rdi), %r15d; xorl %ecx, %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r12d, %ecx; shlq $32, %rcx; rorl $1, %r15d; orq %r15, %rcx;;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(32), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(48), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(32), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(48), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(32), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(48), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %al, %esi; movzbl %ah, 
%edx; rorq $(32), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((6*2)))(%rdi), %ebp; addl 4128 +4*(2*((6*2))+1)(%rdi), %r13d; xorl %eax, %r13d; shrq $32, %rax; roll $1, %eax; xorl %ebp, %eax; shlq $32, %rax; rorl $1, %r13d; orq %r13, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((6*2)))(%rdi), %r11d; addl 4128 +4*(2*((6*2))+1)(%rdi), %r14d; xorl %ebx, %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r11d, %ebx; shlq $32, %rbx; rorl $1, %r14d; orq %r14, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((6*2)))(%rdi), %r12d; addl 4128 +4*(2*((6*2))+1)(%rdi), %r15d; xorl %ecx, %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r12d, %ecx; shlq $32, %rcx; rorl $1, %r15d; orq %r15, %rcx;;;; + movzbl %al, %r13d; movzbl %ah, %edx; rorq $(32), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(48), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(32), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(48), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(32), %rcx; movl 1024(%rdi, %r15, 4), 
%r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(48), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((5*2)+1))(%rdi), %ebp; addl 4128 +4*(2*((5*2)+1)+1)(%rdi), %r13d; xorl %eax, %r13d; shrq $32, %rax; roll $1, %eax; xorl %ebp, %eax; shlq $32, %rax; rorl $1, %r13d; orq %r13, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((5*2)+1))(%rdi), %r11d; addl 4128 +4*(2*((5*2)+1)+1)(%rdi), %r14d; xorl %ebx, %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r11d, %ebx; shlq $32, %rbx; rorl $1, %r14d; orq %r14, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((5*2)+1))(%rdi), %r12d; addl 4128 +4*(2*((5*2)+1)+1)(%rdi), %r15d; xorl %ecx, %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r12d, %ecx; shlq $32, %rcx; rorl $1, %r15d; orq %r15, %rcx;;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(32), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(48), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(32), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; 
movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(48), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(32), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(48), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((5*2)))(%rdi), %ebp; addl 4128 +4*(2*((5*2))+1)(%rdi), %r13d; xorl %eax, %r13d; shrq $32, %rax; roll $1, %eax; xorl %ebp, %eax; shlq $32, %rax; rorl $1, %r13d; orq %r13, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((5*2)))(%rdi), %r11d; addl 4128 +4*(2*((5*2))+1)(%rdi), %r14d; xorl %ebx, %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r11d, %ebx; shlq $32, %rbx; rorl $1, %r14d; orq %r14, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((5*2)))(%rdi), %r12d; addl 4128 +4*(2*((5*2))+1)(%rdi), %r15d; xorl %ecx, %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r12d, %ecx; shlq $32, %rcx; rorl $1, %r15d; orq %r15, %rcx;;;; + movzbl %al, %r13d; movzbl %ah, %edx; rorq $(32), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(48), 
%rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(32), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(48), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(32), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(48), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((4*2)+1))(%rdi), %ebp; addl 4128 +4*(2*((4*2)+1)+1)(%rdi), %r13d; xorl %eax, %r13d; shrq $32, %rax; roll $1, %eax; xorl %ebp, %eax; shlq $32, %rax; rorl $1, %r13d; orq %r13, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((4*2)+1))(%rdi), %r11d; addl 4128 +4*(2*((4*2)+1)+1)(%rdi), %r14d; xorl %ebx, %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r11d, %ebx; shlq $32, %rbx; rorl $1, %r14d; orq %r14, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((4*2)+1))(%rdi), %r12d; addl 4128 +4*(2*((4*2)+1)+1)(%rdi), %r15d; xorl %ecx, %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r12d, %ecx; shlq $32, %rcx; rorl 
$1, %r15d; orq %r15, %rcx;;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(32), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(48), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(32), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(48), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(32), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(48), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((4*2)))(%rdi), %ebp; addl 4128 +4*(2*((4*2))+1)(%rdi), %r13d; xorl %eax, %r13d; shrq $32, %rax; roll $1, %eax; xorl %ebp, %eax; shlq $32, %rax; rorl $1, %r13d; orq %r13, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((4*2)))(%rdi), %r11d; addl 4128 +4*(2*((4*2))+1)(%rdi), %r14d; xorl %ebx, %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r11d, %ebx; shlq $32, %rbx; rorl $1, %r14d; orq %r14, %rbx;; addl %r15d, %r12d; 
addl %r12d, %r15d; addl 4128 +4*(2*((4*2)))(%rdi), %r12d; addl 4128 +4*(2*((4*2))+1)(%rdi), %r15d; xorl %ecx, %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r12d, %ecx; shlq $32, %rcx; rorl $1, %r15d; orq %r15, %rcx;;;; + movzbl %al, %r13d; movzbl %ah, %edx; rorq $(32), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(48), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(32), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(48), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(32), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(48), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((3*2)+1))(%rdi), %ebp; addl 4128 +4*(2*((3*2)+1)+1)(%rdi), %r13d; xorl %eax, %r13d; shrq $32, %rax; roll $1, %eax; xorl %ebp, %eax; shlq $32, %rax; rorl $1, %r13d; orq %r13, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 
+4*(2*((3*2)+1))(%rdi), %r11d; addl 4128 +4*(2*((3*2)+1)+1)(%rdi), %r14d; xorl %ebx, %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r11d, %ebx; shlq $32, %rbx; rorl $1, %r14d; orq %r14, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((3*2)+1))(%rdi), %r12d; addl 4128 +4*(2*((3*2)+1)+1)(%rdi), %r15d; xorl %ecx, %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r12d, %ecx; shlq $32, %rcx; rorl $1, %r15d; orq %r15, %rcx;;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(32), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(48), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(32), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(48), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(32), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(48), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((3*2)))(%rdi), %ebp; addl 
4128 +4*(2*((3*2))+1)(%rdi), %r13d; xorl %eax, %r13d; shrq $32, %rax; roll $1, %eax; xorl %ebp, %eax; shlq $32, %rax; rorl $1, %r13d; orq %r13, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((3*2)))(%rdi), %r11d; addl 4128 +4*(2*((3*2))+1)(%rdi), %r14d; xorl %ebx, %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r11d, %ebx; shlq $32, %rbx; rorl $1, %r14d; orq %r14, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((3*2)))(%rdi), %r12d; addl 4128 +4*(2*((3*2))+1)(%rdi), %r15d; xorl %ecx, %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r12d, %ecx; shlq $32, %rcx; rorl $1, %r15d; orq %r15, %rcx;;;; + movzbl %al, %r13d; movzbl %ah, %edx; rorq $(32), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(48), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(32), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(48), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(32), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(48), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; movzbl %cl, %esi; movzbl 
%ch, %edx; rorq $(16), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((2*2)+1))(%rdi), %ebp; addl 4128 +4*(2*((2*2)+1)+1)(%rdi), %r13d; xorl %eax, %r13d; shrq $32, %rax; roll $1, %eax; xorl %ebp, %eax; shlq $32, %rax; rorl $1, %r13d; orq %r13, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((2*2)+1))(%rdi), %r11d; addl 4128 +4*(2*((2*2)+1)+1)(%rdi), %r14d; xorl %ebx, %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r11d, %ebx; shlq $32, %rbx; rorl $1, %r14d; orq %r14, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((2*2)+1))(%rdi), %r12d; addl 4128 +4*(2*((2*2)+1)+1)(%rdi), %r15d; xorl %ecx, %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r12d, %ecx; shlq $32, %rcx; rorl $1, %r15d; orq %r15, %rcx;;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(32), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(48), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(32), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(48), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(32), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(48), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 2048(%rdi, %rsi, 4), 
%r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((2*2)))(%rdi), %ebp; addl 4128 +4*(2*((2*2))+1)(%rdi), %r13d; xorl %eax, %r13d; shrq $32, %rax; roll $1, %eax; xorl %ebp, %eax; shlq $32, %rax; rorl $1, %r13d; orq %r13, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((2*2)))(%rdi), %r11d; addl 4128 +4*(2*((2*2))+1)(%rdi), %r14d; xorl %ebx, %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r11d, %ebx; shlq $32, %rbx; rorl $1, %r14d; orq %r14, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((2*2)))(%rdi), %r12d; addl 4128 +4*(2*((2*2))+1)(%rdi), %r15d; xorl %ecx, %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r12d, %ecx; shlq $32, %rcx; rorl $1, %r15d; orq %r15, %rcx;;;; + movzbl %al, %r13d; movzbl %ah, %edx; rorq $(32), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(48), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(32), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(48), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(32), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(48), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; xchgq %r8, %rax; movzbl 
%bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((1*2)+1))(%rdi), %ebp; addl 4128 +4*(2*((1*2)+1)+1)(%rdi), %r13d; xorl %eax, %r13d; shrq $32, %rax; roll $1, %eax; xorl %ebp, %eax; shlq $32, %rax; rorl $1, %r13d; orq %r13, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((1*2)+1))(%rdi), %r11d; addl 4128 +4*(2*((1*2)+1)+1)(%rdi), %r14d; xorl %ebx, %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r11d, %ebx; shlq $32, %rbx; rorl $1, %r14d; orq %r14, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((1*2)+1))(%rdi), %r12d; addl 4128 +4*(2*((1*2)+1)+1)(%rdi), %r15d; xorl %ecx, %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r12d, %ecx; shlq $32, %rcx; rorl $1, %r15d; orq %r15, %rcx;;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(32), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(48), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(32), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(48), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(32), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(48), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 
3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((1*2)))(%rdi), %ebp; addl 4128 +4*(2*((1*2))+1)(%rdi), %r13d; xorl %eax, %r13d; shrq $32, %rax; roll $1, %eax; xorl %ebp, %eax; shlq $32, %rax; rorl $1, %r13d; orq %r13, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((1*2)))(%rdi), %r11d; addl 4128 +4*(2*((1*2))+1)(%rdi), %r14d; xorl %ebx, %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r11d, %ebx; shlq $32, %rbx; rorl $1, %r14d; orq %r14, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((1*2)))(%rdi), %r12d; addl 4128 +4*(2*((1*2))+1)(%rdi), %r15d; xorl %ecx, %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r12d, %ecx; shlq $32, %rcx; rorl $1, %r15d; orq %r15, %rcx;;;; + movzbl %al, %r13d; movzbl %ah, %edx; rorq $(32), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(48), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(32), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %bl, %r11d; movzbl %bh, %edx; rorq $(48), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(32), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 
4), %r15d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(48), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((0*2)+1))(%rdi), %ebp; addl 4128 +4*(2*((0*2)+1)+1)(%rdi), %r13d; xorl %eax, %r13d; shrq $32, %rax; roll $1, %eax; xorl %ebp, %eax; shlq $32, %rax; rorl $1, %r13d; orq %r13, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((0*2)+1))(%rdi), %r11d; addl 4128 +4*(2*((0*2)+1)+1)(%rdi), %r14d; xorl %ebx, %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r11d, %ebx; shlq $32, %rbx; rorl $1, %r14d; orq %r14, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((0*2)+1))(%rdi), %r12d; addl 4128 +4*(2*((0*2)+1)+1)(%rdi), %r15d; xorl %ecx, %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r12d, %ecx; shlq $32, %rcx; rorl $1, %r15d; orq %r15, %rcx;;; movzbl %al, %r13d; movzbl %ah, %edx; rorq $(32), %rax; movl 1024(%rdi, %r13, 4), %r13d; xorl 2048(%rdi, %rdx, 4), %r13d;; movzbl %al, %ebp; movzbl %ah, %edx; rorq $(48), %rax; movl 0(%rdi, %rbp, 4), %ebp; xorl 1024(%rdi, %rdx, 4), %ebp;; movzbl %bl, %r14d; movzbl %bh, %edx; rorq $(32), %rbx; movl 1024(%rdi, %r14, 4), %r14d; xorl 2048(%rdi, %rdx, 4), %r14d;; movzbl %bl, %r11d; movzbl 
%bh, %edx; rorq $(48), %rbx; movl 0(%rdi, %r11, 4), %r11d; xorl 1024(%rdi, %rdx, 4), %r11d;; movzbl %cl, %r15d; movzbl %ch, %edx; rorq $(32), %rcx; movl 1024(%rdi, %r15, 4), %r15d; xorl 2048(%rdi, %rdx, 4), %r15d;; movzbl %cl, %r12d; movzbl %ch, %edx; rorq $(48), %rcx; movl 0(%rdi, %r12, 4), %r12d; xorl 1024(%rdi, %rdx, 4), %r12d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(32), %rax; xorl 3072(%rdi, %rsi, 4), %r13d; xorl 0(%rdi, %rdx, 4), %r13d;; movzbl %al, %esi; movzbl %ah, %edx; rorq $(16), %rax; xorl 2048(%rdi, %rsi, 4), %ebp; xorl 3072(%rdi, %rdx, 4), %ebp;; xchgq %r8, %rax; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(32), %rbx; xorl 3072(%rdi, %rsi, 4), %r14d; xorl 0(%rdi, %rdx, 4), %r14d;; movzbl %bl, %esi; movzbl %bh, %edx; rorq $(16), %rbx; xorl 2048(%rdi, %rsi, 4), %r11d; xorl 3072(%rdi, %rdx, 4), %r11d;; xchgq %r9, %rbx; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(32), %rcx; xorl 3072(%rdi, %rsi, 4), %r15d; xorl 0(%rdi, %rdx, 4), %r15d;; movzbl %cl, %esi; movzbl %ch, %edx; rorq $(16), %rcx; xorl 2048(%rdi, %rsi, 4), %r12d; xorl 3072(%rdi, %rdx, 4), %r12d;; xchgq %r10, %rcx;; addl %r13d, %ebp; addl %ebp, %r13d; addl 4128 +4*(2*((0*2)))(%rdi), %ebp; addl 4128 +4*(2*((0*2))+1)(%rdi), %r13d; xorl %eax, %r13d; shrq $32, %rax; roll $1, %eax; xorl %ebp, %eax; shlq $32, %rax; rorl $1, %r13d; orq %r13, %rax;; addl %r14d, %r11d; addl %r11d, %r14d; addl 4128 +4*(2*((0*2)))(%rdi), %r11d; addl 4128 +4*(2*((0*2))+1)(%rdi), %r14d; xorl %ebx, %r14d; shrq $32, %rbx; roll $1, %ebx; xorl %r11d, %ebx; shlq $32, %rbx; rorl $1, %r14d; orq %r14, %rbx;; addl %r15d, %r12d; addl %r12d, %r15d; addl 4128 +4*(2*((0*2)))(%rdi), %r12d; addl 4128 +4*(2*((0*2))+1)(%rdi), %r15d; xorl %ecx, %r15d; shrq $32, %rcx; roll $1, %ecx; xorl %r12d, %ecx; shlq $32, %rcx; rorl $1, %r15d; orq %r15, %rcx;;;; + + popq %rdx; + + rorq $32, %r8; rorq $32, %r9; rorq $32, %r10; xorq 4096 +4*0(%rdi), %r8; movq %r8, 4*(0)(%rdx); xorq 4096 +4*0(%rdi), %r9; movq %r9, 4*(4+(0))(%rdx); xorq 4096 +4*0(%rdi), %r10; 
movq %r10, 4*(8+(0))(%rdx);; rorq $32, %rax; rorq $32, %rbx; rorq $32, %rcx; xorq 4096 +4*2(%rdi), %rax; movq %rax, 4*(2)(%rdx); xorq 4096 +4*2(%rdi), %rbx; movq %rbx, 4*(4+(2))(%rdx); xorq 4096 +4*2(%rdi), %rcx; movq %rcx, 4*(8+(2))(%rdx);;; + + popq %rbx; + popq %rbp; + popq %r12; + popq %r13; + popq %r14; + popq %r15; + + .ifdef WINABI + popq %rdi + popq %rsi + .endif + ret; + diff --git a/src/Crypto/Twofish_x86.S b/src/Crypto/Twofish_x86.S new file mode 100644 index 00000000..e69de29b diff --git a/src/Volume/Cipher.cpp b/src/Volume/Cipher.cpp index 09c821bb..be8cc3eb 100644 --- a/src/Volume/Cipher.cpp +++ b/src/Volume/Cipher.cpp @@ -296,6 +296,39 @@ namespace VeraCrypt twofish_set_key ((TwofishInstance *) ScheduledKey.Ptr(), (unsigned int *) key); } + void CipherTwofish::EncryptBlocks (byte *data, size_t blockCount) const + { + if (!Initialized) + throw NotInitialized (SRC_POS); + +#if CRYPTOPP_BOOL_X64 + twofish_encrypt_blocks ( (TwofishInstance *) ScheduledKey.Ptr(), data, data, blockCount); +#else + Cipher::EncryptBlocks (data, blockCount); +#endif + } + + void CipherTwofish::DecryptBlocks (byte *data, size_t blockCount) const + { + if (!Initialized) + throw NotInitialized (SRC_POS); + +#if CRYPTOPP_BOOL_X64 + twofish_decrypt_blocks ( (TwofishInstance *) ScheduledKey.Ptr(), data, data, blockCount); +#else + Cipher::DecryptBlocks (data, blockCount); +#endif + } + + bool CipherTwofish::IsHwSupportAvailable () const + { +#if CRYPTOPP_BOOL_X64 + return true; +#else + return false; +#endif + } + // Camellia void CipherCamellia::Decrypt (byte *data) const { diff --git a/src/Volume/Cipher.h b/src/Volume/Cipher.h index 7aefbfc6..5ebdde19 100644 --- a/src/Volume/Cipher.h +++ b/src/Volume/Cipher.h @@ -14,6 +14,7 @@ #define TC_HEADER_Encryption_Ciphers #include "Platform/Platform.h" +#include "Crypto/config.h" namespace VeraCrypt @@ -101,11 +102,11 @@ namespace VeraCrypt TC_CIPHER (AES, 16, 32); TC_CIPHER (Serpent, 16, 32); + TC_CIPHER (Twofish, 16, 32); #undef 
TC_CIPHER_ADD_METHODS #define TC_CIPHER_ADD_METHODS - TC_CIPHER (Twofish, 16, 32); TC_CIPHER (Camellia, 16, 32); TC_CIPHER (Gost89, 16, 32); TC_CIPHER (Gost89StaticSBOX, 16, 32); diff --git a/src/Volume/Volume.make b/src/Volume/Volume.make index fa3a4207..f6a8cec8 100644 --- a/src/Volume/Volume.make +++ b/src/Volume/Volume.make @@ -33,12 +33,14 @@ ifeq "$(PLATFORM)" "MacOSX" OBJSEX += ../Crypto/Aes_asm.oo OBJS += ../Crypto/Aes_hw_cpu.o OBJS += ../Crypto/Aescrypt.o + OBJSEX += ../Crypto/Twofish_asm.oo else ifeq "$(CPU_ARCH)" "x86" OBJS += ../Crypto/Aes_x86.o OBJS += ../Crypto/Aes_hw_cpu.o else ifeq "$(CPU_ARCH)" "x64" OBJS += ../Crypto/Aes_x64.o OBJS += ../Crypto/Aes_hw_cpu.o + OBJS += ../Crypto/Twofish_x64.o else OBJS += ../Crypto/Aescrypt.o endif @@ -72,6 +74,9 @@ ifeq "$(PLATFORM)" "MacOSX" $(AS) $(ASFLAGS) -f macho64 -o ../Crypto/Aes_x64.o ../Crypto/Aes_x64.asm lipo -create ../Crypto/Aes_x86.o ../Crypto/Aes_x64.o -output ../Crypto/Aes_asm.oo rm -fr ../Crypto/Aes_x86.o ../Crypto/Aes_x64.o +../Crypto/Twofish_asm.oo: ../Crypto/Twofish_x64.S + @echo Assembling $(