VeraCrypt
aboutsummaryrefslogtreecommitdiff
path: root/src/Crypto/Aes_x86.asm
diff options
context:
space:
mode:
Diffstat (limited to 'src/Crypto/Aes_x86.asm')
-rw-r--r--src/Crypto/Aes_x86.asm1292
1 files changed, 646 insertions, 646 deletions
diff --git a/src/Crypto/Aes_x86.asm b/src/Crypto/Aes_x86.asm
index 239da3e3..3825deee 100644
--- a/src/Crypto/Aes_x86.asm
+++ b/src/Crypto/Aes_x86.asm
@@ -1,646 +1,646 @@
-
-; ---------------------------------------------------------------------------
-; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
-;
-; LICENSE TERMS
-;
-; The free distribution and use of this software is allowed (with or without
-; changes) provided that:
-;
-; 1. source code distributions include the above copyright notice, this
-; list of conditions and the following disclaimer;
-;
-; 2. binary distributions include the above copyright notice, this list
-; of conditions and the following disclaimer in their documentation;
-;
-; 3. the name of the copyright holder is not used to endorse products
-; built using this software without specific written permission.
-;
-; DISCLAIMER
-;
-; This software is provided 'as is' with no explicit or implied warranties
-; in respect of its properties, including, but not limited to, correctness
-; and/or fitness for purpose.
-; ---------------------------------------------------------------------------
-; Issue 20/12/2007
-;
-; This code requires ASM_X86_V1C to be set in aesopt.h. It requires the C files
-; aeskey.c and aestab.c for support.
-
-;
-; Adapted for TrueCrypt:
-; - Compatibility with NASM and GCC
-;
-
-; An AES implementation for x86 processors using the YASM (or NASM) assembler.
-; This is an assembler implementation that covers encryption and decryption
-; only and is intended as a replacement of the C file aescrypt.c. It hence
-; requires the file aeskey.c for keying and aestab.c for the AES tables. It
-; employs full tables rather than compressed tables.
-
-; This code provides the standard AES block size (128 bits, 16 bytes) and the
-; three standard AES key sizes (128, 192 and 256 bits). It has the same call
-; interface as my C implementation. The ebx, esi, edi and ebp registers are
-; preserved across calls but eax, ecx and edx and the artihmetic status flags
-; are not. It is also important that the defines below match those used in the
-; C code. This code uses the VC++ register saving conentions; if it is used
-; with another compiler, conventions for using and saving registers may need to
-; be checked (and calling conventions). The YASM command line for the VC++
-; custom build step is:
-;
-; yasm -Xvc -f win32 -o "$(TargetDir)\$(InputName).obj" "$(InputPath)"
-;
-; The calling intefaces are:
-;
-; AES_RETURN aes_encrypt(const unsigned char in_blk[],
-; unsigned char out_blk[], const aes_encrypt_ctx cx[1]);
-;
-; AES_RETURN aes_decrypt(const unsigned char in_blk[],
-; unsigned char out_blk[], const aes_decrypt_ctx cx[1]);
-;
-; AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
-; const aes_encrypt_ctx cx[1]);
-;
-; AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
-; const aes_decrypt_ctx cx[1]);
-;
-; AES_RETURN aes_encrypt_key(const unsigned char key[],
-; unsigned int len, const aes_decrypt_ctx cx[1]);
-;
-; AES_RETURN aes_decrypt_key(const unsigned char key[],
-; unsigned int len, const aes_decrypt_ctx cx[1]);
-;
-; where <NNN> is 128, 102 or 256. In the last two calls the length can be in
-; either bits or bytes.
-;
-; Comment in/out the following lines to obtain the desired subroutines. These
-; selections MUST match those in the C header file aes.h
-
-; %define AES_128 ; define if AES with 128 bit keys is needed
-; %define AES_192 ; define if AES with 192 bit keys is needed
-%define AES_256 ; define if AES with 256 bit keys is needed
-; %define AES_VAR ; define if a variable key size is needed
-%define ENCRYPTION ; define if encryption is needed
-%define DECRYPTION ; define if decryption is needed
-%define AES_REV_DKS ; define if key decryption schedule is reversed
-%define LAST_ROUND_TABLES ; define if tables are to be used for last round
-
-; offsets to parameters
-
-in_blk equ 4 ; input byte array address parameter
-out_blk equ 8 ; output byte array address parameter
-ctx equ 12 ; AES context structure
-stk_spc equ 20 ; stack space
-%define parms 12 ; parameter space on stack
-
-; The encryption key schedule has the following in memory layout where N is the
-; number of rounds (10, 12 or 14):
-;
-; lo: | input key (round 0) | ; each round is four 32-bit words
-; | encryption round 1 |
-; | encryption round 2 |
-; ....
-; | encryption round N-1 |
-; hi: | encryption round N |
-;
-; The decryption key schedule is normally set up so that it has the same
-; layout as above by actually reversing the order of the encryption key
-; schedule in memory (this happens when AES_REV_DKS is set):
-;
-; lo: | decryption round 0 | = | encryption round N |
-; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ]
-; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ]
-; .... ....
-; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ]
-; hi: | decryption round N | = | input key (round 0) |
-;
-; with rounds except the first and last modified using inv_mix_column()
-; But if AES_REV_DKS is NOT set the order of keys is left as it is for
-; encryption so that it has to be accessed in reverse when used for
-; decryption (although the inverse mix column modifications are done)
-;
-; lo: | decryption round 0 | = | input key (round 0) |
-; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ]
-; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ]
-; .... ....
-; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
-; hi: | decryption round N | = | encryption round N |
-;
-; This layout is faster when the assembler key scheduling provided here
-; is used.
-;
-; The DLL interface must use the _stdcall convention in which the number
-; of bytes of parameter space is added after an @ to the sutine's name.
-; We must also remove our parameters from the stack before return (see
-; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version.
-
-;%define DLL_EXPORT
-
-; End of user defines
-
-%ifdef AES_VAR
-%ifndef AES_128
-%define AES_128
-%endif
-%ifndef AES_192
-%define AES_192
-%endif
-%ifndef AES_256
-%define AES_256
-%endif
-%endif
-
-%ifdef AES_VAR
-%define KS_LENGTH 60
-%elifdef AES_256
-%define KS_LENGTH 60
-%elifdef AES_192
-%define KS_LENGTH 52
-%else
-%define KS_LENGTH 44
-%endif
-
-; These macros implement stack based local variables
-
-%macro save 2
- mov [esp+4*%1],%2
-%endmacro
-
-%macro restore 2
- mov %1,[esp+4*%2]
-%endmacro
-
-; the DLL has to implement the _stdcall calling interface on return
-; In this case we have to take our parameters (3 4-byte pointers)
-; off the stack
-
-%macro do_name 1-2 parms
-%ifndef DLL_EXPORT
- align 32
- global %1
-%1:
-%else
- align 32
- global %1@%2
- export _%1@%2
-%1@%2:
-%endif
-%endmacro
-
-%macro do_call 1-2 parms
-%ifndef DLL_EXPORT
- call %1
- add esp,%2
-%else
- call %1@%2
-%endif
-%endmacro
-
-%macro do_exit 0-1 parms
-%ifdef DLL_EXPORT
- ret %1
-%else
- ret
-%endif
-%endmacro
-
-%ifdef ENCRYPTION
-
- extern t_fn
-
-%define etab_0(x) [t_fn+4*x]
-%define etab_1(x) [t_fn+1024+4*x]
-%define etab_2(x) [t_fn+2048+4*x]
-%define etab_3(x) [t_fn+3072+4*x]
-
-%ifdef LAST_ROUND_TABLES
-
- extern t_fl
-
-%define eltab_0(x) [t_fl+4*x]
-%define eltab_1(x) [t_fl+1024+4*x]
-%define eltab_2(x) [t_fl+2048+4*x]
-%define eltab_3(x) [t_fl+3072+4*x]
-
-%else
-
-%define etab_b(x) byte [t_fn+3072+4*x]
-
-%endif
-
-; ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the
-; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX.
-;
-; Input:
-;
-; EAX column[0]
-; EBX column[1]
-; ECX column[2]
-; EDX column[3]
-; ESI column key[round][2]
-; EDI column key[round][3]
-; EBP scratch
-;
-; Output:
-;
-; EBP column[0] unkeyed
-; EBX column[1] unkeyed
-; ESI column[2] keyed
-; EDI column[3] keyed
-; EAX scratch
-; ECX scratch
-; EDX scratch
-
-%macro rnd_fun 2
-
- rol ebx,16
- %1 esi, cl, 0, ebp
- %1 esi, dh, 1, ebp
- %1 esi, bh, 3, ebp
- %1 edi, dl, 0, ebp
- %1 edi, ah, 1, ebp
- %1 edi, bl, 2, ebp
- %2 ebp, al, 0, ebp
- shr ebx,16
- and eax,0xffff0000
- or eax,ebx
- shr edx,16
- %1 ebp, ah, 1, ebx
- %1 ebp, dh, 3, ebx
- %2 ebx, dl, 2, ebx
- %1 ebx, ch, 1, edx
- %1 ebx, al, 0, edx
- shr eax,16
- shr ecx,16
- %1 ebp, cl, 2, edx
- %1 edi, ch, 3, edx
- %1 esi, al, 2, edx
- %1 ebx, ah, 3, edx
-
-%endmacro
-
-; Basic MOV and XOR Operations for normal rounds
-
-%macro nr_xor 4
- movzx %4,%2
- xor %1,etab_%3(%4)
-%endmacro
-
-%macro nr_mov 4
- movzx %4,%2
- mov %1,etab_%3(%4)
-%endmacro
-
-; Basic MOV and XOR Operations for last round
-
-%ifdef LAST_ROUND_TABLES
-
- %macro lr_xor 4
- movzx %4,%2
- xor %1,eltab_%3(%4)
- %endmacro
-
- %macro lr_mov 4
- movzx %4,%2
- mov %1,eltab_%3(%4)
- %endmacro
-
-%else
-
- %macro lr_xor 4
- movzx %4,%2
- movzx %4,etab_b(%4)
- %if %3 != 0
- shl %4,8*%3
- %endif
- xor %1,%4
- %endmacro
-
- %macro lr_mov 4
- movzx %4,%2
- movzx %1,etab_b(%4)
- %if %3 != 0
- shl %1,8*%3
- %endif
- %endmacro
-
-%endif
-
-%macro enc_round 0
-
- add ebp,16
- save 0,ebp
- mov esi,[ebp+8]
- mov edi,[ebp+12]
-
- rnd_fun nr_xor, nr_mov
-
- mov eax,ebp
- mov ecx,esi
- mov edx,edi
- restore ebp,0
- xor eax,[ebp]
- xor ebx,[ebp+4]
-
-%endmacro
-
-%macro enc_last_round 0
-
- add ebp,16
- save 0,ebp
- mov esi,[ebp+8]
- mov edi,[ebp+12]
-
- rnd_fun lr_xor, lr_mov
-
- mov eax,ebp
- restore ebp,0
- xor eax,[ebp]
- xor ebx,[ebp+4]
-
-%endmacro
-
- section .text align=32
-
-; AES Encryption Subroutine
-
- do_name aes_encrypt
-
- sub esp,stk_spc
- mov [esp+16],ebp
- mov [esp+12],ebx
- mov [esp+ 8],esi
- mov [esp+ 4],edi
-
- mov esi,[esp+in_blk+stk_spc] ; input pointer
- mov eax,[esi ]
- mov ebx,[esi+ 4]
- mov ecx,[esi+ 8]
- mov edx,[esi+12]
-
- mov ebp,[esp+ctx+stk_spc] ; key pointer
- movzx edi,byte [ebp+4*KS_LENGTH]
- xor eax,[ebp ]
- xor ebx,[ebp+ 4]
- xor ecx,[ebp+ 8]
- xor edx,[ebp+12]
-
-; determine the number of rounds
-
- cmp edi,10*16
- je .3
- cmp edi,12*16
- je .2
- cmp edi,14*16
- je .1
- mov eax,-1
- jmp .5
-
-.1: enc_round
- enc_round
-.2: enc_round
- enc_round
-.3: enc_round
- enc_round
- enc_round
- enc_round
- enc_round
- enc_round
- enc_round
- enc_round
- enc_round
- enc_last_round
-
- mov edx,[esp+out_blk+stk_spc]
- mov [edx],eax
- mov [edx+4],ebx
- mov [edx+8],esi
- mov [edx+12],edi
- xor eax,eax
-
-.5: mov ebp,[esp+16]
- mov ebx,[esp+12]
- mov esi,[esp+ 8]
- mov edi,[esp+ 4]
- add esp,stk_spc
- do_exit
-
-%endif
-
-%ifdef DECRYPTION
-
- extern t_in
-
-%define dtab_0(x) [t_in+4*x]
-%define dtab_1(x) [t_in+1024+4*x]
-%define dtab_2(x) [t_in+2048+4*x]
-%define dtab_3(x) [t_in+3072+4*x]
-
-%ifdef LAST_ROUND_TABLES
-
- extern t_il
-
-%define dltab_0(x) [t_il+4*x]
-%define dltab_1(x) [t_il+1024+4*x]
-%define dltab_2(x) [t_il+2048+4*x]
-%define dltab_3(x) [t_il+3072+4*x]
-
-%else
-
- extern _t_ibox
-
-%define dtab_x(x) byte [_t_ibox+x]
-
-%endif
-
-%macro irn_fun 2
-
- rol eax,16
- %1 esi, cl, 0, ebp
- %1 esi, bh, 1, ebp
- %1 esi, al, 2, ebp
- %1 edi, dl, 0, ebp
- %1 edi, ch, 1, ebp
- %1 edi, ah, 3, ebp
- %2 ebp, bl, 0, ebp
- shr eax,16
- and ebx,0xffff0000
- or ebx,eax
- shr ecx,16
- %1 ebp, bh, 1, eax
- %1 ebp, ch, 3, eax
- %2 eax, cl, 2, ecx
- %1 eax, bl, 0, ecx
- %1 eax, dh, 1, ecx
- shr ebx,16
- shr edx,16
- %1 esi, dh, 3, ecx
- %1 ebp, dl, 2, ecx
- %1 eax, bh, 3, ecx
- %1 edi, bl, 2, ecx
-
-%endmacro
-
-; Basic MOV and XOR Operations for normal rounds
-
-%macro ni_xor 4
- movzx %4,%2
- xor %1,dtab_%3(%4)
-%endmacro
-
-%macro ni_mov 4
- movzx %4,%2
- mov %1,dtab_%3(%4)
-%endmacro
-
-; Basic MOV and XOR Operations for last round
-
-%ifdef LAST_ROUND_TABLES
-
-%macro li_xor 4
- movzx %4,%2
- xor %1,dltab_%3(%4)
-%endmacro
-
-%macro li_mov 4
- movzx %4,%2
- mov %1,dltab_%3(%4)
-%endmacro
-
-%else
-
- %macro li_xor 4
- movzx %4,%2
- movzx %4,dtab_x(%4)
- %if %3 != 0
- shl %4,8*%3
- %endif
- xor %1,%4
- %endmacro
-
- %macro li_mov 4
- movzx %4,%2
- movzx %1,dtab_x(%4)
- %if %3 != 0
- shl %1,8*%3
- %endif
- %endmacro
-
-%endif
-
-%macro dec_round 0
-
-%ifdef AES_REV_DKS
- add ebp,16
-%else
- sub ebp,16
-%endif
- save 0,ebp
- mov esi,[ebp+8]
- mov edi,[ebp+12]
-
- irn_fun ni_xor, ni_mov
-
- mov ebx,ebp
- mov ecx,esi
- mov edx,edi
- restore ebp,0
- xor eax,[ebp]
- xor ebx,[ebp+4]
-
-%endmacro
-
-%macro dec_last_round 0
-
-%ifdef AES_REV_DKS
- add ebp,16
-%else
- sub ebp,16
-%endif
- save 0,ebp
- mov esi,[ebp+8]
- mov edi,[ebp+12]
-
- irn_fun li_xor, li_mov
-
- mov ebx,ebp
- restore ebp,0
- xor eax,[ebp]
- xor ebx,[ebp+4]
-
-%endmacro
-
- section .text
-
-; AES Decryption Subroutine
-
- do_name aes_decrypt
-
- sub esp,stk_spc
- mov [esp+16],ebp
- mov [esp+12],ebx
- mov [esp+ 8],esi
- mov [esp+ 4],edi
-
-; input four columns and xor in first round key
-
- mov esi,[esp+in_blk+stk_spc] ; input pointer
- mov eax,[esi ]
- mov ebx,[esi+ 4]
- mov ecx,[esi+ 8]
- mov edx,[esi+12]
- lea esi,[esi+16]
-
- mov ebp,[esp+ctx+stk_spc] ; key pointer
- movzx edi,byte[ebp+4*KS_LENGTH]
-%ifndef AES_REV_DKS ; if decryption key schedule is not reversed
- lea ebp,[ebp+edi] ; we have to access it from the top down
-%endif
- xor eax,[ebp ] ; key schedule
- xor ebx,[ebp+ 4]
- xor ecx,[ebp+ 8]
- xor edx,[ebp+12]
-
-; determine the number of rounds
-
- cmp edi,10*16
- je .3
- cmp edi,12*16
- je .2
- cmp edi,14*16
- je .1
- mov eax,-1
- jmp .5
-
-.1: dec_round
- dec_round
-.2: dec_round
- dec_round
-.3: dec_round
- dec_round
- dec_round
- dec_round
- dec_round
- dec_round
- dec_round
- dec_round
- dec_round
- dec_last_round
-
-; move final values to the output array.
-
- mov ebp,[esp+out_blk+stk_spc]
- mov [ebp],eax
- mov [ebp+4],ebx
- mov [ebp+8],esi
- mov [ebp+12],edi
- xor eax,eax
-
-.5: mov ebp,[esp+16]
- mov ebx,[esp+12]
- mov esi,[esp+ 8]
- mov edi,[esp+ 4]
- add esp,stk_spc
- do_exit
-
-%endif
+
+; ---------------------------------------------------------------------------
+; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
+;
+; LICENSE TERMS
+;
+; The free distribution and use of this software is allowed (with or without
+; changes) provided that:
+;
+; 1. source code distributions include the above copyright notice, this
+; list of conditions and the following disclaimer;
+;
+; 2. binary distributions include the above copyright notice, this list
+; of conditions and the following disclaimer in their documentation;
+;
+; 3. the name of the copyright holder is not used to endorse products
+; built using this software without specific written permission.
+;
+; DISCLAIMER
+;
+; This software is provided 'as is' with no explicit or implied warranties
+; in respect of its properties, including, but not limited to, correctness
+; and/or fitness for purpose.
+; ---------------------------------------------------------------------------
+; Issue 20/12/2007
+;
+; This code requires ASM_X86_V1C to be set in aesopt.h. It requires the C files
+; aeskey.c and aestab.c for support.
+
+;
+; Adapted for TrueCrypt:
+; - Compatibility with NASM and GCC
+;
+
+; An AES implementation for x86 processors using the YASM (or NASM) assembler.
+; This is an assembler implementation that covers encryption and decryption
+; only and is intended as a replacement of the C file aescrypt.c. It hence
+; requires the file aeskey.c for keying and aestab.c for the AES tables. It
+; employs full tables rather than compressed tables.
+
+; This code provides the standard AES block size (128 bits, 16 bytes) and the
+; three standard AES key sizes (128, 192 and 256 bits). It has the same call
+; interface as my C implementation. The ebx, esi, edi and ebp registers are
+; preserved across calls but eax, ecx and edx and the artihmetic status flags
+; are not. It is also important that the defines below match those used in the
+; C code. This code uses the VC++ register saving conentions; if it is used
+; with another compiler, conventions for using and saving registers may need to
+; be checked (and calling conventions). The YASM command line for the VC++
+; custom build step is:
+;
+; yasm -Xvc -f win32 -o "$(TargetDir)\$(InputName).obj" "$(InputPath)"
+;
+; The calling intefaces are:
+;
+; AES_RETURN aes_encrypt(const unsigned char in_blk[],
+; unsigned char out_blk[], const aes_encrypt_ctx cx[1]);
+;
+; AES_RETURN aes_decrypt(const unsigned char in_blk[],
+; unsigned char out_blk[], const aes_decrypt_ctx cx[1]);
+;
+; AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
+; const aes_encrypt_ctx cx[1]);
+;
+; AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
+; const aes_decrypt_ctx cx[1]);
+;
+; AES_RETURN aes_encrypt_key(const unsigned char key[],
+; unsigned int len, const aes_decrypt_ctx cx[1]);
+;
+; AES_RETURN aes_decrypt_key(const unsigned char key[],
+; unsigned int len, const aes_decrypt_ctx cx[1]);
+;
+; where <NNN> is 128, 102 or 256. In the last two calls the length can be in
+; either bits or bytes.
+;
+; Comment in/out the following lines to obtain the desired subroutines. These
+; selections MUST match those in the C header file aes.h
+
+; %define AES_128 ; define if AES with 128 bit keys is needed
+; %define AES_192 ; define if AES with 192 bit keys is needed
+%define AES_256 ; define if AES with 256 bit keys is needed
+; %define AES_VAR ; define if a variable key size is needed
+%define ENCRYPTION ; define if encryption is needed
+%define DECRYPTION ; define if decryption is needed
+%define AES_REV_DKS ; define if key decryption schedule is reversed
+%define LAST_ROUND_TABLES ; define if tables are to be used for last round
+
+; offsets to parameters
+
+in_blk equ 4 ; input byte array address parameter
+out_blk equ 8 ; output byte array address parameter
+ctx equ 12 ; AES context structure
+stk_spc equ 20 ; stack space
+%define parms 12 ; parameter space on stack
+
+; The encryption key schedule has the following in memory layout where N is the
+; number of rounds (10, 12 or 14):
+;
+; lo: | input key (round 0) | ; each round is four 32-bit words
+; | encryption round 1 |
+; | encryption round 2 |
+; ....
+; | encryption round N-1 |
+; hi: | encryption round N |
+;
+; The decryption key schedule is normally set up so that it has the same
+; layout as above by actually reversing the order of the encryption key
+; schedule in memory (this happens when AES_REV_DKS is set):
+;
+; lo: | decryption round 0 | = | encryption round N |
+; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ]
+; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ]
+; .... ....
+; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ]
+; hi: | decryption round N | = | input key (round 0) |
+;
+; with rounds except the first and last modified using inv_mix_column()
+; But if AES_REV_DKS is NOT set the order of keys is left as it is for
+; encryption so that it has to be accessed in reverse when used for
+; decryption (although the inverse mix column modifications are done)
+;
+; lo: | decryption round 0 | = | input key (round 0) |
+; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ]
+; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ]
+; .... ....
+; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
+; hi: | decryption round N | = | encryption round N |
+;
+; This layout is faster when the assembler key scheduling provided here
+; is used.
+;
+; The DLL interface must use the _stdcall convention in which the number
+; of bytes of parameter space is added after an @ to the sutine's name.
+; We must also remove our parameters from the stack before return (see
+; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version.
+
+;%define DLL_EXPORT
+
+; End of user defines
+
+%ifdef AES_VAR
+%ifndef AES_128
+%define AES_128
+%endif
+%ifndef AES_192
+%define AES_192
+%endif
+%ifndef AES_256
+%define AES_256
+%endif
+%endif
+
+%ifdef AES_VAR
+%define KS_LENGTH 60
+%elifdef AES_256
+%define KS_LENGTH 60
+%elifdef AES_192
+%define KS_LENGTH 52
+%else
+%define KS_LENGTH 44
+%endif
+
+; These macros implement stack based local variables
+
+%macro save 2
+ mov [esp+4*%1],%2
+%endmacro
+
+%macro restore 2
+ mov %1,[esp+4*%2]
+%endmacro
+
+; the DLL has to implement the _stdcall calling interface on return
+; In this case we have to take our parameters (3 4-byte pointers)
+; off the stack
+
+%macro do_name 1-2 parms
+%ifndef DLL_EXPORT
+ align 32
+ global %1
+%1:
+%else
+ align 32
+ global %1@%2
+ export _%1@%2
+%1@%2:
+%endif
+%endmacro
+
+%macro do_call 1-2 parms
+%ifndef DLL_EXPORT
+ call %1
+ add esp,%2
+%else
+ call %1@%2
+%endif
+%endmacro
+
+%macro do_exit 0-1 parms
+%ifdef DLL_EXPORT
+ ret %1
+%else
+ ret
+%endif
+%endmacro
+
+%ifdef ENCRYPTION
+
+ extern t_fn
+
+%define etab_0(x) [t_fn+4*x]
+%define etab_1(x) [t_fn+1024+4*x]
+%define etab_2(x) [t_fn+2048+4*x]
+%define etab_3(x) [t_fn+3072+4*x]
+
+%ifdef LAST_ROUND_TABLES
+
+ extern t_fl
+
+%define eltab_0(x) [t_fl+4*x]
+%define eltab_1(x) [t_fl+1024+4*x]
+%define eltab_2(x) [t_fl+2048+4*x]
+%define eltab_3(x) [t_fl+3072+4*x]
+
+%else
+
+%define etab_b(x) byte [t_fn+3072+4*x]
+
+%endif
+
+; ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the
+; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX.
+;
+; Input:
+;
+; EAX column[0]
+; EBX column[1]
+; ECX column[2]
+; EDX column[3]
+; ESI column key[round][2]
+; EDI column key[round][3]
+; EBP scratch
+;
+; Output:
+;
+; EBP column[0] unkeyed
+; EBX column[1] unkeyed
+; ESI column[2] keyed
+; EDI column[3] keyed
+; EAX scratch
+; ECX scratch
+; EDX scratch
+
+%macro rnd_fun 2
+
+ rol ebx,16
+ %1 esi, cl, 0, ebp
+ %1 esi, dh, 1, ebp
+ %1 esi, bh, 3, ebp
+ %1 edi, dl, 0, ebp
+ %1 edi, ah, 1, ebp
+ %1 edi, bl, 2, ebp
+ %2 ebp, al, 0, ebp
+ shr ebx,16
+ and eax,0xffff0000
+ or eax,ebx
+ shr edx,16
+ %1 ebp, ah, 1, ebx
+ %1 ebp, dh, 3, ebx
+ %2 ebx, dl, 2, ebx
+ %1 ebx, ch, 1, edx
+ %1 ebx, al, 0, edx
+ shr eax,16
+ shr ecx,16
+ %1 ebp, cl, 2, edx
+ %1 edi, ch, 3, edx
+ %1 esi, al, 2, edx
+ %1 ebx, ah, 3, edx
+
+%endmacro
+
+; Basic MOV and XOR Operations for normal rounds
+
+%macro nr_xor 4
+ movzx %4,%2
+ xor %1,etab_%3(%4)
+%endmacro
+
+%macro nr_mov 4
+ movzx %4,%2
+ mov %1,etab_%3(%4)
+%endmacro
+
+; Basic MOV and XOR Operations for last round
+
+%ifdef LAST_ROUND_TABLES
+
+ %macro lr_xor 4
+ movzx %4,%2
+ xor %1,eltab_%3(%4)
+ %endmacro
+
+ %macro lr_mov 4
+ movzx %4,%2
+ mov %1,eltab_%3(%4)
+ %endmacro
+
+%else
+
+ %macro lr_xor 4
+ movzx %4,%2
+ movzx %4,etab_b(%4)
+ %if %3 != 0
+ shl %4,8*%3
+ %endif
+ xor %1,%4
+ %endmacro
+
+ %macro lr_mov 4
+ movzx %4,%2
+ movzx %1,etab_b(%4)
+ %if %3 != 0
+ shl %1,8*%3
+ %endif
+ %endmacro
+
+%endif
+
+%macro enc_round 0
+
+ add ebp,16
+ save 0,ebp
+ mov esi,[ebp+8]
+ mov edi,[ebp+12]
+
+ rnd_fun nr_xor, nr_mov
+
+ mov eax,ebp
+ mov ecx,esi
+ mov edx,edi
+ restore ebp,0
+ xor eax,[ebp]
+ xor ebx,[ebp+4]
+
+%endmacro
+
+%macro enc_last_round 0
+
+ add ebp,16
+ save 0,ebp
+ mov esi,[ebp+8]
+ mov edi,[ebp+12]
+
+ rnd_fun lr_xor, lr_mov
+
+ mov eax,ebp
+ restore ebp,0
+ xor eax,[ebp]
+ xor ebx,[ebp+4]
+
+%endmacro
+
+ section .text align=32
+
+; AES Encryption Subroutine
+
+ do_name aes_encrypt
+
+ sub esp,stk_spc
+ mov [esp+16],ebp
+ mov [esp+12],ebx
+ mov [esp+ 8],esi
+ mov [esp+ 4],edi
+
+ mov esi,[esp+in_blk+stk_spc] ; input pointer
+ mov eax,[esi ]
+ mov ebx,[esi+ 4]
+ mov ecx,[esi+ 8]
+ mov edx,[esi+12]
+
+ mov ebp,[esp+ctx+stk_spc] ; key pointer
+ movzx edi,byte [ebp+4*KS_LENGTH]
+ xor eax,[ebp ]
+ xor ebx,[ebp+ 4]
+ xor ecx,[ebp+ 8]
+ xor edx,[ebp+12]
+
+; determine the number of rounds
+
+ cmp edi,10*16
+ je .3
+ cmp edi,12*16
+ je .2
+ cmp edi,14*16
+ je .1
+ mov eax,-1
+ jmp .5
+
+.1: enc_round
+ enc_round
+.2: enc_round
+ enc_round
+.3: enc_round
+ enc_round
+ enc_round
+ enc_round
+ enc_round
+ enc_round
+ enc_round
+ enc_round
+ enc_round
+ enc_last_round
+
+ mov edx,[esp+out_blk+stk_spc]
+ mov [edx],eax
+ mov [edx+4],ebx
+ mov [edx+8],esi
+ mov [edx+12],edi
+ xor eax,eax
+
+.5: mov ebp,[esp+16]
+ mov ebx,[esp+12]
+ mov esi,[esp+ 8]
+ mov edi,[esp+ 4]
+ add esp,stk_spc
+ do_exit
+
+%endif
+
+%ifdef DECRYPTION
+
+ extern t_in
+
+%define dtab_0(x) [t_in+4*x]
+%define dtab_1(x) [t_in+1024+4*x]
+%define dtab_2(x) [t_in+2048+4*x]
+%define dtab_3(x) [t_in+3072+4*x]
+
+%ifdef LAST_ROUND_TABLES
+
+ extern t_il
+
+%define dltab_0(x) [t_il+4*x]
+%define dltab_1(x) [t_il+1024+4*x]
+%define dltab_2(x) [t_il+2048+4*x]
+%define dltab_3(x) [t_il+3072+4*x]
+
+%else
+
+ extern _t_ibox
+
+%define dtab_x(x) byte [_t_ibox+x]
+
+%endif
+
+%macro irn_fun 2
+
+ rol eax,16
+ %1 esi, cl, 0, ebp
+ %1 esi, bh, 1, ebp
+ %1 esi, al, 2, ebp
+ %1 edi, dl, 0, ebp
+ %1 edi, ch, 1, ebp
+ %1 edi, ah, 3, ebp
+ %2 ebp, bl, 0, ebp
+ shr eax,16
+ and ebx,0xffff0000
+ or ebx,eax
+ shr ecx,16
+ %1 ebp, bh, 1, eax
+ %1 ebp, ch, 3, eax
+ %2 eax, cl, 2, ecx
+ %1 eax, bl, 0, ecx
+ %1 eax, dh, 1, ecx
+ shr ebx,16
+ shr edx,16
+ %1 esi, dh, 3, ecx
+ %1 ebp, dl, 2, ecx
+ %1 eax, bh, 3, ecx
+ %1 edi, bl, 2, ecx
+
+%endmacro
+
+; Basic MOV and XOR Operations for normal rounds
+
+%macro ni_xor 4
+ movzx %4,%2
+ xor %1,dtab_%3(%4)
+%endmacro
+
+%macro ni_mov 4
+ movzx %4,%2
+ mov %1,dtab_%3(%4)
+%endmacro
+
+; Basic MOV and XOR Operations for last round
+
+%ifdef LAST_ROUND_TABLES
+
+%macro li_xor 4
+ movzx %4,%2
+ xor %1,dltab_%3(%4)
+%endmacro
+
+%macro li_mov 4
+ movzx %4,%2
+ mov %1,dltab_%3(%4)
+%endmacro
+
+%else
+
+ %macro li_xor 4
+ movzx %4,%2
+ movzx %4,dtab_x(%4)
+ %if %3 != 0
+ shl %4,8*%3
+ %endif
+ xor %1,%4
+ %endmacro
+
+ %macro li_mov 4
+ movzx %4,%2
+ movzx %1,dtab_x(%4)
+ %if %3 != 0
+ shl %1,8*%3
+ %endif
+ %endmacro
+
+%endif
+
+%macro dec_round 0
+
+%ifdef AES_REV_DKS
+ add ebp,16
+%else
+ sub ebp,16
+%endif
+ save 0,ebp
+ mov esi,[ebp+8]
+ mov edi,[ebp+12]
+
+ irn_fun ni_xor, ni_mov
+
+ mov ebx,ebp
+ mov ecx,esi
+ mov edx,edi
+ restore ebp,0
+ xor eax,[ebp]
+ xor ebx,[ebp+4]
+
+%endmacro
+
+%macro dec_last_round 0
+
+%ifdef AES_REV_DKS
+ add ebp,16
+%else
+ sub ebp,16
+%endif
+ save 0,ebp
+ mov esi,[ebp+8]
+ mov edi,[ebp+12]
+
+ irn_fun li_xor, li_mov
+
+ mov ebx,ebp
+ restore ebp,0
+ xor eax,[ebp]
+ xor ebx,[ebp+4]
+
+%endmacro
+
+ section .text
+
+; AES Decryption Subroutine
+
+ do_name aes_decrypt
+
+ sub esp,stk_spc
+ mov [esp+16],ebp
+ mov [esp+12],ebx
+ mov [esp+ 8],esi
+ mov [esp+ 4],edi
+
+; input four columns and xor in first round key
+
+ mov esi,[esp+in_blk+stk_spc] ; input pointer
+ mov eax,[esi ]
+ mov ebx,[esi+ 4]
+ mov ecx,[esi+ 8]
+ mov edx,[esi+12]
+ lea esi,[esi+16]
+
+ mov ebp,[esp+ctx+stk_spc] ; key pointer
+ movzx edi,byte[ebp+4*KS_LENGTH]
+%ifndef AES_REV_DKS ; if decryption key schedule is not reversed
+ lea ebp,[ebp+edi] ; we have to access it from the top down
+%endif
+ xor eax,[ebp ] ; key schedule
+ xor ebx,[ebp+ 4]
+ xor ecx,[ebp+ 8]
+ xor edx,[ebp+12]
+
+; determine the number of rounds
+
+ cmp edi,10*16
+ je .3
+ cmp edi,12*16
+ je .2
+ cmp edi,14*16
+ je .1
+ mov eax,-1
+ jmp .5
+
+.1: dec_round
+ dec_round
+.2: dec_round
+ dec_round
+.3: dec_round
+ dec_round
+ dec_round
+ dec_round
+ dec_round
+ dec_round
+ dec_round
+ dec_round
+ dec_round
+ dec_last_round
+
+; move final values to the output array.
+
+ mov ebp,[esp+out_blk+stk_spc]
+ mov [ebp],eax
+ mov [ebp+4],ebx
+ mov [ebp+8],esi
+ mov [ebp+12],edi
+ xor eax,eax
+
+.5: mov ebp,[esp+16]
+ mov ebx,[esp+12]
+ mov esi,[esp+ 8]
+ mov edi,[esp+ 4]
+ add esp,stk_spc
+ do_exit
+
+%endif