VeraCrypt
aboutsummaryrefslogtreecommitdiff
path: root/src/Crypto/Aes_hw_cpu.asm
diff options
context:
space:
mode:
Diffstat (limited to 'src/Crypto/Aes_hw_cpu.asm')
-rw-r--r--src/Crypto/Aes_hw_cpu.asm330
1 files changed, 330 insertions, 0 deletions
diff --git a/src/Crypto/Aes_hw_cpu.asm b/src/Crypto/Aes_hw_cpu.asm
new file mode 100644
index 00000000..64c3bad8
--- /dev/null
+++ b/src/Crypto/Aes_hw_cpu.asm
@@ -0,0 +1,330 @@
+;
+; Copyright (c) 2010 TrueCrypt Developers Association. All rights reserved.
+;
+; Governed by the TrueCrypt License 3.0 the full text of which is contained in
+; the file License.txt included in TrueCrypt binary and source code distribution
+; packages.
+;
+
+
+%ifidn __BITS__, 16
+ %define R e
+%elifidn __BITS__, 32
+ %define R e
+%elifidn __BITS__, 64
+ %define R r
+%endif
+
+
+%macro export_function 1-2 0
+
+ %ifdef MS_STDCALL
+ global %1@%2
+ export _%1@%2
+ %1@%2:
+ %elifidn __BITS__, 16
+ global _%1
+ _%1:
+ %else
+ global %1
+ %1:
+ %endif
+
+%endmacro
+
+
+%macro aes_function_entry 1
+
+ ; void (const byte *ks, byte *data);
+
+ export_function %1, 8
+
+ %ifidn __BITS__, 32
+ mov ecx, [esp + 4 + 4 * 0]
+ mov edx, [esp + 4 + 4 * 1]
+ %elifidn __BITS__, 64
+ %ifnidn __OUTPUT_FORMAT__, win64
+ mov rcx, rdi
+ mov rdx, rsi
+ %endif
+ %endif
+
+ ; ecx/rcx = ks
+ ; edx/rdx = data
+
+%endmacro
+
+
+%macro aes_function_exit 0
+
+ ; void (const byte *, byte *);
+
+ %ifdef MS_STDCALL
+ ret 8
+ %else
+ ret
+ %endif
+
+%endmacro
+
+
+%macro push_xmm 2
+ sub rsp, 16 * (%2 - %1 + 1)
+
+ %assign stackoffset 0
+ %assign regnumber %1
+
+ %rep (%2 - %1 + 1)
+ movdqu [rsp + 16 * stackoffset], xmm%[regnumber]
+
+ %assign stackoffset stackoffset+1
+ %assign regnumber regnumber+1
+ %endrep
+%endmacro
+
+
+%macro pop_xmm 2
+ %assign stackoffset 0
+ %assign regnumber %1
+
+ %rep (%2 - %1 + 1)
+ movdqu xmm%[regnumber], [rsp + 16 * stackoffset]
+
+ %assign stackoffset stackoffset+1
+ %assign regnumber regnumber+1
+ %endrep
+
+ add rsp, 16 * (%2 - %1 + 1)
+%endmacro
+
+
+%macro aes_hw_cpu 2
+ %define OPERATION %1
+ %define BLOCK_COUNT %2
+
+ ; Load data blocks
+ %assign block 1
+ %rep BLOCK_COUNT
+ movdqu xmm%[block], [%[R]dx + 16 * (block - 1)]
+ %assign block block+1
+ %endrep
+
+ ; Encrypt/decrypt data blocks
+ %assign round 0
+ %rep 15
+ movdqu xmm0, [%[R]cx + 16 * round]
+
+ %assign block 1
+ %rep BLOCK_COUNT
+
+ %if round = 0
+ pxor xmm%[block], xmm0
+ %else
+ %if round < 14
+ aes%[OPERATION] xmm%[block], xmm0
+ %else
+ aes%[OPERATION]last xmm%[block], xmm0
+ %endif
+ %endif
+
+ %assign block block+1
+ %endrep
+
+ %assign round round+1
+ %endrep
+
+ ; Store data blocks
+ %assign block 1
+ %rep BLOCK_COUNT
+ movdqu [%[R]dx + 16 * (block - 1)], xmm%[block]
+ %assign block block+1
+ %endrep
+
+ %undef OPERATION
+ %undef BLOCK_COUNT
+%endmacro
+
+
+%macro aes_hw_cpu_32_blocks 1
+ %define OPERATION_32_BLOCKS %1
+
+ %ifidn __BITS__, 64
+ %define MAX_REG_BLOCK_COUNT 15
+ %else
+ %define MAX_REG_BLOCK_COUNT 7
+ %endif
+
+ %ifidn __OUTPUT_FORMAT__, win64
+ %if MAX_REG_BLOCK_COUNT > 5
+ push_xmm 6, MAX_REG_BLOCK_COUNT
+ %endif
+ %endif
+
+ mov eax, 32 / MAX_REG_BLOCK_COUNT
+ .1:
+ aes_hw_cpu %[OPERATION_32_BLOCKS], MAX_REG_BLOCK_COUNT
+
+ add %[R]dx, 16 * MAX_REG_BLOCK_COUNT
+ dec eax
+ jnz .1
+
+ %if (32 % MAX_REG_BLOCK_COUNT) != 0
+ aes_hw_cpu %[OPERATION_32_BLOCKS], (32 % MAX_REG_BLOCK_COUNT)
+ %endif
+
+ %ifidn __OUTPUT_FORMAT__, win64
+ %if MAX_REG_BLOCK_COUNT > 5
+ pop_xmm 6, MAX_REG_BLOCK_COUNT
+ %endif
+ %endif
+
+ %undef OPERATION_32_BLOCKS
+ %undef MAX_REG_BLOCK_COUNT
+%endmacro
+
+
+%ifidn __BITS__, 16
+
+ USE16
+ SEGMENT _TEXT PUBLIC CLASS=CODE USE16
+ SEGMENT _DATA PUBLIC CLASS=DATA USE16
+ GROUP DGROUP _TEXT _DATA
+ SECTION _TEXT
+
+%else
+
+ SECTION .text
+
+%endif
+
+
+; void aes_hw_cpu_enable_sse ();
+
+ export_function aes_hw_cpu_enable_sse
+ mov %[R]ax, cr4
+ or ax, 1 << 9
+ mov cr4, %[R]ax
+ ret
+
+
+%ifidn __BITS__, 16
+
+
+; byte is_aes_hw_cpu_supported ();
+
+ export_function is_aes_hw_cpu_supported
+ mov eax, 1
+ cpuid
+ mov eax, ecx
+ shr eax, 25
+ and al, 1
+ ret
+
+
+; void aes_hw_cpu_decrypt (const byte *ks, byte *data);
+
+ export_function aes_hw_cpu_decrypt
+ mov ax, -16
+ jmp aes_hw_cpu_encrypt_decrypt
+
+; void aes_hw_cpu_encrypt (const byte *ks, byte *data);
+
+ export_function aes_hw_cpu_encrypt
+ mov ax, 16
+
+ aes_hw_cpu_encrypt_decrypt:
+ push bp
+ mov bp, sp
+ push di
+ push si
+
+ mov si, [bp + 4] ; ks
+ mov di, [bp + 4 + 2] ; data
+
+ movdqu xmm0, [si]
+ movdqu xmm1, [di]
+
+ pxor xmm1, xmm0
+
+ mov cx, 13
+
+ .round1_13:
+ add si, ax
+ movdqu xmm0, [si]
+
+ cmp ax, 0
+ jl .decrypt
+
+ aesenc xmm1, xmm0
+ jmp .2
+ .decrypt:
+ aesdec xmm1, xmm0
+ .2:
+ loop .round1_13
+
+ add si, ax
+ movdqu xmm0, [si]
+
+ cmp ax, 0
+ jl .decrypt_last
+
+ aesenclast xmm1, xmm0
+ jmp .3
+ .decrypt_last:
+ aesdeclast xmm1, xmm0
+ .3:
+ movdqu [di], xmm1
+
+ pop si
+ pop di
+ pop bp
+ ret
+
+
+%else ; __BITS__ != 16
+
+
+; byte is_aes_hw_cpu_supported ();
+
+ export_function is_aes_hw_cpu_supported
+ push %[R]bx
+
+ mov eax, 1
+ cpuid
+ mov eax, ecx
+ shr eax, 25
+ and eax, 1
+
+ pop %[R]bx
+ ret
+
+
+; void aes_hw_cpu_decrypt (const byte *ks, byte *data);
+
+ aes_function_entry aes_hw_cpu_decrypt
+ aes_hw_cpu dec, 1
+ aes_function_exit
+
+
+; void aes_hw_cpu_decrypt_32_blocks (const byte *ks, byte *data);
+
+ aes_function_entry aes_hw_cpu_decrypt_32_blocks
+ aes_hw_cpu_32_blocks dec
+ aes_function_exit
+
+
+; void aes_hw_cpu_encrypt (const byte *ks, byte *data);
+
+ aes_function_entry aes_hw_cpu_encrypt
+ aes_hw_cpu enc, 1
+ aes_function_exit
+
+
+; void aes_hw_cpu_encrypt_32_blocks (const byte *ks, byte *data);
+
+ aes_function_entry aes_hw_cpu_encrypt_32_blocks
+ aes_hw_cpu_32_blocks enc
+ aes_function_exit
+
+
+%endif ; __BITS__ != 16