;
; Copyright (c) 2010 TrueCrypt Developers Association. All rights reserved.
;
; Governed by the TrueCrypt License 3.0 the full text of which is contained in
; the file License.txt included in TrueCrypt binary and source code distribution
; packages.
;

;
; Provenance: src/Crypto/Aes_hw_cpu.asm as introduced by commit
; c606f0866c3a2a5db3ef9bc41738ef33eb9612a9 (Mounir IDRASSI,
; Sat, 22 Jun 2013 16:16:13 +0200, Add original TrueCrypt 7.1a sources).
;
; Syntax:  NASM (Intel operand order).
; Targets: selected at assembly time through __BITS__ (16, 32 or 64) and
;          __OUTPUT_FORMAT__ (win64 is treated specially, see below).
;
; Every AES routine in this file consumes 15 round keys of 16 bytes each
; (one initial whitening key, 13 middle rounds, one final round).
;


; R is the register-name prefix used for pointer-sized registers:
; e (eax, ecx, ...) for 16-bit and 32-bit builds, r (rax, rcx, ...) for 64-bit.
%ifidn __BITS__, 16
    %define R e
%elifidn __BITS__, 32
    %define R e
%elifidn __BITS__, 64
    %define R r
%endif


;-----------------------------------------------------------------------
; export_function name [, arg_bytes]
;
; Declares a global entry point and emits its label.
;   name       symbol name as seen from C
;   arg_bytes  size of the stack arguments in bytes (default 0); only used
;              to build the decorated name when MS_STDCALL is defined
;
; Emitted label:
;   MS_STDCALL defined   name@arg_bytes (also exported as _name@arg_bytes)
;   16-bit build         _name
;   otherwise            name
;-----------------------------------------------------------------------
%macro export_function 1-2 0

    %ifdef MS_STDCALL
        global %1@%2
        export _%1@%2
    %1@%2:
    %elifidn __BITS__, 16
        global _%1
    _%1:
    %else
        global %1
    %1:
    %endif

%endmacro


;-----------------------------------------------------------------------
; aes_function_entry name
;
; Entry sequence for a routine with the C signature
;     void name (const byte *ks, byte *data);
;
; After this macro:
;     ecx/rcx = ks
;     edx/rdx = data
;
;   32-bit          both arguments are read from the stack
;   64-bit, win64   nothing is emitted (arguments are used from rcx/rdx)
;   64-bit, other   arguments are copied from rdi/rsi into rcx/rdx
;-----------------------------------------------------------------------
%macro aes_function_entry 1

    ; void (const byte *ks, byte *data);

    export_function %1, 8

    %ifidn __BITS__, 32
        mov ecx, [esp + 4 + 4 * 0]
        mov edx, [esp + 4 + 4 * 1]
    %elifidn __BITS__, 64
        %ifnidn __OUTPUT_FORMAT__, win64
            mov rcx, rdi
            mov rdx, rsi
        %endif
    %endif

    ; ecx/rcx = ks
    ; edx/rdx = data

%endmacro


;-----------------------------------------------------------------------
; aes_function_exit
;
; Return sequence matching aes_function_entry. Under MS_STDCALL the callee
; removes the 8 bytes of arguments from the stack; otherwise a plain ret.
;-----------------------------------------------------------------------
%macro aes_function_exit 0

    ; void (const byte *, byte *);

    %ifdef MS_STDCALL
        ret 8
    %else
        ret
    %endif

%endmacro


;-----------------------------------------------------------------------
; push_xmm first, last
;
; Reserves 16 bytes of stack per register and stores xmm<first>..xmm<last>
; at ascending offsets from rsp. Unaligned stores are used, so rsp does not
; have to be 16-byte aligned. Uses rsp, hence 64-bit code only.
; Must be paired with pop_xmm using the same arguments.
;-----------------------------------------------------------------------
%macro push_xmm 2
    sub rsp, 16 * (%2 - %1 + 1)

    %assign stackoffset 0
    %assign regnumber %1

    %rep (%2 - %1 + 1)
        movdqu [rsp + 16 * stackoffset], xmm%[regnumber]

        %assign stackoffset stackoffset+1
        %assign regnumber regnumber+1
    %endrep
%endmacro


;-----------------------------------------------------------------------
; pop_xmm first, last
;
; Reloads xmm<first>..xmm<last> from the area written by push_xmm and
; releases that stack space.
;-----------------------------------------------------------------------
%macro pop_xmm 2
    %assign stackoffset 0
    %assign regnumber %1

    %rep (%2 - %1 + 1)
        movdqu xmm%[regnumber], [rsp + 16 * stackoffset]

        %assign stackoffset stackoffset+1
        %assign regnumber regnumber+1
    %endrep

    add rsp, 16 * (%2 - %1 + 1)
%endmacro


;-----------------------------------------------------------------------
; aes_hw_cpu operation, block_count
;
; Fully unrolled in-place transform of block_count consecutive 16-byte
; blocks.
;   operation    enc or dec (selects aesenc/aesenclast or aesdec/aesdeclast)
;   block_count  number of blocks, held in xmm1..xmm<block_count>
;
; In:     ecx/rcx = key schedule, read at ascending offsets 0, 16, ..., 224
;         edx/rdx = data, read and then overwritten
; Clobb:  xmm0 (current round key), xmm1..xmm<block_count>
;
; The pointer registers are not modified. Each round key is loaded once
; and applied to every block before the next key is fetched.
;-----------------------------------------------------------------------
%macro aes_hw_cpu 2
    %define OPERATION %1
    %define BLOCK_COUNT %2

    ; Load data blocks
    %assign block 1
    %rep BLOCK_COUNT
        movdqu xmm%[block], [%[R]dx + 16 * (block - 1)]
        %assign block block+1
    %endrep

    ; Encrypt/decrypt data blocks
    %assign round 0
    %rep 15
        movdqu xmm0, [%[R]cx + 16 * round]

        %assign block 1
        %rep BLOCK_COUNT

            ; Round 0 is a plain XOR with the first key, rounds 1..13 use
            ; the regular round instruction, round 14 uses the last-round one.
            %if round = 0
                pxor xmm%[block], xmm0
            %else
                %if round < 14
                    aes%[OPERATION] xmm%[block], xmm0
                %else
                    aes%[OPERATION]last xmm%[block], xmm0
                %endif
            %endif

            %assign block block+1
        %endrep

        %assign round round+1
    %endrep

    ; Store data blocks
    %assign block 1
    %rep BLOCK_COUNT
        movdqu [%[R]dx + 16 * (block - 1)], xmm%[block]
        %assign block block+1
    %endrep

    %undef OPERATION
    %undef BLOCK_COUNT
%endmacro


;-----------------------------------------------------------------------
; aes_hw_cpu_32_blocks operation
;
; In-place transform of 32 consecutive 16-byte blocks (512 bytes).
;   operation    enc or dec, passed through to aes_hw_cpu
;
; Blocks are processed in batches of MAX_REG_BLOCK_COUNT (15 in 64-bit
; builds, 7 otherwise, leaving xmm0 free for the round key), followed by
; one shorter batch for the remainder of 32 / MAX_REG_BLOCK_COUNT.
;
; In:     ecx/rcx = key schedule
;         edx/rdx = data
; Clobb:  eax, edx/rdx (advanced past the full batches), flags,
;         xmm0..xmm<MAX_REG_BLOCK_COUNT>
;
; For win64 output xmm6 and above are saved on entry and restored on exit.
;-----------------------------------------------------------------------
%macro aes_hw_cpu_32_blocks 1
    %define OPERATION_32_BLOCKS %1

    %ifidn __BITS__, 64
        %define MAX_REG_BLOCK_COUNT 15
    %else
        %define MAX_REG_BLOCK_COUNT 7
    %endif

    %ifidn __OUTPUT_FORMAT__, win64
        %if MAX_REG_BLOCK_COUNT > 5
            push_xmm 6, MAX_REG_BLOCK_COUNT
        %endif
    %endif

        ; eax = number of full batches still to process
        mov eax, 32 / MAX_REG_BLOCK_COUNT
    .next_batch:
        aes_hw_cpu %[OPERATION_32_BLOCKS], MAX_REG_BLOCK_COUNT

        add %[R]dx, 16 * MAX_REG_BLOCK_COUNT
        dec eax
        jnz .next_batch

    ; Remaining blocks that do not fill a whole batch
    %if (32 % MAX_REG_BLOCK_COUNT) != 0
        aes_hw_cpu %[OPERATION_32_BLOCKS], (32 % MAX_REG_BLOCK_COUNT)
    %endif

    %ifidn __OUTPUT_FORMAT__, win64
        %if MAX_REG_BLOCK_COUNT > 5
            pop_xmm 6, MAX_REG_BLOCK_COUNT
        %endif
    %endif

    %undef OPERATION_32_BLOCKS
    %undef MAX_REG_BLOCK_COUNT
%endmacro


%ifidn __BITS__, 16

    USE16
    SEGMENT _TEXT PUBLIC CLASS=CODE USE16
    SEGMENT _DATA PUBLIC CLASS=DATA USE16
    GROUP DGROUP _TEXT _DATA
    SECTION _TEXT

%else

    SECTION .text

%endif


;-----------------------------------------------------------------------
; void aes_hw_cpu_enable_sse ();
;
; Sets bit 9 of CR4 (named OSFXSR in the Intel SDM). Accessing CR4 is a
; privileged operation, so the caller must be running with sufficient
; privilege.
; Clobb: eax/rax, flags
;-----------------------------------------------------------------------

    export_function aes_hw_cpu_enable_sse
        mov %[R]ax, cr4
        or ax, 1 << 9
        mov cr4, %[R]ax
        ret


%ifidn __BITS__, 16


;-----------------------------------------------------------------------
; byte is_aes_hw_cpu_supported ();
;
; Out:   al = bit 25 of ecx as returned by CPUID leaf 1 (the AES
;        instruction feature flag in the Intel SDM); the rest of eax is 0
; Clobb: eax, ebx, ecx, edx (all written by cpuid), flags
;-----------------------------------------------------------------------

    export_function is_aes_hw_cpu_supported
        mov eax, 1
        cpuid
        mov eax, ecx
        shr eax, 25
        and al, 1
        ret


;-----------------------------------------------------------------------
; void aes_hw_cpu_decrypt (const byte *ks, byte *data);
; void aes_hw_cpu_encrypt (const byte *ks, byte *data);
;
; Transform one 16-byte block in place. Both entry points share one body
; and differ only in ax, the signed step applied to the key pointer:
;   encrypt  ax = +16, round keys are read at ks, ks + 16, ..., ks + 224
;   decrypt  ax = -16, round keys are read at ks, ks - 16, ..., ks - 224
; The sign of ax also selects aesenc/aesenclast versus aesdec/aesdeclast.
;
; In:    [bp + 4] = ks, [bp + 6] = data, as 16-bit offsets on the stack
;        (the offsets assume a 2-byte return address above the saved bp)
; Saved: bp, si, di
; Clobb: ax, cx, xmm0, xmm1, flags
;-----------------------------------------------------------------------

    export_function aes_hw_cpu_decrypt
        mov ax, -16
        jmp aes_hw_cpu_encrypt_decrypt

    export_function aes_hw_cpu_encrypt
        mov ax, 16

    aes_hw_cpu_encrypt_decrypt:
        push bp
        mov bp, sp
        push di
        push si

        mov si, [bp + 4]        ; ks
        mov di, [bp + 4 + 2]    ; data

        movdqu xmm0, [si]
        movdqu xmm1, [di]

        ; Round 0: XOR the block with the first round key
        pxor xmm1, xmm0

        ; cx counts the 13 middle rounds
        mov cx, 13

    .round1_13:
        add si, ax
        movdqu xmm0, [si]

        ; A negative step means decryption
        test ax, ax
        jl .decrypt

        aesenc xmm1, xmm0
        jmp .round_done
    .decrypt:
        aesdec xmm1, xmm0
    .round_done:
        loop .round1_13

        ; Final round
        add si, ax
        movdqu xmm0, [si]

        test ax, ax
        jl .decrypt_last

        aesenclast xmm1, xmm0
        jmp .store
    .decrypt_last:
        aesdeclast xmm1, xmm0
    .store:
        movdqu [di], xmm1

        pop si
        pop di
        pop bp
        ret


%else ; __BITS__ != 16


;-----------------------------------------------------------------------
; byte is_aes_hw_cpu_supported ();
;
; Out:   eax = bit 25 of ecx as returned by CPUID leaf 1 (the AES
;        instruction feature flag in the Intel SDM), i.e. 0 or 1
; Saved: ebx/rbx (overwritten by cpuid, restored before returning)
; Clobb: ecx, edx, flags
;-----------------------------------------------------------------------

    export_function is_aes_hw_cpu_supported
        push %[R]bx

        mov eax, 1
        cpuid
        mov eax, ecx
        shr eax, 25
        and eax, 1

        pop %[R]bx
        ret


;-----------------------------------------------------------------------
; void aes_hw_cpu_decrypt (const byte *ks, byte *data);
;
; Decrypts one 16-byte block in place; see the aes_hw_cpu macro.
;-----------------------------------------------------------------------

    aes_function_entry aes_hw_cpu_decrypt
        aes_hw_cpu dec, 1
    aes_function_exit


;-----------------------------------------------------------------------
; void aes_hw_cpu_decrypt_32_blocks (const byte *ks, byte *data);
;
; Decrypts 32 consecutive 16-byte blocks in place; see the
; aes_hw_cpu_32_blocks macro.
;-----------------------------------------------------------------------

    aes_function_entry aes_hw_cpu_decrypt_32_blocks
        aes_hw_cpu_32_blocks dec
    aes_function_exit


;-----------------------------------------------------------------------
; void aes_hw_cpu_encrypt (const byte *ks, byte *data);
;
; Encrypts one 16-byte block in place; see the aes_hw_cpu macro.
;-----------------------------------------------------------------------

    aes_function_entry aes_hw_cpu_encrypt
        aes_hw_cpu enc, 1
    aes_function_exit


;-----------------------------------------------------------------------
; void aes_hw_cpu_encrypt_32_blocks (const byte *ks, byte *data);
;
; Encrypts 32 consecutive 16-byte blocks in place; see the
; aes_hw_cpu_32_blocks macro.
;-----------------------------------------------------------------------

    aes_function_entry aes_hw_cpu_encrypt_32_blocks
        aes_hw_cpu_32_blocks enc
    aes_function_exit


%endif ; __BITS__ != 16


; For ELF output, emit an empty .note.GNU-stack section so that the linker
; does not mark the stack of the final binary as executable because of this
; object file. Other output formats are left untouched. This must stay at
; the end of the file because it switches the current section.
%ifidn __OUTPUT_FORMAT__, elf
    section .note.GNU-stack noalloc noexec nowrite progbits
%elifidn __OUTPUT_FORMAT__, elf32
    section .note.GNU-stack noalloc noexec nowrite progbits
%elifidn __OUTPUT_FORMAT__, elf64
    section .note.GNU-stack noalloc noexec nowrite progbits
%endif