;
; AES block encryption/decryption routines built on the CPU's AES instructions
; (aesenc/aesdec and their "last" variants). NASM syntax.
;
;
; Copyright (c) 2010 TrueCrypt Developers Association. All rights reserved.
;
; Governed by the TrueCrypt License 3.0 the full text of which is contained in
; the file License.txt included in TrueCrypt binary and source code distribution
; packages.
;
; R is the register-name prefix pasted in front of ax/cx/dx further down:
; "r" selects the 64-bit registers in 64-bit builds, "e" selects the 32-bit
; registers in 16- and 32-bit builds.
%if __BITS__ = 64
	%define R r
%else
	%define R e
%endif
; export_function name [, argbytes]
;
; Emits the global entry label of a function. The spelling depends on the build:
;   MS_STDCALL defined : label name@argbytes, additionally named in an export
;                        directive as _name@argbytes (argbytes defaults to 0)
;   16-bit build       : label _name
;   anything else      : label name
%macro export_function 1-2 0
	%ifdef MS_STDCALL
		global %1@%2
		export _%1@%2
	%1@%2:
	%elif __BITS__ = 16
		global _%1
	_%1:
	%else
		global %1
	%1:
	%endif
%endmacro
; aes_function_entry name
;
; Opens a function with the C prototype
;     void name (const byte *ks, byte *data);
; and places its two arguments where the AES macros expect them:
;     ecx/rcx = ks
;     edx/rdx = data
; Nothing besides the entry label is emitted for 16-bit builds.
%macro aes_function_entry 1
	export_function %1, 8

	%if __BITS__ = 64
		%ifnidn __OUTPUT_FORMAT__, win64
			; Non-Windows 64-bit: the arguments arrive in rdi/rsi.
			; For win64 output they are already in rcx/rdx.
			mov rcx, rdi
			mov rdx, rsi
		%endif
	%elif __BITS__ = 32
		; Both arguments sit on the stack just above the return address
		mov ecx, [esp + 4]
		mov edx, [esp + 8]
	%endif
%endmacro
; aes_function_exit
;
; Returns from a function of the form void (const byte *, byte *).
; When MS_STDCALL is defined the return also removes the 8 bytes of
; arguments from the stack; otherwise a plain ret is emitted.
%macro aes_function_exit 0
	%ifndef MS_STDCALL
		ret
	%else
		ret 8
	%endif
%endmacro
; push_xmm first, last
;
; Reserves 16 bytes of stack per register and stores xmm<first>..xmm<last>
; there with unaligned stores, the lowest-numbered register at the lowest
; address. Addresses the stack through rsp, so it is meant for 64-bit code.
; Undo with pop_xmm using the same two arguments.
%macro push_xmm 2
	%assign xmm_push_count (%2 - %1 + 1)
	sub rsp, 16 * xmm_push_count

	%assign xmm_push_slot 0
	%rep xmm_push_count
		%assign xmm_push_reg (%1 + xmm_push_slot)
		movdqu [rsp + 16 * xmm_push_slot], xmm%[xmm_push_reg]
		%assign xmm_push_slot xmm_push_slot+1
	%endrep
%endmacro
; pop_xmm first, last
;
; Counterpart of push_xmm: reloads xmm<first>..xmm<last> from the stack
; (lowest-numbered register from the lowest address) and then releases the
; 16 bytes per register that push_xmm reserved.
%macro pop_xmm 2
	%assign xmm_pop_count (%2 - %1 + 1)

	%assign xmm_pop_slot 0
	%rep xmm_pop_count
		%assign xmm_pop_reg (%1 + xmm_pop_slot)
		movdqu xmm%[xmm_pop_reg], [rsp + 16 * xmm_pop_slot]
		%assign xmm_pop_slot xmm_pop_slot+1
	%endrep

	add rsp, 16 * xmm_pop_count
%endmacro
; aes_hw_cpu operation, block_count
;
; Emits fully unrolled code that transforms block_count consecutive 16-byte
; blocks in place.
;   operation   : enc or dec, selects aesenc/aesenclast or aesdec/aesdeclast
;   block_count : number of blocks, held in xmm1..xmm<block_count>
; Register use in the generated code:
;   ecx/rcx = ks   (15 round keys of 16 bytes each, read in ascending order)
;   edx/rdx = data (read, then overwritten with the result)
;   xmm0    = current round key; still holds the last round key afterwards
; All loads and stores are unaligned (movdqu).
%macro aes_hw_cpu 2
	%define OPERATION %1
	%define BLOCK_COUNT %2

	; Fetch the data blocks into xmm1 and up
	%assign aes_blk 1
	%rep BLOCK_COUNT
		movdqu xmm%[aes_blk], [%[R]dx + 16 * (aes_blk - 1)]
		%assign aes_blk aes_blk+1
	%endrep

	; Apply the 15 round keys. Each key is loaded once and used on every block:
	; key 0 is XORed in, keys 1..13 drive a regular round, key 14 the final round.
	%assign aes_rnd 0
	%rep 15
		movdqu xmm0, [%[R]cx + 16 * aes_rnd]

		%assign aes_blk 1
		%rep BLOCK_COUNT
			%if aes_rnd = 0
				pxor xmm%[aes_blk], xmm0
			%elif aes_rnd < 14
				aes%[OPERATION] xmm%[aes_blk], xmm0
			%else
				aes%[OPERATION]last xmm%[aes_blk], xmm0
			%endif
			%assign aes_blk aes_blk+1
		%endrep

		%assign aes_rnd aes_rnd+1
	%endrep

	; Write the transformed blocks back over the input
	%assign aes_blk 1
	%rep BLOCK_COUNT
		movdqu [%[R]dx + 16 * (aes_blk - 1)], xmm%[aes_blk]
		%assign aes_blk aes_blk+1
	%endrep

	%undef OPERATION
	%undef BLOCK_COUNT
%endmacro
; aes_hw_cpu_32_blocks operation
;
; Emits code that transforms 32 consecutive 16-byte blocks in place.
;   operation : enc or dec, passed through to aes_hw_cpu
; The blocks are handled in batches of as many blocks as there are xmm
; registers besides xmm0 (15 in 64-bit builds, 7 otherwise). A run-time loop
; covers the full batches and one extra aes_hw_cpu expansion covers whatever
; is left over.
; Register use: ecx/rcx = ks, edx/rdx = data (advanced past the full batches),
; eax = batch counter. For win64 output xmm6 and above are saved on the stack
; around the work and restored afterwards.
%macro aes_hw_cpu_32_blocks 1
	%define OPERATION_32_BLOCKS %1

	%if __BITS__ = 64
		%define MAX_REG_BLOCK_COUNT 15
	%else
		%define MAX_REG_BLOCK_COUNT 7
	%endif

	%assign aes32_full_batches (32 / MAX_REG_BLOCK_COUNT)
	%assign aes32_leftover_blocks (32 % MAX_REG_BLOCK_COUNT)

	%ifidn __OUTPUT_FORMAT__, win64
		%if MAX_REG_BLOCK_COUNT > 5
			push_xmm 6, MAX_REG_BLOCK_COUNT
		%endif
	%endif

	; Full batches
	mov eax, aes32_full_batches
.next_batch:
	aes_hw_cpu %[OPERATION_32_BLOCKS], MAX_REG_BLOCK_COUNT
	add %[R]dx, 16 * MAX_REG_BLOCK_COUNT
	dec eax
	jnz .next_batch

	; Blocks left over after the last full batch
	%if aes32_leftover_blocks != 0
		aes_hw_cpu %[OPERATION_32_BLOCKS], aes32_leftover_blocks
	%endif

	%ifidn __OUTPUT_FORMAT__, win64
		%if MAX_REG_BLOCK_COUNT > 5
			pop_xmm 6, MAX_REG_BLOCK_COUNT
		%endif
	%endif

	%undef OPERATION_32_BLOCKS
	%undef MAX_REG_BLOCK_COUNT
%endmacro
; Select where the code below is assembled.
; 16-bit build: switch to 16-bit code generation, declare the 16-bit segments
; _TEXT (class CODE) and _DATA (class DATA), put both into the group DGROUP and
; continue in _TEXT.
; 32/64-bit build: use the regular .text section.
%ifidn __BITS__, 16
USE16
SEGMENT _TEXT PUBLIC CLASS=CODE USE16
SEGMENT _DATA PUBLIC CLASS=DATA USE16
GROUP DGROUP _TEXT _DATA
SECTION _TEXT
%else
SECTION .text
%endif
; void aes_hw_cpu_enable_sse ();
;
; Sets bit 9 of control register CR4 (the OSFXSR bit) and leaves every other
; CR4 bit as it was. Overwrites eax/rax and the flags.
; NOTE(review): writing CR4 is a privileged operation - confirm that every
; caller runs at a privilege level that permits it.
%define CR4_OSFXSR_MASK (1 << 9)

export_function aes_hw_cpu_enable_sse
	mov %[R]ax, cr4
	or ax, CR4_OSFXSR_MASK
	mov cr4, %[R]ax
	ret
%ifidn __BITS__, 16
; byte is_aes_hw_cpu_supported ();
;
; 16-bit build only. Runs CPUID leaf 1 and returns bit 25 of the resulting ecx
; (the AES instruction feature flag) in al: 1 if set, 0 if clear.
; cpuid overwrites eax, ebx, ecx and edx; none of them is saved here.
%define CPUID_LEAF_FEATURES 1
%define CPUID_ECX_AES_BIT 25

export_function is_aes_hw_cpu_supported
	mov eax, CPUID_LEAF_FEATURES
	cpuid
	mov eax, ecx
	shr eax, CPUID_ECX_AES_BIT	; feature bit is now bit 0
	and al, 1
	ret
; 16-bit single-block routines. Both entry points share one body; ax carries
; the signed distance between consecutive round keys and doubles as the
; direction flag (positive = encrypt, negative = decrypt).
;
; Arguments are read from the stack at [bp + 4] (ks) and [bp + 6] (data).
; The 16-byte block at data is transformed in place using 15 round keys.
; Preserves bp, si, di. Overwrites ax, cx, xmm0, xmm1 and the flags.

; void aes_hw_cpu_decrypt (const byte *ks, byte *data);
; The round keys are walked downwards from ks (16 bytes lower per round).
; NOTE(review): ks therefore presumably points at the highest-addressed round
; key of the schedule - confirm against the caller.
export_function aes_hw_cpu_decrypt
	mov ax, -16
	jmp aes_hw_cpu_encrypt_decrypt

; void aes_hw_cpu_encrypt (const byte *ks, byte *data);
; The round keys are walked upwards from ks (16 bytes higher per round).
export_function aes_hw_cpu_encrypt
	mov ax, 16

aes_hw_cpu_encrypt_decrypt:
	push bp
	mov bp, sp
	push di
	push si

	mov si, [bp + 4]		; si = ks
	mov di, [bp + 6]		; di = data

	; Round 0: XOR the block with the first round key
	movdqu xmm0, [si]
	movdqu xmm1, [di]
	pxor xmm1, xmm0

	; Rounds 1..13
	mov cx, 13
.middle_round:
	add si, ax			; step to the next round key
	movdqu xmm0, [si]
	cmp ax, 0
	jge .middle_encrypt
	aesdec xmm1, xmm0
	jmp .middle_done
.middle_encrypt:
	aesenc xmm1, xmm0
.middle_done:
	loop .middle_round

	; Final round
	add si, ax
	movdqu xmm0, [si]
	cmp ax, 0
	jge .final_encrypt
	aesdeclast xmm1, xmm0
	jmp .final_done
.final_encrypt:
	aesenclast xmm1, xmm0
.final_done:

	movdqu [di], xmm1		; store the result over the input block

	pop si
	pop di
	pop bp
	ret
%else ; __BITS__ != 16
; byte is_aes_hw_cpu_supported ();
; This routine is commented out because an alternative C implementation,
; which supports the Hyper-V detection workaround, is used instead.
;
; export_function is_aes_hw_cpu_supported
; push %[R]bx
;
; mov eax, 1
; cpuid
; mov eax, ecx
; shr eax, 25
; and eax, 1
;
; pop %[R]bx
; ret
; void aes_hw_cpu_decrypt (const byte *ks, byte *data);
;
; Decrypts the single 16-byte block at data in place. The block is XORed with
; the round key at ks, run through aesdec with the next 13 round keys and
; through aesdeclast with the 15th (at ks + 224).
; Overwrites xmm0 and xmm1 (xmm0 is left holding the last round key), plus
; ecx/edx in 32-bit builds and rcx/rdx in non-win64 64-bit builds.
; NOTE(review): presumably ks must already be a decryption key schedule laid
; out in the order aesdec consumes it - confirm against the caller.
aes_function_entry aes_hw_cpu_decrypt
aes_hw_cpu dec, 1
aes_function_exit
; void aes_hw_cpu_decrypt_32_blocks (const byte *ks, byte *data);
;
; Decrypts 32 consecutive 16-byte blocks (512 bytes) starting at data, in
; place, using the same 15 round keys at ks for every block.
; Overwrites eax and edx/rdx (plus ecx in 32-bit builds and rcx in non-win64
; 64-bit builds) and the xmm registers: xmm0..xmm15 in 64-bit builds,
; xmm0..xmm7 in 32-bit builds. For win64 output xmm6..xmm15 are saved and
; restored.
aes_function_entry aes_hw_cpu_decrypt_32_blocks
aes_hw_cpu_32_blocks dec
aes_function_exit
; void aes_hw_cpu_encrypt (const byte *ks, byte *data);
;
; Encrypts the single 16-byte block at data in place. The block is XORed with
; the round key at ks, run through aesenc with the next 13 round keys and
; through aesenclast with the 15th (at ks + 224).
; Overwrites xmm0 and xmm1 (xmm0 is left holding the last round key), plus
; ecx/edx in 32-bit builds and rcx/rdx in non-win64 64-bit builds.
aes_function_entry aes_hw_cpu_encrypt
aes_hw_cpu enc, 1
aes_function_exit
; void aes_hw_cpu_encrypt_32_blocks (const byte *ks, byte *data);
;
; Encrypts 32 consecutive 16-byte blocks (512 bytes) starting at data, in
; place, using the same 15 round keys at ks for every block.
; Overwrites eax and edx/rdx (plus ecx in 32-bit builds and rcx in non-win64
; 64-bit builds) and the xmm registers: xmm0..xmm15 in 64-bit builds,
; xmm0..xmm7 in 32-bit builds. For win64 output xmm6..xmm15 are saved and
; restored.
aes_function_entry aes_hw_cpu_encrypt_32_blocks
aes_hw_cpu_32_blocks enc
aes_function_exit
%endif ; __BITS__ != 16
;