From 89efcdb8cd95ea798187fe4062a73fa5d2fca456 Mon Sep 17 00:00:00 2001 From: Mounir IDRASSI Date: Tue, 4 Jul 2017 02:05:11 +0200 Subject: Windows Driver: correctly save and restore extended processor state when performing AVX operations on Windows 7 and later. Enhance readability of code handling save/restore of floating point state. --- src/Common/Pkcs5.c | 102 +++++++++++++++++++++++++++++++++++++---------- src/Common/Tcdefs.h | 20 ++++++++++ src/Common/Tests.c | 8 ++-- src/Crypto/Camellia.c | 32 +++++++++++---- src/Crypto/GostCipher.c | 4 +- src/Driver/DriveFilter.c | 18 ++++++--- src/Driver/DumpFilter.c | 15 +++++++ src/Driver/Ntdriver.c | 41 +++++++++++++++++++ 8 files changed, 201 insertions(+), 39 deletions(-) diff --git a/src/Common/Pkcs5.c b/src/Common/Pkcs5.c index e2a9966b..28df35d5 100644 --- a/src/Common/Pkcs5.c +++ b/src/Common/Pkcs5.c @@ -99,6 +99,18 @@ void hmac_sha256 char* buf = hmac.k; int b; char key[SHA256_DIGESTSIZE]; +#if defined (DEVICE_DRIVER) + NTSTATUS saveStatus = STATUS_INVALID_PARAMETER; +#ifdef _WIN64 + XSTATE_SAVE SaveState; + if (g_isIntel && HasSAVX()) + saveStatus = KeSaveExtendedProcessorState(XSTATE_MASK_GSSE, &SaveState); +#else + KFLOATING_SAVE floatingPointState; + if (HasSSE2()) + saveStatus = KeSaveFloatingPointState (&floatingPointState); +#endif +#endif /* If the key is longer than the hash algorithm block size, let key = sha256(key), as per HMAC specifications. 
*/ if (lk > SHA256_BLOCKSIZE) @@ -139,6 +151,16 @@ void hmac_sha256 sha256_hash ((unsigned char *) buf, SHA256_BLOCKSIZE, ctx); hmac_sha256_internal(d, ld, &hmac); + +#if defined (DEVICE_DRIVER) + if (NT_SUCCESS (saveStatus)) +#ifdef _WIN64 + KeRestoreExtendedProcessorState(&SaveState); +#else + KeRestoreFloatingPointState (&floatingPointState); +#endif +#endif + /* Prevent leaks */ burn(&hmac, sizeof(hmac)); burn(key, sizeof(key)); @@ -204,6 +226,18 @@ void derive_key_sha256 (char *pwd, int pwd_len, char *salt, int salt_len, uint32 int b, l, r; #ifndef TC_WINDOWS_BOOT char key[SHA256_DIGESTSIZE]; +#if defined (DEVICE_DRIVER) + NTSTATUS saveStatus = STATUS_INVALID_PARAMETER; +#ifdef _WIN64 + XSTATE_SAVE SaveState; + if (g_isIntel && HasSAVX()) + saveStatus = KeSaveExtendedProcessorState(XSTATE_MASK_GSSE, &SaveState); +#else + KFLOATING_SAVE floatingPointState; + if (HasSSE2()) + saveStatus = KeSaveFloatingPointState (&floatingPointState); +#endif +#endif /* If the password is longer than the hash algorithm block size, let pwd = sha256(pwd), as per HMAC specifications. */ if (pwd_len > SHA256_BLOCKSIZE) @@ -267,6 +301,14 @@ void derive_key_sha256 (char *pwd, int pwd_len, char *salt, int salt_len, uint32 derive_u_sha256 (salt, salt_len, iterations, b, &hmac); memcpy (dk, hmac.u, r); +#if defined (DEVICE_DRIVER) + if (NT_SUCCESS (saveStatus)) +#ifdef _WIN64 + KeRestoreExtendedProcessorState(&SaveState); +#else + KeRestoreFloatingPointState (&floatingPointState); +#endif +#endif /* Prevent possible leaks. 
*/ burn (&hmac, sizeof(hmac)); @@ -327,11 +369,17 @@ void hmac_sha512 char* buf = hmac.k; int b; char key[SHA512_DIGESTSIZE]; -#if defined (DEVICE_DRIVER) && !defined (_WIN64) - KFLOATING_SAVE floatingPointState; - NTSTATUS saveStatus = STATUS_SUCCESS; - if (HasSSE2() && HasMMX()) +#if defined (DEVICE_DRIVER) + NTSTATUS saveStatus = STATUS_INVALID_PARAMETER; +#ifdef _WIN64 + XSTATE_SAVE SaveState; + if (g_isIntel && HasSAVX()) + saveStatus = KeSaveExtendedProcessorState(XSTATE_MASK_GSSE, &SaveState); +#else + KFLOATING_SAVE floatingPointState; + if (HasSSSE3() && HasMMX()) saveStatus = KeSaveFloatingPointState (&floatingPointState); +#endif #endif /* If the key is longer than the hash algorithm block size, @@ -375,9 +423,13 @@ void hmac_sha512 hmac_sha512_internal (d, ld, &hmac); -#if defined (DEVICE_DRIVER) && !defined (_WIN64) - if (NT_SUCCESS (saveStatus) && (HasSSE2() && HasMMX())) +#if defined (DEVICE_DRIVER) + if (NT_SUCCESS (saveStatus)) +#ifdef _WIN64 + KeRestoreExtendedProcessorState(&SaveState); +#else KeRestoreFloatingPointState (&floatingPointState); +#endif #endif /* Prevent leaks */ @@ -419,11 +471,17 @@ void derive_key_sha512 (char *pwd, int pwd_len, char *salt, int salt_len, uint32 char* buf = hmac.k; int b, l, r; char key[SHA512_DIGESTSIZE]; -#if defined (DEVICE_DRIVER) && !defined (_WIN64) - KFLOATING_SAVE floatingPointState; - NTSTATUS saveStatus = STATUS_SUCCESS; - if (HasSSE2() && HasMMX()) +#if defined (DEVICE_DRIVER) + NTSTATUS saveStatus = STATUS_INVALID_PARAMETER; +#ifdef _WIN64 + XSTATE_SAVE SaveState; + if (g_isIntel && HasSAVX()) + saveStatus = KeSaveExtendedProcessorState(XSTATE_MASK_GSSE, &SaveState); +#else + KFLOATING_SAVE floatingPointState; + if (HasSSSE3() && HasMMX()) saveStatus = KeSaveFloatingPointState (&floatingPointState); +#endif #endif /* If the password is longer than the hash algorithm block size, @@ -488,9 +546,13 @@ void derive_key_sha512 (char *pwd, int pwd_len, char *salt, int salt_len, uint32 derive_u_sha512 (salt, 
salt_len, iterations, b, &hmac); memcpy (dk, hmac.u, r); -#if defined (DEVICE_DRIVER) && !defined (_WIN64) - if (NT_SUCCESS (saveStatus) && (HasSSE2() && HasMMX())) +#if defined (DEVICE_DRIVER) + if (NT_SUCCESS (saveStatus)) +#ifdef _WIN64 + KeRestoreExtendedProcessorState(&SaveState); +#else KeRestoreFloatingPointState (&floatingPointState); +#endif #endif /* Prevent possible leaks. */ @@ -771,7 +833,7 @@ void hmac_whirlpool char key[WHIRLPOOL_DIGESTSIZE]; #if defined (DEVICE_DRIVER) && !defined (_WIN64) KFLOATING_SAVE floatingPointState; - NTSTATUS saveStatus = STATUS_SUCCESS; + NTSTATUS saveStatus = STATUS_INVALID_PARAMETER; if (HasISSE()) saveStatus = KeSaveFloatingPointState (&floatingPointState); #endif @@ -817,7 +879,7 @@ void hmac_whirlpool hmac_whirlpool_internal(d, ld, &hmac); #if defined (DEVICE_DRIVER) && !defined (_WIN64) - if (NT_SUCCESS (saveStatus) && HasISSE()) + if (NT_SUCCESS (saveStatus)) KeRestoreFloatingPointState (&floatingPointState); #endif /* Prevent leaks */ @@ -859,7 +921,7 @@ void derive_key_whirlpool (char *pwd, int pwd_len, char *salt, int salt_len, uin int b, l, r; #if defined (DEVICE_DRIVER) && !defined (_WIN64) KFLOATING_SAVE floatingPointState; - NTSTATUS saveStatus = STATUS_SUCCESS; + NTSTATUS saveStatus = STATUS_INVALID_PARAMETER; if (HasISSE()) saveStatus = KeSaveFloatingPointState (&floatingPointState); #endif @@ -926,7 +988,7 @@ void derive_key_whirlpool (char *pwd, int pwd_len, char *salt, int salt_len, uin memcpy (dk, hmac.u, r); #if defined (DEVICE_DRIVER) && !defined (_WIN64) - if (NT_SUCCESS (saveStatus) && HasISSE()) + if (NT_SUCCESS (saveStatus)) KeRestoreFloatingPointState (&floatingPointState); #endif @@ -986,7 +1048,7 @@ void hmac_streebog CRYPTOPP_ALIGN_DATA(16) char key[STREEBOG_DIGESTSIZE]; #if defined (DEVICE_DRIVER) && !defined (_WIN64) KFLOATING_SAVE floatingPointState; - NTSTATUS saveStatus = STATUS_SUCCESS; + NTSTATUS saveStatus = STATUS_INVALID_PARAMETER; if (HasSSE2() || HasSSE41()) saveStatus = 
KeSaveFloatingPointState (&floatingPointState); #endif @@ -1032,7 +1094,7 @@ void hmac_streebog hmac_streebog_internal(d, ld, &hmac); #if defined (DEVICE_DRIVER) && !defined (_WIN64) - if (NT_SUCCESS (saveStatus) && (HasSSE2() || HasSSE41())) + if (NT_SUCCESS (saveStatus)) KeRestoreFloatingPointState (&floatingPointState); #endif /* Prevent leaks */ @@ -1074,7 +1136,7 @@ void derive_key_streebog (char *pwd, int pwd_len, char *salt, int salt_len, uint int b, l, r; #if defined (DEVICE_DRIVER) && !defined (_WIN64) KFLOATING_SAVE floatingPointState; - NTSTATUS saveStatus = STATUS_SUCCESS; + NTSTATUS saveStatus = STATUS_INVALID_PARAMETER; if (HasSSE2() || HasSSE41()) saveStatus = KeSaveFloatingPointState (&floatingPointState); #endif @@ -1141,7 +1203,7 @@ void derive_key_streebog (char *pwd, int pwd_len, char *salt, int salt_len, uint memcpy (dk, hmac.u, r); #if defined (DEVICE_DRIVER) && !defined (_WIN64) - if (NT_SUCCESS (saveStatus) && (HasSSE2() || HasSSE41())) + if (NT_SUCCESS (saveStatus)) KeRestoreFloatingPointState (&floatingPointState); #endif diff --git a/src/Common/Tcdefs.h b/src/Common/Tcdefs.h index f56cdc9b..e7f54d8e 100644 --- a/src/Common/Tcdefs.h +++ b/src/Common/Tcdefs.h @@ -260,6 +260,26 @@ typedef int BOOL; #define FALSE !TRUE #endif +typedef NTSTATUS (NTAPI *KeSaveExtendedProcessorStateFn) ( + __in ULONG64 Mask, + PXSTATE_SAVE XStateSave + ); + + +typedef VOID (NTAPI *KeRestoreExtendedProcessorStateFn) ( + PXSTATE_SAVE XStateSave + ); + +extern NTSTATUS NTAPI KeSaveExtendedProcessorState ( + __in ULONG64 Mask, + PXSTATE_SAVE XStateSave + ); + + +extern VOID NTAPI KeRestoreExtendedProcessorState ( + PXSTATE_SAVE XStateSave + ); + #else /* !TC_WINDOWS_DRIVER */ #if !defined(_UEFI) #define TCalloc malloc diff --git a/src/Common/Tests.c b/src/Common/Tests.c index 8daf9f7d..cf30e4a1 100644 --- a/src/Common/Tests.c +++ b/src/Common/Tests.c @@ -583,8 +583,8 @@ BOOL RunHashTest (HashFunction fn, HashTestVector* vector, BOOL bUseSSE) BOOL bRet = TRUE; #if 
defined (DEVICE_DRIVER) && !defined (_WIN64) KFLOATING_SAVE floatingPointState; - NTSTATUS saveStatus = STATUS_SUCCESS; - if (bUseSSE && (HasISSE() || HasSSE2())) + NTSTATUS saveStatus = STATUS_INVALID_PARAMETER; + if (bUseSSE) saveStatus = KeSaveFloatingPointState (&floatingPointState); #endif while (vector[i].hexInput && vector[i].hexOutput) @@ -601,7 +601,7 @@ BOOL RunHashTest (HashFunction fn, HashTestVector* vector, BOOL bUseSSE) } #if defined (DEVICE_DRIVER) && !defined (_WIN64) - if (NT_SUCCESS (saveStatus) && bUseSSE && (HasISSE() || HasSSE2())) + if (NT_SUCCESS (saveStatus)) KeRestoreFloatingPointState (&floatingPointState); #endif @@ -1508,7 +1508,7 @@ BOOL test_pkcs5 () return FALSE; /* STREEBOG hash tests */ - if (RunHashTest (StreebogHash, Streebog512TestVectors, TRUE) == FALSE) + if (RunHashTest (StreebogHash, Streebog512TestVectors, (HasSSE2() || HasSSE41())? TRUE : FALSE) == FALSE) return FALSE; /* PKCS-5 test 1 with HMAC-SHA-256 used as the PRF (https://tools.ietf.org/html/draft-josefsson-scrypt-kdf-00) */ diff --git a/src/Crypto/Camellia.c b/src/Crypto/Camellia.c index f74130cd..49bc7670 100644 --- a/src/Crypto/Camellia.c +++ b/src/Crypto/Camellia.c @@ -1096,15 +1096,24 @@ void camellia_decrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock void camellia_encrypt_blocks(unsigned __int8 *instance, const byte* in_blk, byte* out_blk, uint32 blockCount) { #if !defined (_UEFI) - if (IsCpuIntel() && IsAesHwCpuSupported () && HasSAVX()) /* on AMD cpu, AVX is too slow */ + if ((blockCount >= 16) && IsCpuIntel() && IsAesHwCpuSupported () && HasSAVX()) /* on AMD cpu, AVX is too slow */ { - while (blockCount >= 16) +#if defined (TC_WINDOWS_DRIVER) + XSTATE_SAVE SaveState; + if (NT_SUCCESS (KeSaveExtendedProcessorState(XSTATE_MASK_GSSE, &SaveState))) { - camellia_ecb_enc_16way (instance, out_blk, in_blk); - out_blk += 16 * 16; - in_blk += 16 * 16; - blockCount -= 16; +#endif + while (blockCount >= 16) + { + camellia_ecb_enc_16way (instance, out_blk, 
in_blk); + out_blk += 16 * 16; + in_blk += 16 * 16; + blockCount -= 16; + } +#if defined (TC_WINDOWS_DRIVER) + KeRestoreExtendedProcessorState(&SaveState); } +#endif } #endif @@ -1123,8 +1132,13 @@ void camellia_encrypt_blocks(unsigned __int8 *instance, const byte* in_blk, byte void camellia_decrypt_blocks(unsigned __int8 *instance, const byte* in_blk, byte* out_blk, uint32 blockCount) { #if !defined (_UEFI) - if (IsCpuIntel() && IsAesHwCpuSupported () && HasSAVX()) /* on AMD cpu, AVX is too slow */ + if ((blockCount >= 16) && IsCpuIntel() && IsAesHwCpuSupported () && HasSAVX()) /* on AMD cpu, AVX is too slow */ { +#if defined (TC_WINDOWS_DRIVER) + XSTATE_SAVE SaveState; + if (NT_SUCCESS (KeSaveExtendedProcessorState(XSTATE_MASK_GSSE, &SaveState))) + { +#endif while (blockCount >= 16) { camellia_ecb_dec_16way (instance, out_blk, in_blk); @@ -1132,6 +1146,10 @@ void camellia_decrypt_blocks(unsigned __int8 *instance, const byte* in_blk, byte in_blk += 16 * 16; blockCount -= 16; } +#if defined (TC_WINDOWS_DRIVER) + KeRestoreExtendedProcessorState(&SaveState); + } +#endif } #endif diff --git a/src/Crypto/GostCipher.c b/src/Crypto/GostCipher.c index 0fd3941a..ddd649cd 100644 --- a/src/Crypto/GostCipher.c +++ b/src/Crypto/GostCipher.c @@ -96,7 +96,7 @@ void gost_set_key(const byte *key, gost_kds *ks, int useDynamicSbox) byte sbox_seed[64]; #if defined (DEVICE_DRIVER) && !defined (_WIN64) KFLOATING_SAVE floatingPointState; - NTSTATUS saveStatus = STATUS_SUCCESS; + NTSTATUS saveStatus = STATUS_INVALID_PARAMETER; if (HasSSE2() || HasSSE41()) saveStatus = KeSaveFloatingPointState (&floatingPointState); #endif @@ -106,7 +106,7 @@ void gost_set_key(const byte *key, gost_kds *ks, int useDynamicSbox) STREEBOG_finalize(&sctx, sbox_seed); #if defined (DEVICE_DRIVER) && !defined (_WIN64) - if (NT_SUCCESS (saveStatus) && (HasSSE2() || HasSSE41())) + if (NT_SUCCESS (saveStatus)) KeRestoreFloatingPointState (&floatingPointState); #endif diff --git a/src/Driver/DriveFilter.c 
b/src/Driver/DriveFilter.c index d4d5e122..08bebe18 100644 --- a/src/Driver/DriveFilter.c +++ b/src/Driver/DriveFilter.c @@ -327,10 +327,14 @@ static void ComputeBootLoaderFingerprint(PDEVICE_OBJECT LowerDeviceObject, byte* status = TCReadDevice (LowerDeviceObject, ioBuffer, offset, TC_SECTOR_SIZE_BIOS); if (NT_SUCCESS (status)) { -#if !defined (_WIN64) - KFLOATING_SAVE floatingPointState; - NTSTATUS saveStatus = STATUS_SUCCESS; - if (HasISSE()|| (HasSSE2() && HasMMX())) + NTSTATUS saveStatus = STATUS_INVALID_PARAMETER; +#ifdef _WIN64 + XSTATE_SAVE SaveState; + if (g_isIntel && HasSAVX()) + saveStatus = KeSaveExtendedProcessorState(XSTATE_MASK_GSSE, &SaveState); +#else + KFLOATING_SAVE floatingPointState; + if (HasISSE() || (HasSSSE3() && HasMMX())) saveStatus = KeSaveFloatingPointState (&floatingPointState); #endif WHIRLPOOL_add (ioBuffer, TC_BOOT_SECTOR_PIM_VALUE_OFFSET, &whirlpool); @@ -367,8 +371,10 @@ static void ComputeBootLoaderFingerprint(PDEVICE_OBJECT LowerDeviceObject, byte* sha512_end (&BootLoaderFingerprint [WHIRLPOOL_DIGESTSIZE], &sha2); } -#if !defined (_WIN64) - if (NT_SUCCESS (saveStatus) && (HasISSE() || (HasSSE2() && HasMMX()))) + if (NT_SUCCESS (saveStatus)) +#ifdef _WIN64 + KeRestoreExtendedProcessorState(&SaveState); +#else KeRestoreFloatingPointState (&floatingPointState); #endif } diff --git a/src/Driver/DumpFilter.c b/src/Driver/DumpFilter.c index 1b57bdbf..18feca06 100644 --- a/src/Driver/DumpFilter.c +++ b/src/Driver/DumpFilter.c @@ -14,6 +14,7 @@ #include "DriveFilter.h" #include "Ntdriver.h" #include "Tests.h" +#include "cpu.h" static DriveFilterExtension *BootDriveFilterExtension = NULL; static LARGE_INTEGER DumpPartitionOffset; @@ -63,7 +64,21 @@ NTSTATUS DumpFilterEntry (PFILTER_EXTENSION filterExtension, PFILTER_INITIALIZAT // KeSaveFloatingPointState() may generate a bug check during crash dump #if !defined (_WIN64) if (filterExtension->DumpType == DumpTypeCrashdump) + { dumpConfig.HwEncryptionEnabled = FALSE; + // disable also SSE 
optimizations + HasMMX() = 0; + HasISSE() = 0; + HasSSE2() = 0; + HasSSSE3() = 0; + HasSSE41() = 0; + HasSSE42() = 0; + HasAESNI() = 0; + HasCLMUL() = 0; + HasSAVX() = 0; + HasSAVX2() = 0; + HasSBMI2() = 0; + } #endif EnableHwEncryption (dumpConfig.HwEncryptionEnabled); diff --git a/src/Driver/Ntdriver.c b/src/Driver/Ntdriver.c index ab555904..8f6f151f 100644 --- a/src/Driver/Ntdriver.c +++ b/src/Driver/Ntdriver.c @@ -73,6 +73,11 @@ #pragma alloc_text(INIT,DriverEntry) #pragma alloc_text(INIT,TCCreateRootDeviceObject) +/* We need to silence 'type cast' warning in order to use MmGetSystemRoutineAddress. + * MmGetSystemRoutineAddress() should have been declared as returning FARPROC instead of PVOID. + */ +#pragma warning(disable:4055) + PDRIVER_OBJECT TCDriverObject; PDEVICE_OBJECT RootDeviceObject = NULL; static KMUTEX RootDeviceControlMutex; @@ -91,6 +96,8 @@ static size_t EncryptionThreadPoolFreeCpuCountLimit = 0; static BOOL SystemFavoriteVolumeDirty = FALSE; static BOOL PagingFileCreationPrevented = FALSE; static BOOL EnableExtendedIoctlSupport = FALSE; +static KeSaveExtendedProcessorStateFn KeSaveExtendedProcessorStatePtr = NULL; +static KeRestoreExtendedProcessorStateFn KeRestoreExtendedProcessorStatePtr = NULL; POOL_TYPE ExDefaultNonPagedPoolType = NonPagedPool; ULONG ExDefaultMdlProtection = 0; @@ -119,6 +126,15 @@ NTSTATUS DriverEntry (PDRIVER_OBJECT DriverObject, PUNICODE_STRING RegistryPath) ExDefaultMdlProtection = MdlMappingNoExecute; } + // KeSaveExtendedProcessorState/KeRestoreExtendedProcessorState are available starting from Windows 7 + if ((OsMajorVersion > 6) || (OsMajorVersion == 6 && OsMinorVersion >= 1)) + { + UNICODE_STRING saveFuncName = RTL_CONSTANT_STRING(L"KeSaveExtendedProcessorState"); + UNICODE_STRING restoreFuncName = RTL_CONSTANT_STRING(L"KeRestoreExtendedProcessorState"); /* each routine must be looked up under its own name */ + KeSaveExtendedProcessorStatePtr = (KeSaveExtendedProcessorStateFn) MmGetSystemRoutineAddress(&saveFuncName); + KeRestoreExtendedProcessorStatePtr = (KeRestoreExtendedProcessorStateFn) MmGetSystemRoutineAddress(&restoreFuncName); + } + // Load dump filter if the main
driver is already loaded if (NT_SUCCESS (TCDeviceIoControl (NT_ROOT_PREFIX, TC_IOCTL_GET_DRIVER_VERSION, NULL, 0, &version, sizeof (version)))) return DumpFilterEntry ((PFILTER_EXTENSION) DriverObject, (PFILTER_INITIALIZATION_DATA) RegistryPath); @@ -3960,3 +3976,28 @@ BOOL IsOSAtLeast (OSVersionEnum reqMinOS) return ((OsMajorVersion << 16 | OsMinorVersion << 8) >= (major << 16 | minor << 8)); } + +NTSTATUS NTAPI KeSaveExtendedProcessorState ( /* forwards to KeSaveExtendedProcessorStatePtr when it is set */ + __in ULONG64 Mask, + PXSTATE_SAVE XStateSave + ) +{ + if (KeSaveExtendedProcessorStatePtr) + { + return (KeSaveExtendedProcessorStatePtr) (Mask, XStateSave); + } + else + { + return STATUS_SUCCESS; /* pointer is NULL: nothing is saved, yet success is reported to the caller */ + } +} + +VOID NTAPI KeRestoreExtendedProcessorState ( /* forwards to KeRestoreExtendedProcessorStatePtr; does nothing when it is NULL */ + PXSTATE_SAVE XStateSave + ) +{ + if (KeRestoreExtendedProcessorStatePtr) + { + (KeRestoreExtendedProcessorStatePtr) (XStateSave); + } +} \ No newline at end of file -- cgit v1.2.3