VeraCrypt
aboutsummaryrefslogtreecommitdiff
path: root/src/Crypto/chacha-xmm.c
blob: 198d0b5b2d9f2742108e917db2f162f7c96d64f4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
/*
chacha.c version $Date: 2014/09/08 17:38:05 $
D. J. Bernstein
Romain Dolbeau
Public domain.
*/

// Modified by kerukuro for use in cppcrypto.

/* Adapted to VeraCrypt */

#include "Common/Tcdefs.h"
#include "config.h"
#include "cpu.h"
#include "misc.h"

#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE

#ifndef _M_X64
#ifdef _MSC_VER
#if _MSC_VER < 1900
__inline __m128i _mm_set_epi64x(int64 i0, int64 i1) {
	union {
		int64 q[2];
		int32 r[4];
	} u;
	u.q[0] = i1;  u.q[1] = i0;
	// this is inefficient, but other solutions are worse
	return _mm_setr_epi32(u.r[0], u.r[1], u.r[2], u.r[3]);
}
#pragma warning(disable:4799)
__inline __m128i _mm_set1_epi64x(int64 a)
{
	union {
		__m64 m;
		long long ii;
	} u;
	u.ii = a;
	return _mm_set1_epi64(u.m);
}
#pragma warning(default:4799)
#endif
#endif
#endif

#define uint8 byte

#define U32V(v) (v)
#define ROTL32(x,n)	rotl32(x, n)
#define U32TO8_LITTLE(p, v) (((uint32*)(p))[0] = (v))
#define U8TO32_LITTLE(v) *((uint32*)(v))


#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

#define QUARTERROUND(a,b,c,d) \
  x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \
  x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \
  x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \
  x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7);

static void salsa20_wordtobyte(uint8 output[64],const uint32 input[16], unsigned int r)
{
  uint32 x[16];
  int i;

  for (i = 0;i < 16;++i) x[i] = input[i];
  for (i = r;i > 0;--i) {
    QUARTERROUND( 0, 4, 8,12)
    QUARTERROUND( 1, 5, 9,13)
    QUARTERROUND( 2, 6,10,14)
    QUARTERROUND( 3, 7,11,15)
    QUARTERROUND( 0, 5,10,15)
    QUARTERROUND( 1, 6,11,12)
    QUARTERROUND( 2, 7, 8,13)
    QUARTERROUND( 3, 4, 9,14)
  }
  for (i = 0;i < 16;++i) x[i] = PLUS(x[i],input[i]);
  for (i = 0;i < 16;++i) U32TO8_LITTLE(output + 4 * i,x[i]);
}

void chacha_ECRYPT_init(void)
{
  return;
}

static const char sigma[17] = "expand 32-byte k";
static const char tau[17] = "expand 16-byte k";

void chacha_ECRYPT_keysetup(uint32* input,const uint8 *k,uint32 kbits,uint32 ivbits)
{
  const char *constants;

  input[4] = U8TO32_LITTLE(k + 0);
  input[5] = U8TO32_LITTLE(k + 4);
  input[6] = U8TO32_LITTLE(k + 8);
  input[7] = U8TO32_LITTLE(k + 12);
  if (kbits == 256) { /* recommended */
    k += 16;
    constants = sigma;
  } else { /* kbits == 128 */
    constants = tau;
  }
  input[8] = U8TO32_LITTLE(k + 0);
  input[9] = U8TO32_LITTLE(k + 4);
  input[10] = U8TO32_LITTLE(k + 8);
  input[11] = U8TO32_LITTLE(k + 12);
  input[0] = U8TO32_LITTLE(constants + 0);
  input[1] = U8TO32_LITTLE(constants + 4);
  input[2] = U8TO32_LITTLE(constants + 8);
  input[3] = U8TO32_LITTLE(constants + 12);
}

void chacha_ECRYPT_ivsetup(uint32* input,const uint8 *iv)
{
  input[12] = 0;
  input[13] = 0;
  input[14] = U8TO32_LITTLE(iv + 0);
  input[15] = U8TO32_LITTLE(iv + 4);
}

void chacha_ECRYPT_encrypt_bytes(size_t bytes, uint32* x, const uint8* m, uint8* out, uint8* output, unsigned int r)
{
  unsigned int i;

#include "chacha_u4.h"

#include "chacha_u1.h"

#ifndef _M_X64
#ifdef _MSC_VER
#if _MSC_VER < 1900
  _mm_empty();
#endif
#endif
#endif

  if (!bytes) return;
  for (;;) {
    salsa20_wordtobyte(output,x, r);
    x[12] = PLUSONE(x[12]);
    if (!x[12]) {
      x[13] = PLUSONE(x[13]);
      /* stopping at 2^70 bytes per nonce is user's responsibility */
    }
    if (bytes <= 64) {
      for (i = 0;i < bytes;++i) out[i] = m[i] ^ output[i];
      return;
    }
    for (i = 0;i < 64;++i) out[i] = m[i] ^ output[i];
    bytes -= 64;
    out += 64;
    m += 64;
  }
}

#endif