]>
Commit | Line | Data |
---|---|---|
fa7499cc | 1 | /* |
2 | * OpenSSH Multi-threaded AES-CTR Cipher | |
3 | * | |
4 | * Author: Benjamin Bennett <ben@psc.edu> | |
5 | * Copyright (c) 2008 Pittsburgh Supercomputing Center. All rights reserved. | |
6 | * | |
7 | * Based on original OpenSSH AES-CTR cipher. Small portions remain unchanged, | |
8 | * Copyright (c) 2003 Markus Friedl <markus@openbsd.org> | |
9 | * | |
10 | * Permission to use, copy, modify, and distribute this software for any | |
11 | * purpose with or without fee is hereby granted, provided that the above | |
12 | * copyright notice and this permission notice appear in all copies. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
15 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
16 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
17 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
18 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
19 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
20 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
21 | */ | |
22 | #include "includes.h" | |
23 | ||
24 | #include <sys/types.h> | |
25 | ||
26 | #include <stdarg.h> | |
27 | #include <string.h> | |
28 | ||
29 | #include <openssl/evp.h> | |
30 | ||
31 | #include "xmalloc.h" | |
32 | #include "log.h" | |
33 | ||
34 | /* compatibility with old or broken OpenSSL versions */ | |
35 | #include "openbsd-compat/openssl-compat.h" | |
36 | ||
37 | #ifndef USE_BUILTIN_RIJNDAEL | |
38 | #include <openssl/aes.h> | |
39 | #endif | |
40 | ||
41 | #include <pthread.h> | |
42 | ||
43 | /*-------------------- TUNABLES --------------------*/ | |
44 | /* Number of pregen threads to use */ | |
45 | #define CIPHER_THREADS 2 | |
46 | ||
47 | /* Number of keystream queues */ | |
48 | #define NUMKQ (CIPHER_THREADS + 2) | |
49 | ||
50 | /* Length of a keystream queue */ | |
51 | #define KQLEN 4096 | |
52 | ||
53 | /* Processor cacheline length */ | |
54 | #define CACHELINE_LEN 64 | |
fa7499cc | 55 | /*-------------------- END TUNABLES --------------------*/ |
56 | ||
57 | ||
58 | const EVP_CIPHER *evp_aes_ctr_mt(void); | |
59 | ||
/*
 * Keystream Queue state.  Transitions (see thread_loop/ssh_aes_ctr):
 * KQINIT -> KQDRAINING (queue 0 only, filled by thread 0 at startup);
 * KQEMPTY -> KQFILLING -> KQFULL (pregen threads);
 * KQFULL -> KQDRAINING -> KQEMPTY (consumer in ssh_aes_ctr).
 */
enum {
	KQINIT,		/* startup state of queue 0; filled once by thread 0 */
	KQEMPTY,	/* consumed; ready for a pregen thread to refill */
	KQFILLING,	/* a pregen thread is generating keystream into it */
	KQFULL,		/* filled; ready for the consumer to drain */
	KQDRAINING	/* consumer is reading keystream blocks out of it */
};
68 | ||
/*
 * Keystream Queue struct.  One producer/consumer unit: a batch of
 * pregenerated keystream blocks plus the counter for its next refill.
 */
struct kq {
	u_char keys[KQLEN][AES_BLOCK_SIZE];	/* pregenerated keystream blocks */
	u_char ctr[AES_BLOCK_SIZE];	/* counter for the NEXT fill of this queue */
	u_char pad0[CACHELINE_LEN];	/* keep lock/cond off the data cache lines */
	volatile int qstate;		/* KQ* state; protected by 'lock' */
	pthread_mutex_t lock;		/* guards qstate transitions */
	pthread_cond_t cond;		/* signalled on qstate transitions */
	u_char pad1[CACHELINE_LEN];	/* pad trailing edge to a cache line */
};
79 | ||
/*
 * Context struct: per-EVP_CIPHER_CTX state stored via app_data.
 */
struct ssh_aes_ctr_ctx
{
	struct kq q[NUMKQ];	/* ring of keystream queues */
	AES_KEY aes_ctx;	/* key schedule; each pregen thread copies it */
	u_char aes_counter[AES_BLOCK_SIZE];	/* NOTE(review): not referenced in this file — confirm before relying on it */
	pthread_t tid[CIPHER_THREADS];	/* pregen thread ids */
	int state;		/* HAVE_KEY|HAVE_IV init progress */
	int qidx;		/* index of queue currently being drained */
	int ridx;		/* read index into q[qidx].keys */
};
91 | ||
/* <friedl>
 * Increment the counter 'ctr' by one.  The counter is 'len' bytes in
 * network byte order (MSB at ctr[0], LSB at ctr[len-1]); a wrapped byte
 * carries into the next more-significant byte.
 */
static void
ssh_ctr_inc(u_char *ctr, u_int len)
{
	u_int pos = len;

	while (pos-- > 0) {
		ctr[pos]++;
		if (ctr[pos] != 0)
			break;		/* no carry out of this byte; done */
	}
}
106 | ||
/*
 * Add 'num' to the counter 'ctr' of 'len' bytes, stored in network
 * byte order (MSB at ctr[0]).  Stops early once there is nothing left
 * to add and no carry to propagate.
 */
static void
ssh_ctr_add(u_char *ctr, uint32_t num, u_int len)
{
	int idx;
	u_int carry = 0;

	for (idx = (int)len - 1; idx >= 0; idx--) {
		u_int sum;

		if (num == 0 && carry == 0)
			break;	/* nothing more to propagate */
		sum = ctr[idx] + (num & 0xff) + carry;
		ctr[idx] = sum & 0xff;
		carry = sum >> 8;
		num >>= 8;
	}
}
123 | ||
/*
 * Cancellation cleanup handler: a pregen thread may be cancelled while
 * blocked in pthread_cond_wait() holding the queue mutex, so this is
 * pushed with pthread_cleanup_push() to release that mutex.
 */
static void
thread_loop_cleanup(void *x)
{
	pthread_mutex_t *lock = x;

	pthread_mutex_unlock(lock);
}
132 | ||
/*
 * The life of a pregen thread:
 * Find empty keystream queues and fill them using their counter.
 * When done, update counter for the next fill.
 *
 * Runs until cancelled by ssh_aes_ctr_init()/ssh_aes_ctr_cleanup();
 * cancellation points are pthread_testcancel() and pthread_cond_wait().
 */
static void *
thread_loop(void *x)
{
	AES_KEY key;
	struct ssh_aes_ctr_ctx *c = x;
	struct kq *q;
	int i;
	int qidx;

	/* Thread local copy of AES key (read-only after init, no lock needed) */
	memcpy(&key, &c->aes_ctx, sizeof(key));

	/*
	 * Handle the special case of startup, one thread must fill
	 * the first KQ then mark it as draining. Lock held throughout.
	 */
	if (pthread_equal(pthread_self(), c->tid[0])) {
		q = &c->q[0];
		pthread_mutex_lock(&q->lock);
		if (q->qstate == KQINIT) {
			for (i = 0; i < KQLEN; i++) {
				AES_encrypt(q->ctr, q->keys[i], &key);
				ssh_ctr_inc(q->ctr, AES_BLOCK_SIZE);
			}
			/*
			 * Skip the counter past the blocks the other queues
			 * cover, so this queue's next fill continues where
			 * queue NUMKQ-1 leaves off.
			 */
			ssh_ctr_add(q->ctr, KQLEN * (NUMKQ - 1), AES_BLOCK_SIZE);
			q->qstate = KQDRAINING;
			/* Wake the consumer blocked in ssh_aes_ctr_init() */
			pthread_cond_broadcast(&q->cond);
		}
		pthread_mutex_unlock(&q->lock);
	}

	/*
	 * Normal case is to find empty queues and fill them, skipping over
	 * queues already filled by other threads and stopping to wait for
	 * a draining queue to become empty.
	 *
	 * Multiple threads may be waiting on a draining queue and awoken
	 * when empty. The first thread to wake will mark it as filling,
	 * others will move on to fill, skip, or wait on the next queue.
	 */
	for (qidx = 1;; qidx = (qidx + 1) % NUMKQ) {
		/* Check if I was cancelled, also checked in cond_wait */
		pthread_testcancel();

		/* Lock queue and block if its draining */
		q = &c->q[qidx];
		pthread_mutex_lock(&q->lock);
		/* Cleanup handler releases the mutex if cancelled in cond_wait */
		pthread_cleanup_push(thread_loop_cleanup, &q->lock);
		while (q->qstate == KQDRAINING || q->qstate == KQINIT) {
			pthread_cond_wait(&q->cond, &q->lock);
		}
		pthread_cleanup_pop(0);

		/* If filling or full, somebody else got it, skip */
		if (q->qstate != KQEMPTY) {
			pthread_mutex_unlock(&q->lock);
			continue;
		}

		/*
		 * Empty, let's fill it.
		 * Queue lock is relinquished while we do this so others
		 * can see that it's being filled.
		 * (q->ctr is only touched by the thread that set KQFILLING.)
		 */
		q->qstate = KQFILLING;
		pthread_mutex_unlock(&q->lock);
		for (i = 0; i < KQLEN; i++) {
			AES_encrypt(q->ctr, q->keys[i], &key);
			ssh_ctr_inc(q->ctr, AES_BLOCK_SIZE);
		}

		/* Re-lock, mark full and signal consumer */
		pthread_mutex_lock(&q->lock);
		/* Pre-position the counter for this queue's next fill */
		ssh_ctr_add(q->ctr, KQLEN * (NUMKQ - 1), AES_BLOCK_SIZE);
		q->qstate = KQFULL;
		pthread_cond_signal(&q->cond);
		pthread_mutex_unlock(&q->lock);
	}

	/* NOTREACHED: the loop above only exits via cancellation */
	return NULL;
}
219 | ||
220 | static int | |
221 | ssh_aes_ctr(EVP_CIPHER_CTX *ctx, u_char *dest, const u_char *src, | |
222 | u_int len) | |
223 | { | |
224 | struct ssh_aes_ctr_ctx *c; | |
225 | struct kq *q, *oldq; | |
226 | int ridx; | |
227 | u_char *buf; | |
228 | ||
229 | if (len == 0) | |
230 | return (1); | |
231 | if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) == NULL) | |
232 | return (0); | |
233 | ||
234 | q = &c->q[c->qidx]; | |
235 | ridx = c->ridx; | |
236 | ||
237 | /* src already padded to block multiple */ | |
238 | while (len > 0) { | |
239 | buf = q->keys[ridx]; | |
240 | ||
241 | #ifdef CIPHER_BYTE_XOR | |
242 | dest[0] = src[0] ^ buf[0]; | |
243 | dest[1] = src[1] ^ buf[1]; | |
244 | dest[2] = src[2] ^ buf[2]; | |
245 | dest[3] = src[3] ^ buf[3]; | |
246 | dest[4] = src[4] ^ buf[4]; | |
247 | dest[5] = src[5] ^ buf[5]; | |
248 | dest[6] = src[6] ^ buf[6]; | |
249 | dest[7] = src[7] ^ buf[7]; | |
250 | dest[8] = src[8] ^ buf[8]; | |
251 | dest[9] = src[9] ^ buf[9]; | |
252 | dest[10] = src[10] ^ buf[10]; | |
253 | dest[11] = src[11] ^ buf[11]; | |
254 | dest[12] = src[12] ^ buf[12]; | |
255 | dest[13] = src[13] ^ buf[13]; | |
256 | dest[14] = src[14] ^ buf[14]; | |
257 | dest[15] = src[15] ^ buf[15]; | |
258 | #else | |
259 | *(uint64_t *)dest = *(uint64_t *)src ^ *(uint64_t *)buf; | |
260 | *(uint64_t *)(dest + 8) = *(uint64_t *)(src + 8) ^ | |
261 | *(uint64_t *)(buf + 8); | |
262 | #endif | |
263 | ||
264 | dest += 16; | |
265 | src += 16; | |
266 | len -= 16; | |
267 | ssh_ctr_inc(ctx->iv, AES_BLOCK_SIZE); | |
268 | ||
269 | /* Increment read index, switch queues on rollover */ | |
270 | if ((ridx = (ridx + 1) % KQLEN) == 0) { | |
271 | oldq = q; | |
272 | ||
273 | /* Mark next queue draining, may need to wait */ | |
274 | c->qidx = (c->qidx + 1) % NUMKQ; | |
275 | q = &c->q[c->qidx]; | |
276 | pthread_mutex_lock(&q->lock); | |
277 | while (q->qstate != KQFULL) { | |
fa7499cc | 278 | pthread_cond_wait(&q->cond, &q->lock); |
279 | } | |
280 | q->qstate = KQDRAINING; | |
281 | pthread_mutex_unlock(&q->lock); | |
282 | ||
283 | /* Mark consumed queue empty and signal producers */ | |
284 | pthread_mutex_lock(&oldq->lock); | |
285 | oldq->qstate = KQEMPTY; | |
fa7499cc | 286 | pthread_cond_broadcast(&oldq->cond); |
287 | pthread_mutex_unlock(&oldq->lock); | |
288 | } | |
289 | } | |
290 | c->ridx = ridx; | |
291 | return (1); | |
292 | } | |
293 | ||
/* Bit flags tracking init progress: threads start once both are set */
#define HAVE_NONE 0
#define HAVE_KEY 1
#define HAVE_IV 2
/*
 * EVP init callback.  May be invoked more than once (key and IV can
 * arrive in separate calls); allocates the context on first use and,
 * once both key and IV are present, seeds the queue counters and
 * launches the pregen threads.  If called again on a fully-initialized
 * context (rekey), the old threads are cancelled and state restarts.
 * Always returns 1.
 */
static int
ssh_aes_ctr_init(EVP_CIPHER_CTX *ctx, const u_char *key, const u_char *iv,
    int enc)
{
	struct ssh_aes_ctr_ctx *c;
	int i;

	if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) == NULL) {
		/* First call: allocate context and init per-queue sync objects */
		c = xmalloc(sizeof(*c));

		c->state = HAVE_NONE;
		for (i = 0; i < NUMKQ; i++) {
			pthread_mutex_init(&c->q[i].lock, NULL);
			pthread_cond_init(&c->q[i].cond, NULL);
		}

		EVP_CIPHER_CTX_set_app_data(ctx, c);
	}

	if (c->state == (HAVE_KEY | HAVE_IV)) {
		/* Cancel pregen threads */
		for (i = 0; i < CIPHER_THREADS; i++)
			pthread_cancel(c->tid[i]);
		for (i = 0; i < CIPHER_THREADS; i++)
			pthread_join(c->tid[i], NULL);
		/* Start over getting key & iv */
		c->state = HAVE_NONE;
	}

	if (key != NULL) {
		AES_set_encrypt_key(key, EVP_CIPHER_CTX_key_length(ctx) * 8,
		    &c->aes_ctx);
		c->state |= HAVE_KEY;
	}

	if (iv != NULL) {
		/* Direct ctx->iv access: assumes pre-1.1 OpenSSL struct layout */
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		c->state |= HAVE_IV;
	}

	if (c->state == (HAVE_KEY | HAVE_IV)) {
		/* Clear queues: queue i starts its counter i*KQLEN blocks in */
		memcpy(c->q[0].ctr, ctx->iv, AES_BLOCK_SIZE);
		c->q[0].qstate = KQINIT;
		for (i = 1; i < NUMKQ; i++) {
			memcpy(c->q[i].ctr, ctx->iv, AES_BLOCK_SIZE);
			ssh_ctr_add(c->q[i].ctr, i * KQLEN, AES_BLOCK_SIZE);
			c->q[i].qstate = KQEMPTY;
		}
		c->qidx = 0;
		c->ridx = 0;

		/* Start threads; NOTE(review): pthread_create return value
		 * is unchecked — on failure tid[i] is indeterminate and a
		 * later cancel/join would misbehave. Consider handling. */
		for (i = 0; i < CIPHER_THREADS; i++) {
			pthread_create(&c->tid[i], NULL, thread_loop, c);
		}
		/* Block until thread 0 has filled queue 0 (KQINIT->KQDRAINING) */
		pthread_mutex_lock(&c->q[0].lock);
		while (c->q[0].qstate != KQDRAINING)
			pthread_cond_wait(&c->q[0].cond, &c->q[0].lock);
		pthread_mutex_unlock(&c->q[0].lock);

	}
	return (1);
}
361 | ||
362 | static int | |
363 | ssh_aes_ctr_cleanup(EVP_CIPHER_CTX *ctx) | |
364 | { | |
365 | struct ssh_aes_ctr_ctx *c; | |
366 | int i; | |
367 | ||
368 | if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) != NULL) { | |
fa7499cc | 369 | /* Cancel pregen threads */ |
370 | for (i = 0; i < CIPHER_THREADS; i++) | |
371 | pthread_cancel(c->tid[i]); | |
372 | for (i = 0; i < CIPHER_THREADS; i++) | |
373 | pthread_join(c->tid[i], NULL); | |
374 | ||
375 | memset(c, 0, sizeof(*c)); | |
376 | xfree(c); | |
377 | EVP_CIPHER_CTX_set_app_data(ctx, NULL); | |
378 | } | |
379 | return (1); | |
380 | } | |
381 | ||
382 | /* <friedl> */ | |
383 | const EVP_CIPHER * | |
384 | evp_aes_ctr_mt(void) | |
385 | { | |
386 | static EVP_CIPHER aes_ctr; | |
387 | ||
388 | memset(&aes_ctr, 0, sizeof(EVP_CIPHER)); | |
389 | aes_ctr.nid = NID_undef; | |
390 | aes_ctr.block_size = AES_BLOCK_SIZE; | |
391 | aes_ctr.iv_len = AES_BLOCK_SIZE; | |
392 | aes_ctr.key_len = 16; | |
393 | aes_ctr.init = ssh_aes_ctr_init; | |
394 | aes_ctr.cleanup = ssh_aes_ctr_cleanup; | |
395 | aes_ctr.do_cipher = ssh_aes_ctr; | |
396 | #ifndef SSH_OLD_EVP | |
397 | aes_ctr.flags = EVP_CIPH_CBC_MODE | EVP_CIPH_VARIABLE_LENGTH | | |
398 | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CUSTOM_IV; | |
399 | #endif | |
400 | return (&aes_ctr); | |
401 | } |