]> andersk Git - openssh.git/blame - rijndael.c
- stevesk@cvs.openbsd.org 2001/09/12 18:18:25
[openssh.git] / rijndael.c
CommitLineData
d6192346 1/* $OpenBSD: rijndael.c,v 1.8 2001/07/30 16:23:30 stevesk Exp $ */
6b523bae 2
3/* This is an independent implementation of the encryption algorithm: */
4/* */
5/* RIJNDAEL by Joan Daemen and Vincent Rijmen */
6/* */
7/* which is a candidate algorithm in the Advanced Encryption Standard */
8/* programme of the US National Institute of Standards and Technology. */
d6192346 9
10/*
11 -----------------------------------------------------------------------
12 Copyright (c) 2001 Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK
13
14 TERMS
15
16 Redistribution and use in source and binary forms, with or without
17 modification, are permitted provided that the following conditions
18 are met:
19 1. Redistributions of source code must retain the above copyright
20 notice, this list of conditions and the following disclaimer.
21 2. Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 This software is provided 'as is' with no guarantees of correctness or
26 fitness for purpose.
27 -----------------------------------------------------------------------
28*/
6b523bae 29
30/* Timing data for Rijndael (rijndael.c)
31
32Algorithm: rijndael (rijndael.c)
33
34128 bit key:
35Key Setup: 305/1389 cycles (encrypt/decrypt)
36Encrypt: 374 cycles = 68.4 mbits/sec
37Decrypt: 352 cycles = 72.7 mbits/sec
38Mean: 363 cycles = 70.5 mbits/sec
39
40192 bit key:
41Key Setup: 277/1595 cycles (encrypt/decrypt)
42Encrypt: 439 cycles = 58.3 mbits/sec
43Decrypt: 425 cycles = 60.2 mbits/sec
44Mean: 432 cycles = 59.3 mbits/sec
45
46256 bit key:
47Key Setup: 374/1960 cycles (encrypt/decrypt)
48Encrypt: 502 cycles = 51.0 mbits/sec
49Decrypt: 498 cycles = 51.4 mbits/sec
50Mean: 500 cycles = 51.2 mbits/sec
51
52*/
94ec8c6b 53
6bcf7caa 54#include "config.h"
94ec8c6b 55#include "rijndael.h"
56
6b523bae 57void gen_tabs __P((void));
58
59/* 3. Basic macros for speeding up generic operations */
60
61/* Circular rotate of 32 bit values */
62
63#define rotr(x,n) (((x) >> ((int)(n))) | ((x) << (32 - (int)(n))))
64#define rotl(x,n) (((x) << ((int)(n))) | ((x) >> (32 - (int)(n))))
65
66/* Invert byte order in a 32 bit variable */
67
68#define bswap(x) ((rotl(x, 8) & 0x00ff00ff) | (rotr(x, 8) & 0xff00ff00))
69
2b87da3b 70/* Extract byte from a 32 bit quantity (little endian notation) */
6b523bae 71
72#define byte(x,n) ((u1byte)((x) >> (8 * n)))
73
cf0c5df5 74#ifdef WORDS_BIGENDIAN
6b523bae 75#define BYTE_SWAP
76#endif
77
78#ifdef BYTE_SWAP
79#define io_swap(x) bswap(x)
80#else
81#define io_swap(x) (x)
82#endif
83
84#define LARGE_TABLES
85
86u1byte pow_tab[256];
87u1byte log_tab[256];
88u1byte sbx_tab[256];
89u1byte isb_tab[256];
90u4byte rco_tab[ 10];
91u4byte ft_tab[4][256];
92u4byte it_tab[4][256];
93
94#ifdef LARGE_TABLES
95 u4byte fl_tab[4][256];
96 u4byte il_tab[4][256];
97#endif
98
99u4byte tab_gen = 0;
100
101#define ff_mult(a,b) (a && b ? pow_tab[(log_tab[a] + log_tab[b]) % 255] : 0)
102
103#define f_rn(bo, bi, n, k) \
104 bo[n] = ft_tab[0][byte(bi[n],0)] ^ \
2b87da3b 105 ft_tab[1][byte(bi[(n + 1) & 3],1)] ^ \
106 ft_tab[2][byte(bi[(n + 2) & 3],2)] ^ \
107 ft_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n)
6b523bae 108
109#define i_rn(bo, bi, n, k) \
110 bo[n] = it_tab[0][byte(bi[n],0)] ^ \
2b87da3b 111 it_tab[1][byte(bi[(n + 3) & 3],1)] ^ \
112 it_tab[2][byte(bi[(n + 2) & 3],2)] ^ \
113 it_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n)
6b523bae 114
115#ifdef LARGE_TABLES
116
117#define ls_box(x) \
118 ( fl_tab[0][byte(x, 0)] ^ \
119 fl_tab[1][byte(x, 1)] ^ \
120 fl_tab[2][byte(x, 2)] ^ \
121 fl_tab[3][byte(x, 3)] )
122
123#define f_rl(bo, bi, n, k) \
124 bo[n] = fl_tab[0][byte(bi[n],0)] ^ \
2b87da3b 125 fl_tab[1][byte(bi[(n + 1) & 3],1)] ^ \
126 fl_tab[2][byte(bi[(n + 2) & 3],2)] ^ \
127 fl_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n)
6b523bae 128
129#define i_rl(bo, bi, n, k) \
130 bo[n] = il_tab[0][byte(bi[n],0)] ^ \
2b87da3b 131 il_tab[1][byte(bi[(n + 3) & 3],1)] ^ \
132 il_tab[2][byte(bi[(n + 2) & 3],2)] ^ \
133 il_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n)
6b523bae 134
135#else
136
137#define ls_box(x) \
138 ((u4byte)sbx_tab[byte(x, 0)] << 0) ^ \
139 ((u4byte)sbx_tab[byte(x, 1)] << 8) ^ \
140 ((u4byte)sbx_tab[byte(x, 2)] << 16) ^ \
141 ((u4byte)sbx_tab[byte(x, 3)] << 24)
142
143#define f_rl(bo, bi, n, k) \
144 bo[n] = (u4byte)sbx_tab[byte(bi[n],0)] ^ \
2b87da3b 145 rotl(((u4byte)sbx_tab[byte(bi[(n + 1) & 3],1)]), 8) ^ \
146 rotl(((u4byte)sbx_tab[byte(bi[(n + 2) & 3],2)]), 16) ^ \
147 rotl(((u4byte)sbx_tab[byte(bi[(n + 3) & 3],3)]), 24) ^ *(k + n)
6b523bae 148
149#define i_rl(bo, bi, n, k) \
150 bo[n] = (u4byte)isb_tab[byte(bi[n],0)] ^ \
2b87da3b 151 rotl(((u4byte)isb_tab[byte(bi[(n + 3) & 3],1)]), 8) ^ \
152 rotl(((u4byte)isb_tab[byte(bi[(n + 2) & 3],2)]), 16) ^ \
153 rotl(((u4byte)isb_tab[byte(bi[(n + 1) & 3],3)]), 24) ^ *(k + n)
6b523bae 154
155#endif
156
157void
158gen_tabs(void)
94ec8c6b 159{
6b523bae 160 u4byte i, t;
161 u1byte p, q;
162
163 /* log and power tables for GF(2**8) finite field with */
164 /* 0x11b as modular polynomial - the simplest prmitive */
165 /* root is 0x11, used here to generate the tables */
166
167 for(i = 0,p = 1; i < 256; ++i) {
168 pow_tab[i] = (u1byte)p; log_tab[p] = (u1byte)i;
169
170 p = p ^ (p << 1) ^ (p & 0x80 ? 0x01b : 0);
94ec8c6b 171 }
6b523bae 172
173 log_tab[1] = 0; p = 1;
174
175 for(i = 0; i < 10; ++i) {
2b87da3b 176 rco_tab[i] = p;
6b523bae 177
178 p = (p << 1) ^ (p & 0x80 ? 0x1b : 0);
94ec8c6b 179 }
94ec8c6b 180
6b523bae 181 /* note that the affine byte transformation matrix in */
182 /* rijndael specification is in big endian format with */
183 /* bit 0 as the most significant bit. In the remainder */
184 /* of the specification the bits are numbered from the */
185 /* least significant end of a byte. */
186
187 for(i = 0; i < 256; ++i) {
2b87da3b 188 p = (i ? pow_tab[255 - log_tab[i]] : 0); q = p;
189 q = (q >> 7) | (q << 1); p ^= q;
190 q = (q >> 7) | (q << 1); p ^= q;
191 q = (q >> 7) | (q << 1); p ^= q;
192 q = (q >> 7) | (q << 1); p ^= q ^ 0x63;
6b523bae 193 sbx_tab[i] = (u1byte)p; isb_tab[p] = (u1byte)i;
94ec8c6b 194 }
6b523bae 195
196 for(i = 0; i < 256; ++i) {
2b87da3b 197 p = sbx_tab[i];
198
199#ifdef LARGE_TABLES
6b523bae 200
6b523bae 201 t = p; fl_tab[0][i] = t;
202 fl_tab[1][i] = rotl(t, 8);
203 fl_tab[2][i] = rotl(t, 16);
204 fl_tab[3][i] = rotl(t, 24);
205#endif
206 t = ((u4byte)ff_mult(2, p)) |
207 ((u4byte)p << 8) |
208 ((u4byte)p << 16) |
209 ((u4byte)ff_mult(3, p) << 24);
2b87da3b 210
6b523bae 211 ft_tab[0][i] = t;
212 ft_tab[1][i] = rotl(t, 8);
213 ft_tab[2][i] = rotl(t, 16);
214 ft_tab[3][i] = rotl(t, 24);
215
2b87da3b 216 p = isb_tab[i];
6b523bae 217
2b87da3b 218#ifdef LARGE_TABLES
219
220 t = p; il_tab[0][i] = t;
221 il_tab[1][i] = rotl(t, 8);
222 il_tab[2][i] = rotl(t, 16);
6b523bae 223 il_tab[3][i] = rotl(t, 24);
2b87da3b 224#endif
6b523bae 225 t = ((u4byte)ff_mult(14, p)) |
226 ((u4byte)ff_mult( 9, p) << 8) |
227 ((u4byte)ff_mult(13, p) << 16) |
228 ((u4byte)ff_mult(11, p) << 24);
2b87da3b 229
230 it_tab[0][i] = t;
231 it_tab[1][i] = rotl(t, 8);
232 it_tab[2][i] = rotl(t, 16);
233 it_tab[3][i] = rotl(t, 24);
94ec8c6b 234 }
6b523bae 235
236 tab_gen = 1;
9ea53ba5 237}
94ec8c6b 238
6b523bae 239#define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b)
240
241#define imix_col(y,x) \
242 u = star_x(x); \
243 v = star_x(u); \
244 w = star_x(v); \
245 t = w ^ (x); \
246 (y) = u ^ v ^ w; \
247 (y) ^= rotr(u ^ t, 8) ^ \
2b87da3b 248 rotr(v ^ t, 16) ^ \
249 rotr(t,24)
6b523bae 250
251/* initialise the key schedule from the user supplied key */
252
253#define loop4(i) \
254{ t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \
255 t ^= e_key[4 * i]; e_key[4 * i + 4] = t; \
256 t ^= e_key[4 * i + 1]; e_key[4 * i + 5] = t; \
257 t ^= e_key[4 * i + 2]; e_key[4 * i + 6] = t; \
258 t ^= e_key[4 * i + 3]; e_key[4 * i + 7] = t; \
259}
260
261#define loop6(i) \
262{ t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \
263 t ^= e_key[6 * i]; e_key[6 * i + 6] = t; \
264 t ^= e_key[6 * i + 1]; e_key[6 * i + 7] = t; \
265 t ^= e_key[6 * i + 2]; e_key[6 * i + 8] = t; \
266 t ^= e_key[6 * i + 3]; e_key[6 * i + 9] = t; \
267 t ^= e_key[6 * i + 4]; e_key[6 * i + 10] = t; \
268 t ^= e_key[6 * i + 5]; e_key[6 * i + 11] = t; \
269}
270
271#define loop8(i) \
272{ t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \
273 t ^= e_key[8 * i]; e_key[8 * i + 8] = t; \
274 t ^= e_key[8 * i + 1]; e_key[8 * i + 9] = t; \
275 t ^= e_key[8 * i + 2]; e_key[8 * i + 10] = t; \
276 t ^= e_key[8 * i + 3]; e_key[8 * i + 11] = t; \
277 t = e_key[8 * i + 4] ^ ls_box(t); \
278 e_key[8 * i + 12] = t; \
279 t ^= e_key[8 * i + 5]; e_key[8 * i + 13] = t; \
280 t ^= e_key[8 * i + 6]; e_key[8 * i + 14] = t; \
281 t ^= e_key[8 * i + 7]; e_key[8 * i + 15] = t; \
282}
283
284rijndael_ctx *
285rijndael_set_key(rijndael_ctx *ctx, const u4byte *in_key, const u4byte key_len,
286 int encrypt)
2b87da3b 287{
6b523bae 288 u4byte i, t, u, v, w;
289 u4byte *e_key = ctx->e_key;
290 u4byte *d_key = ctx->d_key;
291
292 ctx->decrypt = !encrypt;
293
294 if(!tab_gen)
295 gen_tabs();
296
297 ctx->k_len = (key_len + 31) / 32;
298
299 e_key[0] = io_swap(in_key[0]); e_key[1] = io_swap(in_key[1]);
300 e_key[2] = io_swap(in_key[2]); e_key[3] = io_swap(in_key[3]);
2b87da3b 301
6b523bae 302 switch(ctx->k_len) {
2b87da3b 303 case 4: t = e_key[3];
304 for(i = 0; i < 10; ++i)
6b523bae 305 loop4(i);
2b87da3b 306 break;
6b523bae 307
2b87da3b 308 case 6: e_key[4] = io_swap(in_key[4]); t = e_key[5] = io_swap(in_key[5]);
309 for(i = 0; i < 8; ++i)
6b523bae 310 loop6(i);
2b87da3b 311 break;
6b523bae 312
2b87da3b 313 case 8: e_key[4] = io_swap(in_key[4]); e_key[5] = io_swap(in_key[5]);
314 e_key[6] = io_swap(in_key[6]); t = e_key[7] = io_swap(in_key[7]);
315 for(i = 0; i < 7; ++i)
6b523bae 316 loop8(i);
2b87da3b 317 break;
6b523bae 318 }
319
320 if (!encrypt) {
321 d_key[0] = e_key[0]; d_key[1] = e_key[1];
322 d_key[2] = e_key[2]; d_key[3] = e_key[3];
323
324 for(i = 4; i < 4 * ctx->k_len + 24; ++i) {
325 imix_col(d_key[i], e_key[i]);
326 }
94ec8c6b 327 }
6b523bae 328
329 return ctx;
dfe89252 330}
94ec8c6b 331
6b523bae 332/* encrypt a block of text */
333
334#define f_nround(bo, bi, k) \
335 f_rn(bo, bi, 0, k); \
336 f_rn(bo, bi, 1, k); \
337 f_rn(bo, bi, 2, k); \
338 f_rn(bo, bi, 3, k); \
339 k += 4
340
341#define f_lround(bo, bi, k) \
342 f_rl(bo, bi, 0, k); \
343 f_rl(bo, bi, 1, k); \
344 f_rl(bo, bi, 2, k); \
345 f_rl(bo, bi, 3, k)
346
347void
348rijndael_encrypt(rijndael_ctx *ctx, const u4byte *in_blk, u4byte *out_blk)
2b87da3b 349{
6b523bae 350 u4byte k_len = ctx->k_len;
351 u4byte *e_key = ctx->e_key;
352 u4byte b0[4], b1[4], *kp;
353
354 b0[0] = io_swap(in_blk[0]) ^ e_key[0];
355 b0[1] = io_swap(in_blk[1]) ^ e_key[1];
356 b0[2] = io_swap(in_blk[2]) ^ e_key[2];
357 b0[3] = io_swap(in_blk[3]) ^ e_key[3];
358
359 kp = e_key + 4;
360
361 if(k_len > 6) {
362 f_nround(b1, b0, kp); f_nround(b0, b1, kp);
363 }
364
365 if(k_len > 4) {
366 f_nround(b1, b0, kp); f_nround(b0, b1, kp);
367 }
368
369 f_nround(b1, b0, kp); f_nround(b0, b1, kp);
370 f_nround(b1, b0, kp); f_nround(b0, b1, kp);
371 f_nround(b1, b0, kp); f_nround(b0, b1, kp);
372 f_nround(b1, b0, kp); f_nround(b0, b1, kp);
373 f_nround(b1, b0, kp); f_lround(b0, b1, kp);
374
375 out_blk[0] = io_swap(b0[0]); out_blk[1] = io_swap(b0[1]);
376 out_blk[2] = io_swap(b0[2]); out_blk[3] = io_swap(b0[3]);
377}
378
379/* decrypt a block of text */
380
381#define i_nround(bo, bi, k) \
382 i_rn(bo, bi, 0, k); \
383 i_rn(bo, bi, 1, k); \
384 i_rn(bo, bi, 2, k); \
385 i_rn(bo, bi, 3, k); \
386 k -= 4
387
388#define i_lround(bo, bi, k) \
389 i_rl(bo, bi, 0, k); \
390 i_rl(bo, bi, 1, k); \
391 i_rl(bo, bi, 2, k); \
392 i_rl(bo, bi, 3, k)
393
394void
395rijndael_decrypt(rijndael_ctx *ctx, const u4byte *in_blk, u4byte *out_blk)
2b87da3b 396{
6b523bae 397 u4byte b0[4], b1[4], *kp;
398 u4byte k_len = ctx->k_len;
399 u4byte *e_key = ctx->e_key;
400 u4byte *d_key = ctx->d_key;
401
402 b0[0] = io_swap(in_blk[0]) ^ e_key[4 * k_len + 24];
403 b0[1] = io_swap(in_blk[1]) ^ e_key[4 * k_len + 25];
404 b0[2] = io_swap(in_blk[2]) ^ e_key[4 * k_len + 26];
405 b0[3] = io_swap(in_blk[3]) ^ e_key[4 * k_len + 27];
406
407 kp = d_key + 4 * (k_len + 5);
408
409 if(k_len > 6) {
410 i_nround(b1, b0, kp); i_nround(b0, b1, kp);
411 }
412
413 if(k_len > 4) {
414 i_nround(b1, b0, kp); i_nround(b0, b1, kp);
415 }
416
417 i_nround(b1, b0, kp); i_nround(b0, b1, kp);
418 i_nround(b1, b0, kp); i_nround(b0, b1, kp);
419 i_nround(b1, b0, kp); i_nround(b0, b1, kp);
420 i_nround(b1, b0, kp); i_nround(b0, b1, kp);
421 i_nround(b1, b0, kp); i_lround(b0, b1, kp);
422
423 out_blk[0] = io_swap(b0[0]); out_blk[1] = io_swap(b0[1]);
424 out_blk[2] = io_swap(b0[2]); out_blk[3] = io_swap(b0[3]);
9ea53ba5 425}
This page took 0.206301 seconds and 5 git commands to generate.