]>
Commit | Line | Data |
---|---|---|
b35eb612 | 1 | /* $OpenBSD: rijndael.c,v 1.7 2001/02/04 15:32:24 stevesk Exp $ */ |
6b523bae | 2 | |
3 | /* This is an independent implementation of the encryption algorithm: */ | |
4 | /* */ | |
5 | /* RIJNDAEL by Joan Daemen and Vincent Rijmen */ | |
6 | /* */ | |
7 | /* which is a candidate algorithm in the Advanced Encryption Standard */ | |
8 | /* programme of the US National Institute of Standards and Technology. */ | |
9 | /* */ | |
10 | /* Copyright in this implementation is held by Dr B R Gladman but I */ | |
11 | /* hereby give permission for its free direct or derivative use subject */ | |
12 | /* to acknowledgment of its origin and compliance with any conditions */ | |
13 | /* that the originators of the algorithm place on its exploitation. */ | |
14 | /* */ | |
15 | /* Dr Brian Gladman (gladman@seven77.demon.co.uk) 14th January 1999 */ | |
16 | ||
17 | /* Timing data for Rijndael (rijndael.c) | |
18 | ||
19 | Algorithm: rijndael (rijndael.c) | |
20 | ||
21 | 128 bit key: | |
22 | Key Setup: 305/1389 cycles (encrypt/decrypt) | |
23 | Encrypt: 374 cycles = 68.4 mbits/sec | |
24 | Decrypt: 352 cycles = 72.7 mbits/sec | |
25 | Mean: 363 cycles = 70.5 mbits/sec | |
26 | ||
27 | 192 bit key: | |
28 | Key Setup: 277/1595 cycles (encrypt/decrypt) | |
29 | Encrypt: 439 cycles = 58.3 mbits/sec | |
30 | Decrypt: 425 cycles = 60.2 mbits/sec | |
31 | Mean: 432 cycles = 59.3 mbits/sec | |
32 | ||
33 | 256 bit key: | |
34 | Key Setup: 374/1960 cycles (encrypt/decrypt) | |
35 | Encrypt: 502 cycles = 51.0 mbits/sec | |
36 | Decrypt: 498 cycles = 51.4 mbits/sec | |
37 | Mean: 500 cycles = 51.2 mbits/sec | |
38 | ||
39 | */ | |
94ec8c6b | 40 | |
6bcf7caa | 41 | #include "config.h" |
94ec8c6b | 42 | #include "rijndael.h" |
43 | ||
6b523bae | 44 | void gen_tabs __P((void)); |
45 | ||
46 | /* 3. Basic macros for speeding up generic operations */ | |
47 | ||
48 | /* Circular rotate of 32 bit values */ | |
49 | ||
50 | #define rotr(x,n) (((x) >> ((int)(n))) | ((x) << (32 - (int)(n)))) | |
51 | #define rotl(x,n) (((x) << ((int)(n))) | ((x) >> (32 - (int)(n)))) | |
52 | ||
53 | /* Invert byte order in a 32 bit variable */ | |
54 | ||
55 | #define bswap(x) ((rotl(x, 8) & 0x00ff00ff) | (rotr(x, 8) & 0xff00ff00)) | |
56 | ||
2b87da3b | 57 | /* Extract byte from a 32 bit quantity (little endian notation) */ |
6b523bae | 58 | |
59 | #define byte(x,n) ((u1byte)((x) >> (8 * n))) | |
60 | ||
cf0c5df5 | 61 | #ifdef WORDS_BIGENDIAN |
6b523bae | 62 | #define BYTE_SWAP |
63 | #endif | |
64 | ||
65 | #ifdef BYTE_SWAP | |
66 | #define io_swap(x) bswap(x) | |
67 | #else | |
68 | #define io_swap(x) (x) | |
69 | #endif | |
70 | ||
71 | #define LARGE_TABLES | |
72 | ||
73 | u1byte pow_tab[256]; | |
74 | u1byte log_tab[256]; | |
75 | u1byte sbx_tab[256]; | |
76 | u1byte isb_tab[256]; | |
77 | u4byte rco_tab[ 10]; | |
78 | u4byte ft_tab[4][256]; | |
79 | u4byte it_tab[4][256]; | |
80 | ||
81 | #ifdef LARGE_TABLES | |
82 | u4byte fl_tab[4][256]; | |
83 | u4byte il_tab[4][256]; | |
84 | #endif | |
85 | ||
86 | u4byte tab_gen = 0; | |
87 | ||
88 | #define ff_mult(a,b) (a && b ? pow_tab[(log_tab[a] + log_tab[b]) % 255] : 0) | |
89 | ||
90 | #define f_rn(bo, bi, n, k) \ | |
91 | bo[n] = ft_tab[0][byte(bi[n],0)] ^ \ | |
2b87da3b | 92 | ft_tab[1][byte(bi[(n + 1) & 3],1)] ^ \ |
93 | ft_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ | |
94 | ft_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n) | |
6b523bae | 95 | |
96 | #define i_rn(bo, bi, n, k) \ | |
97 | bo[n] = it_tab[0][byte(bi[n],0)] ^ \ | |
2b87da3b | 98 | it_tab[1][byte(bi[(n + 3) & 3],1)] ^ \ |
99 | it_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ | |
100 | it_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n) | |
6b523bae | 101 | |
102 | #ifdef LARGE_TABLES | |
103 | ||
104 | #define ls_box(x) \ | |
105 | ( fl_tab[0][byte(x, 0)] ^ \ | |
106 | fl_tab[1][byte(x, 1)] ^ \ | |
107 | fl_tab[2][byte(x, 2)] ^ \ | |
108 | fl_tab[3][byte(x, 3)] ) | |
109 | ||
110 | #define f_rl(bo, bi, n, k) \ | |
111 | bo[n] = fl_tab[0][byte(bi[n],0)] ^ \ | |
2b87da3b | 112 | fl_tab[1][byte(bi[(n + 1) & 3],1)] ^ \ |
113 | fl_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ | |
114 | fl_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n) | |
6b523bae | 115 | |
116 | #define i_rl(bo, bi, n, k) \ | |
117 | bo[n] = il_tab[0][byte(bi[n],0)] ^ \ | |
2b87da3b | 118 | il_tab[1][byte(bi[(n + 3) & 3],1)] ^ \ |
119 | il_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ | |
120 | il_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n) | |
6b523bae | 121 | |
122 | #else | |
123 | ||
124 | #define ls_box(x) \ | |
125 | ((u4byte)sbx_tab[byte(x, 0)] << 0) ^ \ | |
126 | ((u4byte)sbx_tab[byte(x, 1)] << 8) ^ \ | |
127 | ((u4byte)sbx_tab[byte(x, 2)] << 16) ^ \ | |
128 | ((u4byte)sbx_tab[byte(x, 3)] << 24) | |
129 | ||
130 | #define f_rl(bo, bi, n, k) \ | |
131 | bo[n] = (u4byte)sbx_tab[byte(bi[n],0)] ^ \ | |
2b87da3b | 132 | rotl(((u4byte)sbx_tab[byte(bi[(n + 1) & 3],1)]), 8) ^ \ |
133 | rotl(((u4byte)sbx_tab[byte(bi[(n + 2) & 3],2)]), 16) ^ \ | |
134 | rotl(((u4byte)sbx_tab[byte(bi[(n + 3) & 3],3)]), 24) ^ *(k + n) | |
6b523bae | 135 | |
136 | #define i_rl(bo, bi, n, k) \ | |
137 | bo[n] = (u4byte)isb_tab[byte(bi[n],0)] ^ \ | |
2b87da3b | 138 | rotl(((u4byte)isb_tab[byte(bi[(n + 3) & 3],1)]), 8) ^ \ |
139 | rotl(((u4byte)isb_tab[byte(bi[(n + 2) & 3],2)]), 16) ^ \ | |
140 | rotl(((u4byte)isb_tab[byte(bi[(n + 1) & 3],3)]), 24) ^ *(k + n) | |
6b523bae | 141 | |
142 | #endif | |
143 | ||
144 | void | |
145 | gen_tabs(void) | |
94ec8c6b | 146 | { |
6b523bae | 147 | u4byte i, t; |
148 | u1byte p, q; | |
149 | ||
150 | /* log and power tables for GF(2**8) finite field with */ | |
151 | /* 0x11b as modular polynomial - the simplest prmitive */ | |
152 | /* root is 0x11, used here to generate the tables */ | |
153 | ||
154 | for(i = 0,p = 1; i < 256; ++i) { | |
155 | pow_tab[i] = (u1byte)p; log_tab[p] = (u1byte)i; | |
156 | ||
157 | p = p ^ (p << 1) ^ (p & 0x80 ? 0x01b : 0); | |
94ec8c6b | 158 | } |
6b523bae | 159 | |
160 | log_tab[1] = 0; p = 1; | |
161 | ||
162 | for(i = 0; i < 10; ++i) { | |
2b87da3b | 163 | rco_tab[i] = p; |
6b523bae | 164 | |
165 | p = (p << 1) ^ (p & 0x80 ? 0x1b : 0); | |
94ec8c6b | 166 | } |
94ec8c6b | 167 | |
6b523bae | 168 | /* note that the affine byte transformation matrix in */ |
169 | /* rijndael specification is in big endian format with */ | |
170 | /* bit 0 as the most significant bit. In the remainder */ | |
171 | /* of the specification the bits are numbered from the */ | |
172 | /* least significant end of a byte. */ | |
173 | ||
174 | for(i = 0; i < 256; ++i) { | |
2b87da3b | 175 | p = (i ? pow_tab[255 - log_tab[i]] : 0); q = p; |
176 | q = (q >> 7) | (q << 1); p ^= q; | |
177 | q = (q >> 7) | (q << 1); p ^= q; | |
178 | q = (q >> 7) | (q << 1); p ^= q; | |
179 | q = (q >> 7) | (q << 1); p ^= q ^ 0x63; | |
6b523bae | 180 | sbx_tab[i] = (u1byte)p; isb_tab[p] = (u1byte)i; |
94ec8c6b | 181 | } |
6b523bae | 182 | |
183 | for(i = 0; i < 256; ++i) { | |
2b87da3b | 184 | p = sbx_tab[i]; |
185 | ||
186 | #ifdef LARGE_TABLES | |
6b523bae | 187 | |
6b523bae | 188 | t = p; fl_tab[0][i] = t; |
189 | fl_tab[1][i] = rotl(t, 8); | |
190 | fl_tab[2][i] = rotl(t, 16); | |
191 | fl_tab[3][i] = rotl(t, 24); | |
192 | #endif | |
193 | t = ((u4byte)ff_mult(2, p)) | | |
194 | ((u4byte)p << 8) | | |
195 | ((u4byte)p << 16) | | |
196 | ((u4byte)ff_mult(3, p) << 24); | |
2b87da3b | 197 | |
6b523bae | 198 | ft_tab[0][i] = t; |
199 | ft_tab[1][i] = rotl(t, 8); | |
200 | ft_tab[2][i] = rotl(t, 16); | |
201 | ft_tab[3][i] = rotl(t, 24); | |
202 | ||
2b87da3b | 203 | p = isb_tab[i]; |
6b523bae | 204 | |
2b87da3b | 205 | #ifdef LARGE_TABLES |
206 | ||
207 | t = p; il_tab[0][i] = t; | |
208 | il_tab[1][i] = rotl(t, 8); | |
209 | il_tab[2][i] = rotl(t, 16); | |
6b523bae | 210 | il_tab[3][i] = rotl(t, 24); |
2b87da3b | 211 | #endif |
6b523bae | 212 | t = ((u4byte)ff_mult(14, p)) | |
213 | ((u4byte)ff_mult( 9, p) << 8) | | |
214 | ((u4byte)ff_mult(13, p) << 16) | | |
215 | ((u4byte)ff_mult(11, p) << 24); | |
2b87da3b | 216 | |
217 | it_tab[0][i] = t; | |
218 | it_tab[1][i] = rotl(t, 8); | |
219 | it_tab[2][i] = rotl(t, 16); | |
220 | it_tab[3][i] = rotl(t, 24); | |
94ec8c6b | 221 | } |
6b523bae | 222 | |
223 | tab_gen = 1; | |
9ea53ba5 | 224 | } |
94ec8c6b | 225 | |
6b523bae | 226 | #define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b) |
227 | ||
228 | #define imix_col(y,x) \ | |
229 | u = star_x(x); \ | |
230 | v = star_x(u); \ | |
231 | w = star_x(v); \ | |
232 | t = w ^ (x); \ | |
233 | (y) = u ^ v ^ w; \ | |
234 | (y) ^= rotr(u ^ t, 8) ^ \ | |
2b87da3b | 235 | rotr(v ^ t, 16) ^ \ |
236 | rotr(t,24) | |
6b523bae | 237 | |
238 | /* initialise the key schedule from the user supplied key */ | |
239 | ||
240 | #define loop4(i) \ | |
241 | { t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \ | |
242 | t ^= e_key[4 * i]; e_key[4 * i + 4] = t; \ | |
243 | t ^= e_key[4 * i + 1]; e_key[4 * i + 5] = t; \ | |
244 | t ^= e_key[4 * i + 2]; e_key[4 * i + 6] = t; \ | |
245 | t ^= e_key[4 * i + 3]; e_key[4 * i + 7] = t; \ | |
246 | } | |
247 | ||
248 | #define loop6(i) \ | |
249 | { t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \ | |
250 | t ^= e_key[6 * i]; e_key[6 * i + 6] = t; \ | |
251 | t ^= e_key[6 * i + 1]; e_key[6 * i + 7] = t; \ | |
252 | t ^= e_key[6 * i + 2]; e_key[6 * i + 8] = t; \ | |
253 | t ^= e_key[6 * i + 3]; e_key[6 * i + 9] = t; \ | |
254 | t ^= e_key[6 * i + 4]; e_key[6 * i + 10] = t; \ | |
255 | t ^= e_key[6 * i + 5]; e_key[6 * i + 11] = t; \ | |
256 | } | |
257 | ||
258 | #define loop8(i) \ | |
259 | { t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \ | |
260 | t ^= e_key[8 * i]; e_key[8 * i + 8] = t; \ | |
261 | t ^= e_key[8 * i + 1]; e_key[8 * i + 9] = t; \ | |
262 | t ^= e_key[8 * i + 2]; e_key[8 * i + 10] = t; \ | |
263 | t ^= e_key[8 * i + 3]; e_key[8 * i + 11] = t; \ | |
264 | t = e_key[8 * i + 4] ^ ls_box(t); \ | |
265 | e_key[8 * i + 12] = t; \ | |
266 | t ^= e_key[8 * i + 5]; e_key[8 * i + 13] = t; \ | |
267 | t ^= e_key[8 * i + 6]; e_key[8 * i + 14] = t; \ | |
268 | t ^= e_key[8 * i + 7]; e_key[8 * i + 15] = t; \ | |
269 | } | |
270 | ||
271 | rijndael_ctx * | |
272 | rijndael_set_key(rijndael_ctx *ctx, const u4byte *in_key, const u4byte key_len, | |
273 | int encrypt) | |
2b87da3b | 274 | { |
6b523bae | 275 | u4byte i, t, u, v, w; |
276 | u4byte *e_key = ctx->e_key; | |
277 | u4byte *d_key = ctx->d_key; | |
278 | ||
279 | ctx->decrypt = !encrypt; | |
280 | ||
281 | if(!tab_gen) | |
282 | gen_tabs(); | |
283 | ||
284 | ctx->k_len = (key_len + 31) / 32; | |
285 | ||
286 | e_key[0] = io_swap(in_key[0]); e_key[1] = io_swap(in_key[1]); | |
287 | e_key[2] = io_swap(in_key[2]); e_key[3] = io_swap(in_key[3]); | |
2b87da3b | 288 | |
6b523bae | 289 | switch(ctx->k_len) { |
2b87da3b | 290 | case 4: t = e_key[3]; |
291 | for(i = 0; i < 10; ++i) | |
6b523bae | 292 | loop4(i); |
2b87da3b | 293 | break; |
6b523bae | 294 | |
2b87da3b | 295 | case 6: e_key[4] = io_swap(in_key[4]); t = e_key[5] = io_swap(in_key[5]); |
296 | for(i = 0; i < 8; ++i) | |
6b523bae | 297 | loop6(i); |
2b87da3b | 298 | break; |
6b523bae | 299 | |
2b87da3b | 300 | case 8: e_key[4] = io_swap(in_key[4]); e_key[5] = io_swap(in_key[5]); |
301 | e_key[6] = io_swap(in_key[6]); t = e_key[7] = io_swap(in_key[7]); | |
302 | for(i = 0; i < 7; ++i) | |
6b523bae | 303 | loop8(i); |
2b87da3b | 304 | break; |
6b523bae | 305 | } |
306 | ||
307 | if (!encrypt) { | |
308 | d_key[0] = e_key[0]; d_key[1] = e_key[1]; | |
309 | d_key[2] = e_key[2]; d_key[3] = e_key[3]; | |
310 | ||
311 | for(i = 4; i < 4 * ctx->k_len + 24; ++i) { | |
312 | imix_col(d_key[i], e_key[i]); | |
313 | } | |
94ec8c6b | 314 | } |
6b523bae | 315 | |
316 | return ctx; | |
dfe89252 | 317 | } |
94ec8c6b | 318 | |
6b523bae | 319 | /* encrypt a block of text */ |
320 | ||
321 | #define f_nround(bo, bi, k) \ | |
322 | f_rn(bo, bi, 0, k); \ | |
323 | f_rn(bo, bi, 1, k); \ | |
324 | f_rn(bo, bi, 2, k); \ | |
325 | f_rn(bo, bi, 3, k); \ | |
326 | k += 4 | |
327 | ||
328 | #define f_lround(bo, bi, k) \ | |
329 | f_rl(bo, bi, 0, k); \ | |
330 | f_rl(bo, bi, 1, k); \ | |
331 | f_rl(bo, bi, 2, k); \ | |
332 | f_rl(bo, bi, 3, k) | |
333 | ||
334 | void | |
335 | rijndael_encrypt(rijndael_ctx *ctx, const u4byte *in_blk, u4byte *out_blk) | |
2b87da3b | 336 | { |
6b523bae | 337 | u4byte k_len = ctx->k_len; |
338 | u4byte *e_key = ctx->e_key; | |
339 | u4byte b0[4], b1[4], *kp; | |
340 | ||
341 | b0[0] = io_swap(in_blk[0]) ^ e_key[0]; | |
342 | b0[1] = io_swap(in_blk[1]) ^ e_key[1]; | |
343 | b0[2] = io_swap(in_blk[2]) ^ e_key[2]; | |
344 | b0[3] = io_swap(in_blk[3]) ^ e_key[3]; | |
345 | ||
346 | kp = e_key + 4; | |
347 | ||
348 | if(k_len > 6) { | |
349 | f_nround(b1, b0, kp); f_nround(b0, b1, kp); | |
350 | } | |
351 | ||
352 | if(k_len > 4) { | |
353 | f_nround(b1, b0, kp); f_nround(b0, b1, kp); | |
354 | } | |
355 | ||
356 | f_nround(b1, b0, kp); f_nround(b0, b1, kp); | |
357 | f_nround(b1, b0, kp); f_nround(b0, b1, kp); | |
358 | f_nround(b1, b0, kp); f_nround(b0, b1, kp); | |
359 | f_nround(b1, b0, kp); f_nround(b0, b1, kp); | |
360 | f_nround(b1, b0, kp); f_lround(b0, b1, kp); | |
361 | ||
362 | out_blk[0] = io_swap(b0[0]); out_blk[1] = io_swap(b0[1]); | |
363 | out_blk[2] = io_swap(b0[2]); out_blk[3] = io_swap(b0[3]); | |
364 | } | |
365 | ||
366 | /* decrypt a block of text */ | |
367 | ||
368 | #define i_nround(bo, bi, k) \ | |
369 | i_rn(bo, bi, 0, k); \ | |
370 | i_rn(bo, bi, 1, k); \ | |
371 | i_rn(bo, bi, 2, k); \ | |
372 | i_rn(bo, bi, 3, k); \ | |
373 | k -= 4 | |
374 | ||
375 | #define i_lround(bo, bi, k) \ | |
376 | i_rl(bo, bi, 0, k); \ | |
377 | i_rl(bo, bi, 1, k); \ | |
378 | i_rl(bo, bi, 2, k); \ | |
379 | i_rl(bo, bi, 3, k) | |
380 | ||
381 | void | |
382 | rijndael_decrypt(rijndael_ctx *ctx, const u4byte *in_blk, u4byte *out_blk) | |
2b87da3b | 383 | { |
6b523bae | 384 | u4byte b0[4], b1[4], *kp; |
385 | u4byte k_len = ctx->k_len; | |
386 | u4byte *e_key = ctx->e_key; | |
387 | u4byte *d_key = ctx->d_key; | |
388 | ||
389 | b0[0] = io_swap(in_blk[0]) ^ e_key[4 * k_len + 24]; | |
390 | b0[1] = io_swap(in_blk[1]) ^ e_key[4 * k_len + 25]; | |
391 | b0[2] = io_swap(in_blk[2]) ^ e_key[4 * k_len + 26]; | |
392 | b0[3] = io_swap(in_blk[3]) ^ e_key[4 * k_len + 27]; | |
393 | ||
394 | kp = d_key + 4 * (k_len + 5); | |
395 | ||
396 | if(k_len > 6) { | |
397 | i_nround(b1, b0, kp); i_nround(b0, b1, kp); | |
398 | } | |
399 | ||
400 | if(k_len > 4) { | |
401 | i_nround(b1, b0, kp); i_nround(b0, b1, kp); | |
402 | } | |
403 | ||
404 | i_nround(b1, b0, kp); i_nround(b0, b1, kp); | |
405 | i_nround(b1, b0, kp); i_nround(b0, b1, kp); | |
406 | i_nround(b1, b0, kp); i_nround(b0, b1, kp); | |
407 | i_nround(b1, b0, kp); i_nround(b0, b1, kp); | |
408 | i_nround(b1, b0, kp); i_lround(b0, b1, kp); | |
409 | ||
410 | out_blk[0] = io_swap(b0[0]); out_blk[1] = io_swap(b0[1]); | |
411 | out_blk[2] = io_swap(b0[2]); out_blk[3] = io_swap(b0[3]); | |
9ea53ba5 | 412 | } |