]>
Commit | Line | Data |
---|---|---|
d6192346 | 1 | /* $OpenBSD: rijndael.c,v 1.8 2001/07/30 16:23:30 stevesk Exp $ */ |
6b523bae | 2 | |
3 | /* This is an independent implementation of the encryption algorithm: */ | |
4 | /* */ | |
5 | /* RIJNDAEL by Joan Daemen and Vincent Rijmen */ | |
6 | /* */ | |
7 | /* which is a candidate algorithm in the Advanced Encryption Standard */ | |
8 | /* programme of the US National Institute of Standards and Technology. */ | |
d6192346 | 9 | |
10 | /* | |
11 | ----------------------------------------------------------------------- | |
12 | Copyright (c) 2001 Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK | |
13 | ||
14 | TERMS | |
15 | ||
16 | Redistribution and use in source and binary forms, with or without | |
17 | modification, are permitted provided that the following conditions | |
18 | are met: | |
19 | 1. Redistributions of source code must retain the above copyright | |
20 | notice, this list of conditions and the following disclaimer. | |
21 | 2. Redistributions in binary form must reproduce the above copyright | |
22 | notice, this list of conditions and the following disclaimer in the | |
23 | documentation and/or other materials provided with the distribution. | |
24 | ||
25 | This software is provided 'as is' with no guarantees of correctness or | |
26 | fitness for purpose. | |
27 | ----------------------------------------------------------------------- | |
28 | */ | |
6b523bae | 29 | |
30 | /* Timing data for Rijndael (rijndael.c) | |
31 | ||
32 | Algorithm: rijndael (rijndael.c) | |
33 | ||
34 | 128 bit key: | |
35 | Key Setup: 305/1389 cycles (encrypt/decrypt) | |
36 | Encrypt: 374 cycles = 68.4 mbits/sec | |
37 | Decrypt: 352 cycles = 72.7 mbits/sec | |
38 | Mean: 363 cycles = 70.5 mbits/sec | |
39 | ||
40 | 192 bit key: | |
41 | Key Setup: 277/1595 cycles (encrypt/decrypt) | |
42 | Encrypt: 439 cycles = 58.3 mbits/sec | |
43 | Decrypt: 425 cycles = 60.2 mbits/sec | |
44 | Mean: 432 cycles = 59.3 mbits/sec | |
45 | ||
46 | 256 bit key: | |
47 | Key Setup: 374/1960 cycles (encrypt/decrypt) | |
48 | Encrypt: 502 cycles = 51.0 mbits/sec | |
49 | Decrypt: 498 cycles = 51.4 mbits/sec | |
50 | Mean: 500 cycles = 51.2 mbits/sec | |
51 | ||
52 | */ | |
94ec8c6b | 53 | |
6bcf7caa | 54 | #include "config.h" |
94ec8c6b | 55 | #include "rijndael.h" |
56 | ||
6b523bae | 57 | void gen_tabs __P((void)); |
58 | ||
59 | /* 3. Basic macros for speeding up generic operations */ | |
60 | ||
61 | /* Circular rotate of 32 bit values */ | |
62 | ||
63 | #define rotr(x,n) (((x) >> ((int)(n))) | ((x) << (32 - (int)(n)))) | |
64 | #define rotl(x,n) (((x) << ((int)(n))) | ((x) >> (32 - (int)(n)))) | |
65 | ||
66 | /* Invert byte order in a 32 bit variable */ | |
67 | ||
68 | #define bswap(x) ((rotl(x, 8) & 0x00ff00ff) | (rotr(x, 8) & 0xff00ff00)) | |
69 | ||
2b87da3b | 70 | /* Extract byte from a 32 bit quantity (little endian notation) */ |
6b523bae | 71 | |
72 | #define byte(x,n) ((u1byte)((x) >> (8 * n))) | |
73 | ||
cf0c5df5 | 74 | #ifdef WORDS_BIGENDIAN |
6b523bae | 75 | #define BYTE_SWAP |
76 | #endif | |
77 | ||
78 | #ifdef BYTE_SWAP | |
79 | #define io_swap(x) bswap(x) | |
80 | #else | |
81 | #define io_swap(x) (x) | |
82 | #endif | |
83 | ||
84 | #define LARGE_TABLES | |
85 | ||
86 | u1byte pow_tab[256]; | |
87 | u1byte log_tab[256]; | |
88 | u1byte sbx_tab[256]; | |
89 | u1byte isb_tab[256]; | |
90 | u4byte rco_tab[ 10]; | |
91 | u4byte ft_tab[4][256]; | |
92 | u4byte it_tab[4][256]; | |
93 | ||
94 | #ifdef LARGE_TABLES | |
95 | u4byte fl_tab[4][256]; | |
96 | u4byte il_tab[4][256]; | |
97 | #endif | |
98 | ||
99 | u4byte tab_gen = 0; | |
100 | ||
101 | #define ff_mult(a,b) (a && b ? pow_tab[(log_tab[a] + log_tab[b]) % 255] : 0) | |
102 | ||
103 | #define f_rn(bo, bi, n, k) \ | |
104 | bo[n] = ft_tab[0][byte(bi[n],0)] ^ \ | |
2b87da3b | 105 | ft_tab[1][byte(bi[(n + 1) & 3],1)] ^ \ |
106 | ft_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ | |
107 | ft_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n) | |
6b523bae | 108 | |
109 | #define i_rn(bo, bi, n, k) \ | |
110 | bo[n] = it_tab[0][byte(bi[n],0)] ^ \ | |
2b87da3b | 111 | it_tab[1][byte(bi[(n + 3) & 3],1)] ^ \ |
112 | it_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ | |
113 | it_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n) | |
6b523bae | 114 | |
115 | #ifdef LARGE_TABLES | |
116 | ||
117 | #define ls_box(x) \ | |
118 | ( fl_tab[0][byte(x, 0)] ^ \ | |
119 | fl_tab[1][byte(x, 1)] ^ \ | |
120 | fl_tab[2][byte(x, 2)] ^ \ | |
121 | fl_tab[3][byte(x, 3)] ) | |
122 | ||
123 | #define f_rl(bo, bi, n, k) \ | |
124 | bo[n] = fl_tab[0][byte(bi[n],0)] ^ \ | |
2b87da3b | 125 | fl_tab[1][byte(bi[(n + 1) & 3],1)] ^ \ |
126 | fl_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ | |
127 | fl_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n) | |
6b523bae | 128 | |
129 | #define i_rl(bo, bi, n, k) \ | |
130 | bo[n] = il_tab[0][byte(bi[n],0)] ^ \ | |
2b87da3b | 131 | il_tab[1][byte(bi[(n + 3) & 3],1)] ^ \ |
132 | il_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ | |
133 | il_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n) | |
6b523bae | 134 | |
135 | #else | |
136 | ||
137 | #define ls_box(x) \ | |
138 | ((u4byte)sbx_tab[byte(x, 0)] << 0) ^ \ | |
139 | ((u4byte)sbx_tab[byte(x, 1)] << 8) ^ \ | |
140 | ((u4byte)sbx_tab[byte(x, 2)] << 16) ^ \ | |
141 | ((u4byte)sbx_tab[byte(x, 3)] << 24) | |
142 | ||
143 | #define f_rl(bo, bi, n, k) \ | |
144 | bo[n] = (u4byte)sbx_tab[byte(bi[n],0)] ^ \ | |
2b87da3b | 145 | rotl(((u4byte)sbx_tab[byte(bi[(n + 1) & 3],1)]), 8) ^ \ |
146 | rotl(((u4byte)sbx_tab[byte(bi[(n + 2) & 3],2)]), 16) ^ \ | |
147 | rotl(((u4byte)sbx_tab[byte(bi[(n + 3) & 3],3)]), 24) ^ *(k + n) | |
6b523bae | 148 | |
149 | #define i_rl(bo, bi, n, k) \ | |
150 | bo[n] = (u4byte)isb_tab[byte(bi[n],0)] ^ \ | |
2b87da3b | 151 | rotl(((u4byte)isb_tab[byte(bi[(n + 3) & 3],1)]), 8) ^ \ |
152 | rotl(((u4byte)isb_tab[byte(bi[(n + 2) & 3],2)]), 16) ^ \ | |
153 | rotl(((u4byte)isb_tab[byte(bi[(n + 1) & 3],3)]), 24) ^ *(k + n) | |
6b523bae | 154 | |
155 | #endif | |
156 | ||
157 | void | |
158 | gen_tabs(void) | |
94ec8c6b | 159 | { |
6b523bae | 160 | u4byte i, t; |
161 | u1byte p, q; | |
162 | ||
163 | /* log and power tables for GF(2**8) finite field with */ | |
164 | /* 0x11b as modular polynomial - the simplest prmitive */ | |
165 | /* root is 0x11, used here to generate the tables */ | |
166 | ||
167 | for(i = 0,p = 1; i < 256; ++i) { | |
168 | pow_tab[i] = (u1byte)p; log_tab[p] = (u1byte)i; | |
169 | ||
170 | p = p ^ (p << 1) ^ (p & 0x80 ? 0x01b : 0); | |
94ec8c6b | 171 | } |
6b523bae | 172 | |
173 | log_tab[1] = 0; p = 1; | |
174 | ||
175 | for(i = 0; i < 10; ++i) { | |
2b87da3b | 176 | rco_tab[i] = p; |
6b523bae | 177 | |
178 | p = (p << 1) ^ (p & 0x80 ? 0x1b : 0); | |
94ec8c6b | 179 | } |
94ec8c6b | 180 | |
6b523bae | 181 | /* note that the affine byte transformation matrix in */ |
182 | /* rijndael specification is in big endian format with */ | |
183 | /* bit 0 as the most significant bit. In the remainder */ | |
184 | /* of the specification the bits are numbered from the */ | |
185 | /* least significant end of a byte. */ | |
186 | ||
187 | for(i = 0; i < 256; ++i) { | |
2b87da3b | 188 | p = (i ? pow_tab[255 - log_tab[i]] : 0); q = p; |
189 | q = (q >> 7) | (q << 1); p ^= q; | |
190 | q = (q >> 7) | (q << 1); p ^= q; | |
191 | q = (q >> 7) | (q << 1); p ^= q; | |
192 | q = (q >> 7) | (q << 1); p ^= q ^ 0x63; | |
6b523bae | 193 | sbx_tab[i] = (u1byte)p; isb_tab[p] = (u1byte)i; |
94ec8c6b | 194 | } |
6b523bae | 195 | |
196 | for(i = 0; i < 256; ++i) { | |
2b87da3b | 197 | p = sbx_tab[i]; |
198 | ||
199 | #ifdef LARGE_TABLES | |
6b523bae | 200 | |
6b523bae | 201 | t = p; fl_tab[0][i] = t; |
202 | fl_tab[1][i] = rotl(t, 8); | |
203 | fl_tab[2][i] = rotl(t, 16); | |
204 | fl_tab[3][i] = rotl(t, 24); | |
205 | #endif | |
206 | t = ((u4byte)ff_mult(2, p)) | | |
207 | ((u4byte)p << 8) | | |
208 | ((u4byte)p << 16) | | |
209 | ((u4byte)ff_mult(3, p) << 24); | |
2b87da3b | 210 | |
6b523bae | 211 | ft_tab[0][i] = t; |
212 | ft_tab[1][i] = rotl(t, 8); | |
213 | ft_tab[2][i] = rotl(t, 16); | |
214 | ft_tab[3][i] = rotl(t, 24); | |
215 | ||
2b87da3b | 216 | p = isb_tab[i]; |
6b523bae | 217 | |
2b87da3b | 218 | #ifdef LARGE_TABLES |
219 | ||
220 | t = p; il_tab[0][i] = t; | |
221 | il_tab[1][i] = rotl(t, 8); | |
222 | il_tab[2][i] = rotl(t, 16); | |
6b523bae | 223 | il_tab[3][i] = rotl(t, 24); |
2b87da3b | 224 | #endif |
6b523bae | 225 | t = ((u4byte)ff_mult(14, p)) | |
226 | ((u4byte)ff_mult( 9, p) << 8) | | |
227 | ((u4byte)ff_mult(13, p) << 16) | | |
228 | ((u4byte)ff_mult(11, p) << 24); | |
2b87da3b | 229 | |
230 | it_tab[0][i] = t; | |
231 | it_tab[1][i] = rotl(t, 8); | |
232 | it_tab[2][i] = rotl(t, 16); | |
233 | it_tab[3][i] = rotl(t, 24); | |
94ec8c6b | 234 | } |
6b523bae | 235 | |
236 | tab_gen = 1; | |
9ea53ba5 | 237 | } |
94ec8c6b | 238 | |
6b523bae | 239 | #define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b) |
240 | ||
241 | #define imix_col(y,x) \ | |
242 | u = star_x(x); \ | |
243 | v = star_x(u); \ | |
244 | w = star_x(v); \ | |
245 | t = w ^ (x); \ | |
246 | (y) = u ^ v ^ w; \ | |
247 | (y) ^= rotr(u ^ t, 8) ^ \ | |
2b87da3b | 248 | rotr(v ^ t, 16) ^ \ |
249 | rotr(t,24) | |
6b523bae | 250 | |
251 | /* initialise the key schedule from the user supplied key */ | |
252 | ||
253 | #define loop4(i) \ | |
254 | { t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \ | |
255 | t ^= e_key[4 * i]; e_key[4 * i + 4] = t; \ | |
256 | t ^= e_key[4 * i + 1]; e_key[4 * i + 5] = t; \ | |
257 | t ^= e_key[4 * i + 2]; e_key[4 * i + 6] = t; \ | |
258 | t ^= e_key[4 * i + 3]; e_key[4 * i + 7] = t; \ | |
259 | } | |
260 | ||
261 | #define loop6(i) \ | |
262 | { t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \ | |
263 | t ^= e_key[6 * i]; e_key[6 * i + 6] = t; \ | |
264 | t ^= e_key[6 * i + 1]; e_key[6 * i + 7] = t; \ | |
265 | t ^= e_key[6 * i + 2]; e_key[6 * i + 8] = t; \ | |
266 | t ^= e_key[6 * i + 3]; e_key[6 * i + 9] = t; \ | |
267 | t ^= e_key[6 * i + 4]; e_key[6 * i + 10] = t; \ | |
268 | t ^= e_key[6 * i + 5]; e_key[6 * i + 11] = t; \ | |
269 | } | |
270 | ||
271 | #define loop8(i) \ | |
272 | { t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \ | |
273 | t ^= e_key[8 * i]; e_key[8 * i + 8] = t; \ | |
274 | t ^= e_key[8 * i + 1]; e_key[8 * i + 9] = t; \ | |
275 | t ^= e_key[8 * i + 2]; e_key[8 * i + 10] = t; \ | |
276 | t ^= e_key[8 * i + 3]; e_key[8 * i + 11] = t; \ | |
277 | t = e_key[8 * i + 4] ^ ls_box(t); \ | |
278 | e_key[8 * i + 12] = t; \ | |
279 | t ^= e_key[8 * i + 5]; e_key[8 * i + 13] = t; \ | |
280 | t ^= e_key[8 * i + 6]; e_key[8 * i + 14] = t; \ | |
281 | t ^= e_key[8 * i + 7]; e_key[8 * i + 15] = t; \ | |
282 | } | |
283 | ||
284 | rijndael_ctx * | |
285 | rijndael_set_key(rijndael_ctx *ctx, const u4byte *in_key, const u4byte key_len, | |
286 | int encrypt) | |
2b87da3b | 287 | { |
6b523bae | 288 | u4byte i, t, u, v, w; |
289 | u4byte *e_key = ctx->e_key; | |
290 | u4byte *d_key = ctx->d_key; | |
291 | ||
292 | ctx->decrypt = !encrypt; | |
293 | ||
294 | if(!tab_gen) | |
295 | gen_tabs(); | |
296 | ||
297 | ctx->k_len = (key_len + 31) / 32; | |
298 | ||
299 | e_key[0] = io_swap(in_key[0]); e_key[1] = io_swap(in_key[1]); | |
300 | e_key[2] = io_swap(in_key[2]); e_key[3] = io_swap(in_key[3]); | |
2b87da3b | 301 | |
6b523bae | 302 | switch(ctx->k_len) { |
2b87da3b | 303 | case 4: t = e_key[3]; |
304 | for(i = 0; i < 10; ++i) | |
6b523bae | 305 | loop4(i); |
2b87da3b | 306 | break; |
6b523bae | 307 | |
2b87da3b | 308 | case 6: e_key[4] = io_swap(in_key[4]); t = e_key[5] = io_swap(in_key[5]); |
309 | for(i = 0; i < 8; ++i) | |
6b523bae | 310 | loop6(i); |
2b87da3b | 311 | break; |
6b523bae | 312 | |
2b87da3b | 313 | case 8: e_key[4] = io_swap(in_key[4]); e_key[5] = io_swap(in_key[5]); |
314 | e_key[6] = io_swap(in_key[6]); t = e_key[7] = io_swap(in_key[7]); | |
315 | for(i = 0; i < 7; ++i) | |
6b523bae | 316 | loop8(i); |
2b87da3b | 317 | break; |
6b523bae | 318 | } |
319 | ||
320 | if (!encrypt) { | |
321 | d_key[0] = e_key[0]; d_key[1] = e_key[1]; | |
322 | d_key[2] = e_key[2]; d_key[3] = e_key[3]; | |
323 | ||
324 | for(i = 4; i < 4 * ctx->k_len + 24; ++i) { | |
325 | imix_col(d_key[i], e_key[i]); | |
326 | } | |
94ec8c6b | 327 | } |
6b523bae | 328 | |
329 | return ctx; | |
dfe89252 | 330 | } |
94ec8c6b | 331 | |
6b523bae | 332 | /* encrypt a block of text */ |
333 | ||
334 | #define f_nround(bo, bi, k) \ | |
335 | f_rn(bo, bi, 0, k); \ | |
336 | f_rn(bo, bi, 1, k); \ | |
337 | f_rn(bo, bi, 2, k); \ | |
338 | f_rn(bo, bi, 3, k); \ | |
339 | k += 4 | |
340 | ||
341 | #define f_lround(bo, bi, k) \ | |
342 | f_rl(bo, bi, 0, k); \ | |
343 | f_rl(bo, bi, 1, k); \ | |
344 | f_rl(bo, bi, 2, k); \ | |
345 | f_rl(bo, bi, 3, k) | |
346 | ||
347 | void | |
348 | rijndael_encrypt(rijndael_ctx *ctx, const u4byte *in_blk, u4byte *out_blk) | |
2b87da3b | 349 | { |
6b523bae | 350 | u4byte k_len = ctx->k_len; |
351 | u4byte *e_key = ctx->e_key; | |
352 | u4byte b0[4], b1[4], *kp; | |
353 | ||
354 | b0[0] = io_swap(in_blk[0]) ^ e_key[0]; | |
355 | b0[1] = io_swap(in_blk[1]) ^ e_key[1]; | |
356 | b0[2] = io_swap(in_blk[2]) ^ e_key[2]; | |
357 | b0[3] = io_swap(in_blk[3]) ^ e_key[3]; | |
358 | ||
359 | kp = e_key + 4; | |
360 | ||
361 | if(k_len > 6) { | |
362 | f_nround(b1, b0, kp); f_nround(b0, b1, kp); | |
363 | } | |
364 | ||
365 | if(k_len > 4) { | |
366 | f_nround(b1, b0, kp); f_nround(b0, b1, kp); | |
367 | } | |
368 | ||
369 | f_nround(b1, b0, kp); f_nround(b0, b1, kp); | |
370 | f_nround(b1, b0, kp); f_nround(b0, b1, kp); | |
371 | f_nround(b1, b0, kp); f_nround(b0, b1, kp); | |
372 | f_nround(b1, b0, kp); f_nround(b0, b1, kp); | |
373 | f_nround(b1, b0, kp); f_lround(b0, b1, kp); | |
374 | ||
375 | out_blk[0] = io_swap(b0[0]); out_blk[1] = io_swap(b0[1]); | |
376 | out_blk[2] = io_swap(b0[2]); out_blk[3] = io_swap(b0[3]); | |
377 | } | |
378 | ||
379 | /* decrypt a block of text */ | |
380 | ||
381 | #define i_nround(bo, bi, k) \ | |
382 | i_rn(bo, bi, 0, k); \ | |
383 | i_rn(bo, bi, 1, k); \ | |
384 | i_rn(bo, bi, 2, k); \ | |
385 | i_rn(bo, bi, 3, k); \ | |
386 | k -= 4 | |
387 | ||
388 | #define i_lround(bo, bi, k) \ | |
389 | i_rl(bo, bi, 0, k); \ | |
390 | i_rl(bo, bi, 1, k); \ | |
391 | i_rl(bo, bi, 2, k); \ | |
392 | i_rl(bo, bi, 3, k) | |
393 | ||
394 | void | |
395 | rijndael_decrypt(rijndael_ctx *ctx, const u4byte *in_blk, u4byte *out_blk) | |
2b87da3b | 396 | { |
6b523bae | 397 | u4byte b0[4], b1[4], *kp; |
398 | u4byte k_len = ctx->k_len; | |
399 | u4byte *e_key = ctx->e_key; | |
400 | u4byte *d_key = ctx->d_key; | |
401 | ||
402 | b0[0] = io_swap(in_blk[0]) ^ e_key[4 * k_len + 24]; | |
403 | b0[1] = io_swap(in_blk[1]) ^ e_key[4 * k_len + 25]; | |
404 | b0[2] = io_swap(in_blk[2]) ^ e_key[4 * k_len + 26]; | |
405 | b0[3] = io_swap(in_blk[3]) ^ e_key[4 * k_len + 27]; | |
406 | ||
407 | kp = d_key + 4 * (k_len + 5); | |
408 | ||
409 | if(k_len > 6) { | |
410 | i_nround(b1, b0, kp); i_nround(b0, b1, kp); | |
411 | } | |
412 | ||
413 | if(k_len > 4) { | |
414 | i_nround(b1, b0, kp); i_nround(b0, b1, kp); | |
415 | } | |
416 | ||
417 | i_nround(b1, b0, kp); i_nround(b0, b1, kp); | |
418 | i_nround(b1, b0, kp); i_nround(b0, b1, kp); | |
419 | i_nround(b1, b0, kp); i_nround(b0, b1, kp); | |
420 | i_nround(b1, b0, kp); i_nround(b0, b1, kp); | |
421 | i_nround(b1, b0, kp); i_lround(b0, b1, kp); | |
422 | ||
423 | out_blk[0] = io_swap(b0[0]); out_blk[1] = io_swap(b0[1]); | |
424 | out_blk[2] = io_swap(b0[2]); out_blk[3] = io_swap(b0[3]); | |
9ea53ba5 | 425 | } |