1 /* $OpenBSD: rijndael.c,v 1.8 2001/07/30 16:23:30 stevesk Exp $ */
3 /* This is an independent implementation of the encryption algorithm: */
5 /* RIJNDAEL by Joan Daemen and Vincent Rijmen */
7 /* which is a candidate algorithm in the Advanced Encryption Standard */
8 /* programme of the US National Institute of Standards and Technology. */
11 -----------------------------------------------------------------------
12 Copyright (c) 2001 Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK
16 Redistribution and use in source and binary forms, with or without
17 modification, are permitted provided that the following conditions
19 1. Redistributions of source code must retain the above copyright
20 notice, this list of conditions and the following disclaimer.
21 2. Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
25 This software is provided 'as is' with no guarantees of correctness or
27 -----------------------------------------------------------------------
30 /* Timing data for Rijndael (rijndael.c)
32 Algorithm: rijndael (rijndael.c)
35 Key Setup: 305/1389 cycles (encrypt/decrypt)
36 Encrypt: 374 cycles = 68.4 mbits/sec
37 Decrypt: 352 cycles = 72.7 mbits/sec
38 Mean: 363 cycles = 70.5 mbits/sec
41 Key Setup: 277/1595 cycles (encrypt/decrypt)
42 Encrypt: 439 cycles = 58.3 mbits/sec
43 Decrypt: 425 cycles = 60.2 mbits/sec
44 Mean: 432 cycles = 59.3 mbits/sec
47 Key Setup: 374/1960 cycles (encrypt/decrypt)
48 Encrypt: 502 cycles = 51.0 mbits/sec
49 Decrypt: 498 cycles = 51.4 mbits/sec
50 Mean: 500 cycles = 51.2 mbits/sec
/* Forward declaration of gen_tabs(): takes no parameters and returns nothing. */
/* NOTE(review): __P(()) is presumably the BSD prototype-wrapper macro from <sys/cdefs.h> - confirm. */
57 void gen_tabs __P((void));
59 /* 3. Basic macros for speeding up generic operations */
/*
 * Circular rotate of 32 bit values.
 *
 * x must be an unsigned 32 bit quantity; n is the rotation distance in bits.
 * Both shift counts are reduced modulo 32, so a distance of 0 (or any
 * multiple of 32) returns x unchanged instead of shifting by the full width
 * of the operand, which is undefined behaviour in C.
 * x is evaluated twice and n twice: arguments must be free of side effects.
 */
#define rotr(x,n) (((x) >> ((int)(n) & 31)) | ((x) << ((32 - (int)(n)) & 31)))
#define rotl(x,n) (((x) << ((int)(n) & 31)) | ((x) >> ((32 - (int)(n)) & 31)))
/*
 * Invert byte order in a 32 bit variable.
 *
 * Rotating right by 8 puts bytes 0 and 2 into their mirrored positions
 * (kept by mask 0xff00ff00); rotating left by 8 does the same for bytes
 * 1 and 3 (kept by mask 0x00ff00ff). OR-ing the halves gives the result.
 */
#define bswap(x) ((rotr(x, 8) & 0xff00ff00) | (rotl(x, 8) & 0x00ff00ff))
/*
 * Extract byte n from a 32 bit quantity (little endian notation):
 * n == 0 selects the least significant byte, n == 3 the most significant.
 * The shifted value is truncated to u1byte.
 *
 * n is parenthesized so that an expression argument such as (i + 1) is
 * multiplied by 8 as a whole.
 */
#define byte(x,n) ((u1byte)((x) >> (8 * (n))))
74 #ifdef WORDS_BIGENDIAN
79 #define io_swap(x) bswap(x)
81 #define io_swap(x) (x)
/*
 * Round lookup tables, each laid out as four sub-tables of 256 words.
 * The table-filling code in this file writes entries [1][i], [2][i] and
 * [3][i] as one word rotated left by 8, 16 and 24 bits respectively.
 */
/* ft_tab: read by the f_rn macro. */
91 u4byte ft_tab[4][256];
/* it_tab: read by the i_rn macro. */
92 u4byte it_tab[4][256];
/* fl_tab: read by the table-based f_rl macro. */
95 u4byte fl_tab[4][256];
/* il_tab: read by the table-based i_rl macro. */
96 u4byte il_tab[4][256];
/*
 * Multiply a and b using the log_tab / pow_tab tables: if either operand is
 * zero the product is zero; otherwise the two logarithms are added, reduced
 * modulo 255, and looked up in pow_tab.
 *
 * Every use of a and b is parenthesized so that expression arguments keep
 * their own grouping. Each argument is evaluated more than once, so it must
 * be free of side effects.
 */
#define ff_mult(a,b) (((a) && (b)) ? pow_tab[(log_tab[(a)] + log_tab[(b)]) % 255] : 0)
/*
 * Forward normal-round step for output word n (used by f_nround).
 *
 * Stores into bo[n] the XOR of the round-key word at k + n with four ft_tab
 * lookups: sub-table j (j = 0..3) is indexed by byte j of input word
 * bi[(n + j) & 3].
 * Expands to a bare assignment expression; the caller supplies the ';'.
 */
#define f_rn(bo, bi, n, k)                              \
    bo[n] = *(k + n) ^                                  \
            ft_tab[3][byte(bi[(n + 3) & 3], 3)] ^       \
            ft_tab[2][byte(bi[(n + 2) & 3], 2)] ^       \
            ft_tab[1][byte(bi[(n + 1) & 3], 1)] ^       \
            ft_tab[0][byte(bi[n], 0)]
/*
 * Inverse normal-round step for output word n (used by i_nround).
 *
 * Stores into bo[n] the XOR of the round-key word at k + n with four it_tab
 * lookups: sub-table j (j = 0..3) is indexed by byte j of input word
 * bi[(n - j) & 3], written here as (n + 3), (n + 2), (n + 1) for j = 1, 2, 3.
 * Expands to a bare assignment expression; the caller supplies the ';'.
 */
#define i_rn(bo, bi, n, k)                              \
    bo[n] = *(k + n) ^                                  \
            it_tab[3][byte(bi[(n + 1) & 3], 3)] ^       \
            it_tab[2][byte(bi[(n + 2) & 3], 2)] ^       \
            it_tab[1][byte(bi[(n + 3) & 3], 1)] ^       \
            it_tab[0][byte(bi[n], 0)]
118 ( fl_tab[0][byte(x, 0)] ^ \
119 fl_tab[1][byte(x, 1)] ^ \
120 fl_tab[2][byte(x, 2)] ^ \
121 fl_tab[3][byte(x, 3)] )
/*
 * Forward last-round step for output word n (used by f_lround), table-based
 * variant.
 *
 * Same access pattern as f_rn, but the lookups go through fl_tab:
 * sub-table j (j = 0..3) is indexed by byte j of bi[(n + j) & 3], and the
 * results are XORed together with the round-key word at k + n.
 * NOTE(review): another f_rl definition exists in this file; presumably a
 * preprocessor conditional (not visible here) selects one - confirm.
 */
#define f_rl(bo, bi, n, k)                              \
    bo[n] = *(k + n) ^                                  \
            fl_tab[3][byte(bi[(n + 3) & 3], 3)] ^       \
            fl_tab[2][byte(bi[(n + 2) & 3], 2)] ^       \
            fl_tab[1][byte(bi[(n + 1) & 3], 1)] ^       \
            fl_tab[0][byte(bi[n], 0)]
/*
 * Inverse last-round step for output word n (used by i_lround), table-based
 * variant.
 *
 * Same access pattern as i_rn, but the lookups go through il_tab:
 * sub-table j (j = 0..3) is indexed by byte j of bi[(n - j) & 3], and the
 * results are XORed together with the round-key word at k + n.
 * NOTE(review): another i_rl definition exists in this file; presumably a
 * preprocessor conditional (not visible here) selects one - confirm.
 */
#define i_rl(bo, bi, n, k)                              \
    bo[n] = *(k + n) ^                                  \
            il_tab[3][byte(bi[(n + 1) & 3], 3)] ^       \
            il_tab[2][byte(bi[(n + 2) & 3], 2)] ^       \
            il_tab[1][byte(bi[(n + 3) & 3], 1)] ^       \
            il_tab[0][byte(bi[n], 0)]
138 ((u4byte)sbx_tab[byte(x, 0)] << 0) ^ \
139 ((u4byte)sbx_tab[byte(x, 1)] << 8) ^ \
140 ((u4byte)sbx_tab[byte(x, 2)] << 16) ^ \
141 ((u4byte)sbx_tab[byte(x, 3)] << 24)
/*
 * Forward last-round step for output word n (used by f_lround), variant
 * that works from the byte-wide sbx_tab.
 *
 * Byte j (j = 0..3) of input word bi[(n + j) & 3] is looked up in sbx_tab,
 * widened to u4byte and rotated left by 8 * j bits into byte position j;
 * the four results are XORed together with the round-key word at k + n.
 * NOTE(review): another f_rl definition exists in this file; presumably a
 * preprocessor conditional (not visible here) selects one - confirm.
 */
#define f_rl(bo, bi, n, k)                                              \
    bo[n] = *(k + n) ^                                                  \
            rotl(((u4byte)sbx_tab[byte(bi[(n + 3) & 3], 3)]), 24) ^     \
            rotl(((u4byte)sbx_tab[byte(bi[(n + 2) & 3], 2)]), 16) ^     \
            rotl(((u4byte)sbx_tab[byte(bi[(n + 1) & 3], 1)]),  8) ^     \
            (u4byte)sbx_tab[byte(bi[n], 0)]
/*
 * Inverse last-round step for output word n (used by i_lround), variant
 * that works from the byte-wide isb_tab.
 *
 * Byte j (j = 0..3) of input word bi[(n - j) & 3] is looked up in isb_tab,
 * widened to u4byte and rotated left by 8 * j bits into byte position j;
 * the four results are XORed together with the round-key word at k + n.
 * NOTE(review): another i_rl definition exists in this file; presumably a
 * preprocessor conditional (not visible here) selects one - confirm.
 */
#define i_rl(bo, bi, n, k)                                              \
    bo[n] = *(k + n) ^                                                  \
            rotl(((u4byte)isb_tab[byte(bi[(n + 1) & 3], 3)]), 24) ^     \
            rotl(((u4byte)isb_tab[byte(bi[(n + 2) & 3], 2)]), 16) ^     \
            rotl(((u4byte)isb_tab[byte(bi[(n + 3) & 3], 1)]),  8) ^     \
            (u4byte)isb_tab[byte(bi[n], 0)]
163 /* log and power tables for GF(2**8) finite field with */
164 /* 0x11b as modular polynomial - the simplest primitive */
165 /* root is 0x03, used here to generate the tables */
167 for(i = 0,p = 1; i < 256; ++i) {
168 pow_tab[i] = (u1byte)p; log_tab[p] = (u1byte)i;
170 p = p ^ (p << 1) ^ (p & 0x80 ? 0x01b : 0);
173 log_tab[1] = 0; p = 1;
175 for(i = 0; i < 10; ++i) {
178 p = (p << 1) ^ (p & 0x80 ? 0x1b : 0);
181 /* note that the affine byte transformation matrix in */
182 /* rijndael specification is in big endian format with */
183 /* bit 0 as the most significant bit. In the remainder */
184 /* of the specification the bits are numbered from the */
185 /* least significant end of a byte. */
187 for(i = 0; i < 256; ++i) {
188 p = (i ? pow_tab[255 - log_tab[i]] : 0); q = p;
189 q = (q >> 7) | (q << 1); p ^= q;
190 q = (q >> 7) | (q << 1); p ^= q;
191 q = (q >> 7) | (q << 1); p ^= q;
192 q = (q >> 7) | (q << 1); p ^= q ^ 0x63;
193 sbx_tab[i] = (u1byte)p; isb_tab[p] = (u1byte)i;
196 for(i = 0; i < 256; ++i) {
201 t = p; fl_tab[0][i] = t;
202 fl_tab[1][i] = rotl(t, 8);
203 fl_tab[2][i] = rotl(t, 16);
204 fl_tab[3][i] = rotl(t, 24);
206 t = ((u4byte)ff_mult(2, p)) |
209 ((u4byte)ff_mult(3, p) << 24);
212 ft_tab[1][i] = rotl(t, 8);
213 ft_tab[2][i] = rotl(t, 16);
214 ft_tab[3][i] = rotl(t, 24);
220 t = p; il_tab[0][i] = t;
221 il_tab[1][i] = rotl(t, 8);
222 il_tab[2][i] = rotl(t, 16);
223 il_tab[3][i] = rotl(t, 24);
225 t = ((u4byte)ff_mult(14, p)) |
226 ((u4byte)ff_mult( 9, p) << 8) |
227 ((u4byte)ff_mult(13, p) << 16) |
228 ((u4byte)ff_mult(11, p) << 24);
231 it_tab[1][i] = rotl(t, 8);
232 it_tab[2][i] = rotl(t, 16);
233 it_tab[3][i] = rotl(t, 24);
/*
 * Double each of the four bytes packed in x, using the same reduction
 * constant (0x1b) as the byte-wise doubling elsewhere in this file:
 * the low seven bits of every byte are shifted left by one, and each
 * byte's top bit is moved to bit 0 and multiplied by 0x1b before being
 * XORed in.
 *
 * The whole expansion is parenthesized so the macro can be combined with
 * higher-precedence operators (&, ==, +, ...) without regrouping.
 * x is evaluated twice and must be free of side effects.
 */
#define star_x(x) ((((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b))
241 #define imix_col(y,x) \
247 (y) ^= rotr(u ^ t, 8) ^ \
251 /* initialise the key schedule from the user supplied key */
254 { t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \
255 t ^= e_key[4 * i]; e_key[4 * i + 4] = t; \
256 t ^= e_key[4 * i + 1]; e_key[4 * i + 5] = t; \
257 t ^= e_key[4 * i + 2]; e_key[4 * i + 6] = t; \
258 t ^= e_key[4 * i + 3]; e_key[4 * i + 7] = t; \
262 { t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \
263 t ^= e_key[6 * i]; e_key[6 * i + 6] = t; \
264 t ^= e_key[6 * i + 1]; e_key[6 * i + 7] = t; \
265 t ^= e_key[6 * i + 2]; e_key[6 * i + 8] = t; \
266 t ^= e_key[6 * i + 3]; e_key[6 * i + 9] = t; \
267 t ^= e_key[6 * i + 4]; e_key[6 * i + 10] = t; \
268 t ^= e_key[6 * i + 5]; e_key[6 * i + 11] = t; \
272 { t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \
273 t ^= e_key[8 * i]; e_key[8 * i + 8] = t; \
274 t ^= e_key[8 * i + 1]; e_key[8 * i + 9] = t; \
275 t ^= e_key[8 * i + 2]; e_key[8 * i + 10] = t; \
276 t ^= e_key[8 * i + 3]; e_key[8 * i + 11] = t; \
277 t = e_key[8 * i + 4] ^ ls_box(t); \
278 e_key[8 * i + 12] = t; \
279 t ^= e_key[8 * i + 5]; e_key[8 * i + 13] = t; \
280 t ^= e_key[8 * i + 6]; e_key[8 * i + 14] = t; \
281 t ^= e_key[8 * i + 7]; e_key[8 * i + 15] = t; \
285 rijndael_set_key(rijndael_ctx *ctx, const u4byte *in_key, const u4byte key_len,
288 u4byte i, t, u, v, w;
289 u4byte *e_key = ctx->e_key;
290 u4byte *d_key = ctx->d_key;
292 ctx->decrypt = !encrypt;
297 ctx->k_len = (key_len + 31) / 32;
299 e_key[0] = io_swap(in_key[0]); e_key[1] = io_swap(in_key[1]);
300 e_key[2] = io_swap(in_key[2]); e_key[3] = io_swap(in_key[3]);
303 case 4: t = e_key[3];
304 for(i = 0; i < 10; ++i)
308 case 6: e_key[4] = io_swap(in_key[4]); t = e_key[5] = io_swap(in_key[5]);
309 for(i = 0; i < 8; ++i)
313 case 8: e_key[4] = io_swap(in_key[4]); e_key[5] = io_swap(in_key[5]);
314 e_key[6] = io_swap(in_key[6]); t = e_key[7] = io_swap(in_key[7]);
315 for(i = 0; i < 7; ++i)
321 d_key[0] = e_key[0]; d_key[1] = e_key[1];
322 d_key[2] = e_key[2]; d_key[3] = e_key[3];
324 for(i = 4; i < 4 * ctx->k_len + 24; ++i) {
325 imix_col(d_key[i], e_key[i]);
332 /* encrypt a block of text */
334 #define f_nround(bo, bi, k) \
335 f_rn(bo, bi, 0, k); \
336 f_rn(bo, bi, 1, k); \
337 f_rn(bo, bi, 2, k); \
338 f_rn(bo, bi, 3, k); \
341 #define f_lround(bo, bi, k) \
342 f_rl(bo, bi, 0, k); \
343 f_rl(bo, bi, 1, k); \
344 f_rl(bo, bi, 2, k); \
348 rijndael_encrypt(rijndael_ctx *ctx, const u4byte *in_blk, u4byte *out_blk)
350 u4byte k_len = ctx->k_len;
351 u4byte *e_key = ctx->e_key;
352 u4byte b0[4], b1[4], *kp;
354 b0[0] = io_swap(in_blk[0]) ^ e_key[0];
355 b0[1] = io_swap(in_blk[1]) ^ e_key[1];
356 b0[2] = io_swap(in_blk[2]) ^ e_key[2];
357 b0[3] = io_swap(in_blk[3]) ^ e_key[3];
362 f_nround(b1, b0, kp); f_nround(b0, b1, kp);
366 f_nround(b1, b0, kp); f_nround(b0, b1, kp);
369 f_nround(b1, b0, kp); f_nround(b0, b1, kp);
370 f_nround(b1, b0, kp); f_nround(b0, b1, kp);
371 f_nround(b1, b0, kp); f_nround(b0, b1, kp);
372 f_nround(b1, b0, kp); f_nround(b0, b1, kp);
373 f_nround(b1, b0, kp); f_lround(b0, b1, kp);
375 out_blk[0] = io_swap(b0[0]); out_blk[1] = io_swap(b0[1]);
376 out_blk[2] = io_swap(b0[2]); out_blk[3] = io_swap(b0[3]);
379 /* decrypt a block of text */
381 #define i_nround(bo, bi, k) \
382 i_rn(bo, bi, 0, k); \
383 i_rn(bo, bi, 1, k); \
384 i_rn(bo, bi, 2, k); \
385 i_rn(bo, bi, 3, k); \
388 #define i_lround(bo, bi, k) \
389 i_rl(bo, bi, 0, k); \
390 i_rl(bo, bi, 1, k); \
391 i_rl(bo, bi, 2, k); \
395 rijndael_decrypt(rijndael_ctx *ctx, const u4byte *in_blk, u4byte *out_blk)
397 u4byte b0[4], b1[4], *kp;
398 u4byte k_len = ctx->k_len;
399 u4byte *e_key = ctx->e_key;
400 u4byte *d_key = ctx->d_key;
402 b0[0] = io_swap(in_blk[0]) ^ e_key[4 * k_len + 24];
403 b0[1] = io_swap(in_blk[1]) ^ e_key[4 * k_len + 25];
404 b0[2] = io_swap(in_blk[2]) ^ e_key[4 * k_len + 26];
405 b0[3] = io_swap(in_blk[3]) ^ e_key[4 * k_len + 27];
407 kp = d_key + 4 * (k_len + 5);
410 i_nround(b1, b0, kp); i_nround(b0, b1, kp);
414 i_nround(b1, b0, kp); i_nround(b0, b1, kp);
417 i_nround(b1, b0, kp); i_nround(b0, b1, kp);
418 i_nround(b1, b0, kp); i_nround(b0, b1, kp);
419 i_nround(b1, b0, kp); i_nround(b0, b1, kp);
420 i_nround(b1, b0, kp); i_nround(b0, b1, kp);
421 i_nround(b1, b0, kp); i_lround(b0, b1, kp);
423 out_blk[0] = io_swap(b0[0]); out_blk[1] = io_swap(b0[1]);
424 out_blk[2] = io_swap(b0[2]); out_blk[3] = io_swap(b0[3]);