122cd51feSMatthew Dillon /*
222cd51feSMatthew Dillon chacha-merged.c version 20080118
322cd51feSMatthew Dillon D. J. Bernstein
422cd51feSMatthew Dillon Public domain.
522cd51feSMatthew Dillon */
622cd51feSMatthew Dillon
722cd51feSMatthew Dillon /* $OpenBSD: chacha.c,v 1.1 2013/11/21 00:45:44 djm Exp $ */
822cd51feSMatthew Dillon
922cd51feSMatthew Dillon #include <sys/param.h>
1022cd51feSMatthew Dillon
1122cd51feSMatthew Dillon #include "chacha.h"
1222cd51feSMatthew Dillon
/* Short aliases for the fixed-width integer types used in this file. */
typedef uint8_t u8;
typedef uint32_t u32;

typedef struct chacha_ctx chacha_ctx;

/* Append an unsigned suffix to an integer literal. */
#define U8C(v) (v##U)
#define U32C(v) (v##U)

/* Convert to the target width and mask off any higher bits. */
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/*
 * Rotate v left by n bits.  Intended for a 32-bit unsigned v with
 * 0 < n < 32; other counts would shift by the full width of the type.
 */
#define ROTL32(v, n) \
	(U32V((v) << (n)) | ((v) >> (32 - (n))))

/* Assemble a u32 from the four bytes at p, least significant byte first. */
#define U8TO32_LITTLE(p) \
	(((u32)((p)[0])) | \
	 ((u32)((p)[1]) << 8) | \
	 ((u32)((p)[2]) << 16) | \
	 ((u32)((p)[3]) << 24))

/* Store v into the four bytes at p, least significant byte first. */
#define U32TO8_LITTLE(p, v) \
	do { \
		(p)[0] = U8V((v)); \
		(p)[1] = U8V((v) >> 8); \
		(p)[2] = U8V((v) >> 16); \
		(p)[3] = U8V((v) >> 24); \
	} while (0)

/* The three word operations the cipher is built from. */
#define ROTATE(v, c) ROTL32((v), (c))
#define XOR(v, w) ((v) ^ (w))
#define PLUS(v, w) (U32V((v) + (w)))
#define PLUSONE(v) PLUS((v), 1)

/*
 * One quarter round over four state words, updated in place.
 * The expansion is a plain statement sequence that supplies its own
 * final semicolon, so it is invoked without one.
 */
#define QUARTERROUND(a, b, c, d) \
	a = PLUS(a, b); \
	d = ROTATE(XOR(d, a), 16); \
	c = PLUS(c, d); \
	b = ROTATE(XOR(b, c), 12); \
	a = PLUS(a, b); \
	d = ROTATE(XOR(d, a), 8); \
	c = PLUS(c, d); \
	b = ROTATE(XOR(b, c), 7);

/* 16-byte constants for state words 0..3; exactly 16 chars, no NUL. */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
5422cd51feSMatthew Dillon
5522cd51feSMatthew Dillon LOCAL void
chacha_keysetup(chacha_ctx * x,const u8 * k,u32 kbits)5622cd51feSMatthew Dillon chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits)
5722cd51feSMatthew Dillon {
5822cd51feSMatthew Dillon const char *constants;
5922cd51feSMatthew Dillon
6022cd51feSMatthew Dillon x->input[4] = U8TO32_LITTLE(k + 0);
6122cd51feSMatthew Dillon x->input[5] = U8TO32_LITTLE(k + 4);
6222cd51feSMatthew Dillon x->input[6] = U8TO32_LITTLE(k + 8);
6322cd51feSMatthew Dillon x->input[7] = U8TO32_LITTLE(k + 12);
6422cd51feSMatthew Dillon if (kbits == 256) { /* recommended */
6522cd51feSMatthew Dillon k += 16;
6622cd51feSMatthew Dillon constants = sigma;
6722cd51feSMatthew Dillon } else { /* kbits == 128 */
6822cd51feSMatthew Dillon constants = tau;
6922cd51feSMatthew Dillon }
7022cd51feSMatthew Dillon x->input[8] = U8TO32_LITTLE(k + 0);
7122cd51feSMatthew Dillon x->input[9] = U8TO32_LITTLE(k + 4);
7222cd51feSMatthew Dillon x->input[10] = U8TO32_LITTLE(k + 8);
7322cd51feSMatthew Dillon x->input[11] = U8TO32_LITTLE(k + 12);
7422cd51feSMatthew Dillon x->input[0] = U8TO32_LITTLE(constants + 0);
7522cd51feSMatthew Dillon x->input[1] = U8TO32_LITTLE(constants + 4);
7622cd51feSMatthew Dillon x->input[2] = U8TO32_LITTLE(constants + 8);
7722cd51feSMatthew Dillon x->input[3] = U8TO32_LITTLE(constants + 12);
7822cd51feSMatthew Dillon }
7922cd51feSMatthew Dillon
8022cd51feSMatthew Dillon LOCAL void
chacha_ivsetup(chacha_ctx * x,const u8 * iv,const u8 * counter)8122cd51feSMatthew Dillon chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
8222cd51feSMatthew Dillon {
8322766d5aSAaron LI #ifdef CHACHA_NONCE0_CTR128
8422766d5aSAaron LI /* 128-bit counter without IV */
8522cd51feSMatthew Dillon (void)iv;
8622cd51feSMatthew Dillon x->input[12] = U8TO32_LITTLE(counter + 0);
8722cd51feSMatthew Dillon x->input[13] = U8TO32_LITTLE(counter + 4);
8822cd51feSMatthew Dillon x->input[14] = U8TO32_LITTLE(counter + 8);
8922cd51feSMatthew Dillon x->input[15] = U8TO32_LITTLE(counter + 12);
9022766d5aSAaron LI #else
9122766d5aSAaron LI /* 64-bit IV and 64-bit counter */
9222766d5aSAaron LI x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
9322766d5aSAaron LI x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
9422766d5aSAaron LI x->input[14] = U8TO32_LITTLE(iv + 0);
9522766d5aSAaron LI x->input[15] = U8TO32_LITTLE(iv + 4);
9622cd51feSMatthew Dillon #endif
9722cd51feSMatthew Dillon }
9822cd51feSMatthew Dillon
#ifdef CHACHA_NONCE0_CTR128
/*
 * Write state words 12..15 to the 16 bytes at counter, each word
 * little-endian.
 */
LOCAL void
chacha_ctrsave(const chacha_ctx *x, u8 *counter)
{
	int w;

	for (w = 0; w < 4; w++) {
		U32TO8_LITTLE(counter + 4 * w, x->input[12 + w]);
	}
}
#endif
10922cd51feSMatthew Dillon
11022cd51feSMatthew Dillon LOCAL void
chacha_encrypt_bytes(chacha_ctx * x,const u8 * m,u8 * c,u32 bytes)11122cd51feSMatthew Dillon chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes)
11222cd51feSMatthew Dillon {
11322cd51feSMatthew Dillon u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
11422cd51feSMatthew Dillon u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
11522cd51feSMatthew Dillon u8 *ctarget = NULL;
11622cd51feSMatthew Dillon u8 tmp[64];
11722766d5aSAaron LI u32 i;
11822cd51feSMatthew Dillon
11922cd51feSMatthew Dillon if (!bytes) return;
12022cd51feSMatthew Dillon
12122cd51feSMatthew Dillon j0 = x->input[0];
12222cd51feSMatthew Dillon j1 = x->input[1];
12322cd51feSMatthew Dillon j2 = x->input[2];
12422cd51feSMatthew Dillon j3 = x->input[3];
12522cd51feSMatthew Dillon j4 = x->input[4];
12622cd51feSMatthew Dillon j5 = x->input[5];
12722cd51feSMatthew Dillon j6 = x->input[6];
12822cd51feSMatthew Dillon j7 = x->input[7];
12922cd51feSMatthew Dillon j8 = x->input[8];
13022cd51feSMatthew Dillon j9 = x->input[9];
13122cd51feSMatthew Dillon j10 = x->input[10];
13222cd51feSMatthew Dillon j11 = x->input[11];
13322cd51feSMatthew Dillon j12 = x->input[12];
13422cd51feSMatthew Dillon j13 = x->input[13];
13522cd51feSMatthew Dillon j14 = x->input[14];
13622cd51feSMatthew Dillon j15 = x->input[15];
13722cd51feSMatthew Dillon
13822cd51feSMatthew Dillon for (;;) {
13922cd51feSMatthew Dillon if (bytes < 64) {
14022cd51feSMatthew Dillon #ifndef KEYSTREAM_ONLY
14122cd51feSMatthew Dillon for (i = 0;i < bytes;++i) tmp[i] = m[i];
14222cd51feSMatthew Dillon m = tmp;
1434104d691SAaron LI #else
1444104d691SAaron LI (void)m; /* suppress compiler warning */
14522cd51feSMatthew Dillon #endif
14622cd51feSMatthew Dillon ctarget = c;
14722cd51feSMatthew Dillon c = tmp;
14822cd51feSMatthew Dillon }
14922cd51feSMatthew Dillon x0 = j0;
15022cd51feSMatthew Dillon x1 = j1;
15122cd51feSMatthew Dillon x2 = j2;
15222cd51feSMatthew Dillon x3 = j3;
15322cd51feSMatthew Dillon x4 = j4;
15422cd51feSMatthew Dillon x5 = j5;
15522cd51feSMatthew Dillon x6 = j6;
15622cd51feSMatthew Dillon x7 = j7;
15722cd51feSMatthew Dillon x8 = j8;
15822cd51feSMatthew Dillon x9 = j9;
15922cd51feSMatthew Dillon x10 = j10;
16022cd51feSMatthew Dillon x11 = j11;
16122cd51feSMatthew Dillon x12 = j12;
16222cd51feSMatthew Dillon x13 = j13;
16322cd51feSMatthew Dillon x14 = j14;
16422cd51feSMatthew Dillon x15 = j15;
16522cd51feSMatthew Dillon for (i = 20;i > 0;i -= 2) {
16622cd51feSMatthew Dillon QUARTERROUND( x0, x4, x8,x12)
16722cd51feSMatthew Dillon QUARTERROUND( x1, x5, x9,x13)
16822cd51feSMatthew Dillon QUARTERROUND( x2, x6,x10,x14)
16922cd51feSMatthew Dillon QUARTERROUND( x3, x7,x11,x15)
17022cd51feSMatthew Dillon QUARTERROUND( x0, x5,x10,x15)
17122cd51feSMatthew Dillon QUARTERROUND( x1, x6,x11,x12)
17222cd51feSMatthew Dillon QUARTERROUND( x2, x7, x8,x13)
17322cd51feSMatthew Dillon QUARTERROUND( x3, x4, x9,x14)
17422cd51feSMatthew Dillon }
17522cd51feSMatthew Dillon x0 = PLUS(x0,j0);
17622cd51feSMatthew Dillon x1 = PLUS(x1,j1);
17722cd51feSMatthew Dillon x2 = PLUS(x2,j2);
17822cd51feSMatthew Dillon x3 = PLUS(x3,j3);
17922cd51feSMatthew Dillon x4 = PLUS(x4,j4);
18022cd51feSMatthew Dillon x5 = PLUS(x5,j5);
18122cd51feSMatthew Dillon x6 = PLUS(x6,j6);
18222cd51feSMatthew Dillon x7 = PLUS(x7,j7);
18322cd51feSMatthew Dillon x8 = PLUS(x8,j8);
18422cd51feSMatthew Dillon x9 = PLUS(x9,j9);
18522cd51feSMatthew Dillon x10 = PLUS(x10,j10);
18622cd51feSMatthew Dillon x11 = PLUS(x11,j11);
18722cd51feSMatthew Dillon x12 = PLUS(x12,j12);
18822cd51feSMatthew Dillon x13 = PLUS(x13,j13);
18922cd51feSMatthew Dillon x14 = PLUS(x14,j14);
19022cd51feSMatthew Dillon x15 = PLUS(x15,j15);
19122cd51feSMatthew Dillon
19222cd51feSMatthew Dillon #ifndef KEYSTREAM_ONLY
19322cd51feSMatthew Dillon x0 = XOR(x0,U8TO32_LITTLE(m + 0));
19422cd51feSMatthew Dillon x1 = XOR(x1,U8TO32_LITTLE(m + 4));
19522cd51feSMatthew Dillon x2 = XOR(x2,U8TO32_LITTLE(m + 8));
19622cd51feSMatthew Dillon x3 = XOR(x3,U8TO32_LITTLE(m + 12));
19722cd51feSMatthew Dillon x4 = XOR(x4,U8TO32_LITTLE(m + 16));
19822cd51feSMatthew Dillon x5 = XOR(x5,U8TO32_LITTLE(m + 20));
19922cd51feSMatthew Dillon x6 = XOR(x6,U8TO32_LITTLE(m + 24));
20022cd51feSMatthew Dillon x7 = XOR(x7,U8TO32_LITTLE(m + 28));
20122cd51feSMatthew Dillon x8 = XOR(x8,U8TO32_LITTLE(m + 32));
20222cd51feSMatthew Dillon x9 = XOR(x9,U8TO32_LITTLE(m + 36));
20322cd51feSMatthew Dillon x10 = XOR(x10,U8TO32_LITTLE(m + 40));
20422cd51feSMatthew Dillon x11 = XOR(x11,U8TO32_LITTLE(m + 44));
20522cd51feSMatthew Dillon x12 = XOR(x12,U8TO32_LITTLE(m + 48));
20622cd51feSMatthew Dillon x13 = XOR(x13,U8TO32_LITTLE(m + 52));
20722cd51feSMatthew Dillon x14 = XOR(x14,U8TO32_LITTLE(m + 56));
20822cd51feSMatthew Dillon x15 = XOR(x15,U8TO32_LITTLE(m + 60));
20922cd51feSMatthew Dillon #endif
21022cd51feSMatthew Dillon
21122cd51feSMatthew Dillon j12 = PLUSONE(j12);
21222cd51feSMatthew Dillon if (!j12) {
21322cd51feSMatthew Dillon j13 = PLUSONE(j13);
21422766d5aSAaron LI #ifdef CHACHA_NONCE0_CTR128
21522cd51feSMatthew Dillon if (!j13) {
21622cd51feSMatthew Dillon j14 = PLUSONE(j14);
21722cd51feSMatthew Dillon if (!j14) {
21822cd51feSMatthew Dillon j15 = PLUSONE(j15);
21922cd51feSMatthew Dillon }
22022cd51feSMatthew Dillon }
22122766d5aSAaron LI #else
22222766d5aSAaron LI /* stopping at 2^70 bytes per nonce is user's responsibility */
22322cd51feSMatthew Dillon #endif
22422cd51feSMatthew Dillon }
22522cd51feSMatthew Dillon
22622cd51feSMatthew Dillon U32TO8_LITTLE(c + 0,x0);
22722cd51feSMatthew Dillon U32TO8_LITTLE(c + 4,x1);
22822cd51feSMatthew Dillon U32TO8_LITTLE(c + 8,x2);
22922cd51feSMatthew Dillon U32TO8_LITTLE(c + 12,x3);
23022cd51feSMatthew Dillon U32TO8_LITTLE(c + 16,x4);
23122cd51feSMatthew Dillon U32TO8_LITTLE(c + 20,x5);
23222cd51feSMatthew Dillon U32TO8_LITTLE(c + 24,x6);
23322cd51feSMatthew Dillon U32TO8_LITTLE(c + 28,x7);
23422cd51feSMatthew Dillon U32TO8_LITTLE(c + 32,x8);
23522cd51feSMatthew Dillon U32TO8_LITTLE(c + 36,x9);
23622cd51feSMatthew Dillon U32TO8_LITTLE(c + 40,x10);
23722cd51feSMatthew Dillon U32TO8_LITTLE(c + 44,x11);
23822cd51feSMatthew Dillon U32TO8_LITTLE(c + 48,x12);
23922cd51feSMatthew Dillon U32TO8_LITTLE(c + 52,x13);
24022cd51feSMatthew Dillon U32TO8_LITTLE(c + 56,x14);
24122cd51feSMatthew Dillon U32TO8_LITTLE(c + 60,x15);
24222cd51feSMatthew Dillon
24322cd51feSMatthew Dillon if (bytes <= 64) {
24422cd51feSMatthew Dillon if (bytes < 64) {
24522cd51feSMatthew Dillon for (i = 0;i < bytes;++i) ctarget[i] = c[i];
24622cd51feSMatthew Dillon }
24722cd51feSMatthew Dillon x->input[12] = j12;
24822cd51feSMatthew Dillon x->input[13] = j13;
24922cd51feSMatthew Dillon #ifdef CHACHA_NONCE0_CTR128
25022cd51feSMatthew Dillon x->input[14] = j14;
25122cd51feSMatthew Dillon x->input[15] = j15;
25222cd51feSMatthew Dillon #endif
25322cd51feSMatthew Dillon return;
25422cd51feSMatthew Dillon }
25522cd51feSMatthew Dillon bytes -= 64;
25622cd51feSMatthew Dillon c += 64;
25722cd51feSMatthew Dillon #ifndef KEYSTREAM_ONLY
25822cd51feSMatthew Dillon m += 64;
25922cd51feSMatthew Dillon #endif
26022cd51feSMatthew Dillon }
26122cd51feSMatthew Dillon }
262*ec5a219cSAaron LI
263*ec5a219cSAaron LI LOCAL void
hchacha20(u8 derived_key[32],const u8 nonce[16],const u8 key[32])264*ec5a219cSAaron LI hchacha20(u8 derived_key[32], const u8 nonce[16], const u8 key[32])
265*ec5a219cSAaron LI {
266*ec5a219cSAaron LI u32 x[16] = {
267*ec5a219cSAaron LI U8TO32_LITTLE(sigma + 0),
268*ec5a219cSAaron LI U8TO32_LITTLE(sigma + 4),
269*ec5a219cSAaron LI U8TO32_LITTLE(sigma + 8),
270*ec5a219cSAaron LI U8TO32_LITTLE(sigma + 12),
271*ec5a219cSAaron LI U8TO32_LITTLE(key + 0),
272*ec5a219cSAaron LI U8TO32_LITTLE(key + 4),
273*ec5a219cSAaron LI U8TO32_LITTLE(key + 8),
274*ec5a219cSAaron LI U8TO32_LITTLE(key + 12),
275*ec5a219cSAaron LI U8TO32_LITTLE(key + 16),
276*ec5a219cSAaron LI U8TO32_LITTLE(key + 20),
277*ec5a219cSAaron LI U8TO32_LITTLE(key + 24),
278*ec5a219cSAaron LI U8TO32_LITTLE(key + 28),
279*ec5a219cSAaron LI U8TO32_LITTLE(nonce + 0),
280*ec5a219cSAaron LI U8TO32_LITTLE(nonce + 4),
281*ec5a219cSAaron LI U8TO32_LITTLE(nonce + 8),
282*ec5a219cSAaron LI U8TO32_LITTLE(nonce + 12)
283*ec5a219cSAaron LI };
284*ec5a219cSAaron LI u32 i;
285*ec5a219cSAaron LI
286*ec5a219cSAaron LI for (i = 20; i > 0; i -= 2) {
287*ec5a219cSAaron LI QUARTERROUND( x[0], x[4], x[8],x[12])
288*ec5a219cSAaron LI QUARTERROUND( x[1], x[5], x[9],x[13])
289*ec5a219cSAaron LI QUARTERROUND( x[2], x[6],x[10],x[14])
290*ec5a219cSAaron LI QUARTERROUND( x[3], x[7],x[11],x[15])
291*ec5a219cSAaron LI QUARTERROUND( x[0], x[5],x[10],x[15])
292*ec5a219cSAaron LI QUARTERROUND( x[1], x[6],x[11],x[12])
293*ec5a219cSAaron LI QUARTERROUND( x[2], x[7], x[8],x[13])
294*ec5a219cSAaron LI QUARTERROUND( x[3], x[4], x[9],x[14])
295*ec5a219cSAaron LI }
296*ec5a219cSAaron LI
297*ec5a219cSAaron LI U32TO8_LITTLE(derived_key + 0, x[0]);
298*ec5a219cSAaron LI U32TO8_LITTLE(derived_key + 4, x[1]);
299*ec5a219cSAaron LI U32TO8_LITTLE(derived_key + 8, x[2]);
300*ec5a219cSAaron LI U32TO8_LITTLE(derived_key + 12, x[3]);
301*ec5a219cSAaron LI U32TO8_LITTLE(derived_key + 16, x[12]);
302*ec5a219cSAaron LI U32TO8_LITTLE(derived_key + 20, x[13]);
303*ec5a219cSAaron LI U32TO8_LITTLE(derived_key + 24, x[14]);
304*ec5a219cSAaron LI U32TO8_LITTLE(derived_key + 28, x[15]);
305*ec5a219cSAaron LI }
306