/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/

/* $OpenBSD: chacha_private.h,v 1.3 2022/02/28 21:56:29 dtucker Exp $ */

/* Short integer aliases used by every macro and function in this file. */
typedef unsigned char u8;	/* one byte of key, IV, input or output */
typedef unsigned int u32;	/* one state word; U32V() masks it to 32 bits */

/*
 * Cipher state: sixteen 32-bit words.
 *
 * As filled in by the setup functions in this file:
 *   input[0..3]   constant words (sigma or tau)
 *   input[4..11]  key words
 *   input[12..13] block counter, low word first
 *   input[14..15] IV words
 */
typedef struct
{
  u32 input[16]; /* could be compressed */
} chacha_ctx;

/* Append the unsigned suffix to an integer literal. */
#define U8C(v) (v##U)
#define U32C(v) (v##U)

/* Truncate a value to 8 / 32 bits, even if u8 / u32 happen to be wider. */
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/*
 * Rotate the 32-bit value v left by n bits.
 * n must satisfy 0 < n < 32; v is expected to be a u32 so that the right
 * shift is logical.  Both arguments are evaluated more than once.
 */
#define ROTL32(v, n) \
  (U32V((v) << (n)) | ((v) >> (32 - (n))))

/* Load four bytes at p as a little-endian 32-bit word. */
#define U8TO32_LITTLE(p) \
  (((u32)((p)[0])      ) | \
   ((u32)((p)[1]) <<  8) | \
   ((u32)((p)[2]) << 16) | \
   ((u32)((p)[3]) << 24))

/* Store the 32-bit word v at p as four little-endian bytes. */
#define U32TO8_LITTLE(p, v) \
  do { \
    (p)[0] = U8V((v)      ); \
    (p)[1] = U8V((v) >>  8); \
    (p)[2] = U8V((v) >> 16); \
    (p)[3] = U8V((v) >> 24); \
  } while (0)

/* Word-level primitives used by the round function (all modulo 2^32). */
#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/*
 * One quarter-round over four state words: add / xor / rotate with the
 * rotation amounts 16, 12, 8, 7.  All four arguments are modified in place
 * and must be plain u32 lvalues.
 *
 * The expansion is a sequence of bare statements that already ends in ';',
 * which is why the call sites in this file carry no trailing semicolon.
 * Do not use it as the body of an unbraced if/else/loop.
 */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/*
 * Constant strings loaded into state words 0..3 by chacha_keysetup():
 * sigma for 256-bit keys, tau otherwise.  Each initializer is exactly
 * 16 characters, so the arrays hold no terminating NUL and must not be
 * passed to string functions.
 */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";

5490c1fad7Smarkus static void
chacha_keysetup(chacha_ctx * x,const u8 * k,u32 kbits)55*822b16c6Sdtucker chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
5690c1fad7Smarkus {
5790c1fad7Smarkus const char *constants;
5890c1fad7Smarkus
5990c1fad7Smarkus x->input[4] = U8TO32_LITTLE(k + 0);
6090c1fad7Smarkus x->input[5] = U8TO32_LITTLE(k + 4);
6190c1fad7Smarkus x->input[6] = U8TO32_LITTLE(k + 8);
6290c1fad7Smarkus x->input[7] = U8TO32_LITTLE(k + 12);
6390c1fad7Smarkus if (kbits == 256) { /* recommended */
6490c1fad7Smarkus k += 16;
6590c1fad7Smarkus constants = sigma;
6690c1fad7Smarkus } else { /* kbits == 128 */
6790c1fad7Smarkus constants = tau;
6890c1fad7Smarkus }
6990c1fad7Smarkus x->input[8] = U8TO32_LITTLE(k + 0);
7090c1fad7Smarkus x->input[9] = U8TO32_LITTLE(k + 4);
7190c1fad7Smarkus x->input[10] = U8TO32_LITTLE(k + 8);
7290c1fad7Smarkus x->input[11] = U8TO32_LITTLE(k + 12);
7390c1fad7Smarkus x->input[0] = U8TO32_LITTLE(constants + 0);
7490c1fad7Smarkus x->input[1] = U8TO32_LITTLE(constants + 4);
7590c1fad7Smarkus x->input[2] = U8TO32_LITTLE(constants + 8);
7690c1fad7Smarkus x->input[3] = U8TO32_LITTLE(constants + 12);
7790c1fad7Smarkus }
7890c1fad7Smarkus
7990c1fad7Smarkus static void
chacha_ivsetup(chacha_ctx * x,const u8 * iv)8090c1fad7Smarkus chacha_ivsetup(chacha_ctx *x,const u8 *iv)
8190c1fad7Smarkus {
8290c1fad7Smarkus x->input[12] = 0;
8390c1fad7Smarkus x->input[13] = 0;
8490c1fad7Smarkus x->input[14] = U8TO32_LITTLE(iv + 0);
8590c1fad7Smarkus x->input[15] = U8TO32_LITTLE(iv + 4);
8690c1fad7Smarkus }
8790c1fad7Smarkus
8890c1fad7Smarkus static void
chacha_encrypt_bytes(chacha_ctx * x,const u8 * m,u8 * c,u32 bytes)8990c1fad7Smarkus chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
9090c1fad7Smarkus {
9190c1fad7Smarkus u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
9290c1fad7Smarkus u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
93cf5504b8Sdjm u8 *ctarget = NULL;
9490c1fad7Smarkus u8 tmp[64];
95cf5504b8Sdjm u_int i;
9690c1fad7Smarkus
9790c1fad7Smarkus if (!bytes) return;
9890c1fad7Smarkus
9990c1fad7Smarkus j0 = x->input[0];
10090c1fad7Smarkus j1 = x->input[1];
10190c1fad7Smarkus j2 = x->input[2];
10290c1fad7Smarkus j3 = x->input[3];
10390c1fad7Smarkus j4 = x->input[4];
10490c1fad7Smarkus j5 = x->input[5];
10590c1fad7Smarkus j6 = x->input[6];
10690c1fad7Smarkus j7 = x->input[7];
10790c1fad7Smarkus j8 = x->input[8];
10890c1fad7Smarkus j9 = x->input[9];
10990c1fad7Smarkus j10 = x->input[10];
11090c1fad7Smarkus j11 = x->input[11];
11190c1fad7Smarkus j12 = x->input[12];
11290c1fad7Smarkus j13 = x->input[13];
11390c1fad7Smarkus j14 = x->input[14];
11490c1fad7Smarkus j15 = x->input[15];
11590c1fad7Smarkus
11690c1fad7Smarkus for (;;) {
11790c1fad7Smarkus if (bytes < 64) {
11890c1fad7Smarkus for (i = 0;i < bytes;++i) tmp[i] = m[i];
11990c1fad7Smarkus m = tmp;
12090c1fad7Smarkus ctarget = c;
12190c1fad7Smarkus c = tmp;
12290c1fad7Smarkus }
12390c1fad7Smarkus x0 = j0;
12490c1fad7Smarkus x1 = j1;
12590c1fad7Smarkus x2 = j2;
12690c1fad7Smarkus x3 = j3;
12790c1fad7Smarkus x4 = j4;
12890c1fad7Smarkus x5 = j5;
12990c1fad7Smarkus x6 = j6;
13090c1fad7Smarkus x7 = j7;
13190c1fad7Smarkus x8 = j8;
13290c1fad7Smarkus x9 = j9;
13390c1fad7Smarkus x10 = j10;
13490c1fad7Smarkus x11 = j11;
13590c1fad7Smarkus x12 = j12;
13690c1fad7Smarkus x13 = j13;
13790c1fad7Smarkus x14 = j14;
13890c1fad7Smarkus x15 = j15;
13990c1fad7Smarkus for (i = 20;i > 0;i -= 2) {
14090c1fad7Smarkus QUARTERROUND( x0, x4, x8,x12)
14190c1fad7Smarkus QUARTERROUND( x1, x5, x9,x13)
14290c1fad7Smarkus QUARTERROUND( x2, x6,x10,x14)
14390c1fad7Smarkus QUARTERROUND( x3, x7,x11,x15)
14490c1fad7Smarkus QUARTERROUND( x0, x5,x10,x15)
14590c1fad7Smarkus QUARTERROUND( x1, x6,x11,x12)
14690c1fad7Smarkus QUARTERROUND( x2, x7, x8,x13)
14790c1fad7Smarkus QUARTERROUND( x3, x4, x9,x14)
14890c1fad7Smarkus }
14990c1fad7Smarkus x0 = PLUS(x0,j0);
15090c1fad7Smarkus x1 = PLUS(x1,j1);
15190c1fad7Smarkus x2 = PLUS(x2,j2);
15290c1fad7Smarkus x3 = PLUS(x3,j3);
15390c1fad7Smarkus x4 = PLUS(x4,j4);
15490c1fad7Smarkus x5 = PLUS(x5,j5);
15590c1fad7Smarkus x6 = PLUS(x6,j6);
15690c1fad7Smarkus x7 = PLUS(x7,j7);
15790c1fad7Smarkus x8 = PLUS(x8,j8);
15890c1fad7Smarkus x9 = PLUS(x9,j9);
15990c1fad7Smarkus x10 = PLUS(x10,j10);
16090c1fad7Smarkus x11 = PLUS(x11,j11);
16190c1fad7Smarkus x12 = PLUS(x12,j12);
16290c1fad7Smarkus x13 = PLUS(x13,j13);
16390c1fad7Smarkus x14 = PLUS(x14,j14);
16490c1fad7Smarkus x15 = PLUS(x15,j15);
16590c1fad7Smarkus
16690c1fad7Smarkus #ifndef KEYSTREAM_ONLY
16790c1fad7Smarkus x0 = XOR(x0,U8TO32_LITTLE(m + 0));
16890c1fad7Smarkus x1 = XOR(x1,U8TO32_LITTLE(m + 4));
16990c1fad7Smarkus x2 = XOR(x2,U8TO32_LITTLE(m + 8));
17090c1fad7Smarkus x3 = XOR(x3,U8TO32_LITTLE(m + 12));
17190c1fad7Smarkus x4 = XOR(x4,U8TO32_LITTLE(m + 16));
17290c1fad7Smarkus x5 = XOR(x5,U8TO32_LITTLE(m + 20));
17390c1fad7Smarkus x6 = XOR(x6,U8TO32_LITTLE(m + 24));
17490c1fad7Smarkus x7 = XOR(x7,U8TO32_LITTLE(m + 28));
17590c1fad7Smarkus x8 = XOR(x8,U8TO32_LITTLE(m + 32));
17690c1fad7Smarkus x9 = XOR(x9,U8TO32_LITTLE(m + 36));
17790c1fad7Smarkus x10 = XOR(x10,U8TO32_LITTLE(m + 40));
17890c1fad7Smarkus x11 = XOR(x11,U8TO32_LITTLE(m + 44));
17990c1fad7Smarkus x12 = XOR(x12,U8TO32_LITTLE(m + 48));
18090c1fad7Smarkus x13 = XOR(x13,U8TO32_LITTLE(m + 52));
18190c1fad7Smarkus x14 = XOR(x14,U8TO32_LITTLE(m + 56));
18290c1fad7Smarkus x15 = XOR(x15,U8TO32_LITTLE(m + 60));
18390c1fad7Smarkus #endif
18490c1fad7Smarkus
18590c1fad7Smarkus j12 = PLUSONE(j12);
18690c1fad7Smarkus if (!j12) {
18790c1fad7Smarkus j13 = PLUSONE(j13);
18890c1fad7Smarkus /* stopping at 2^70 bytes per nonce is user's responsibility */
18990c1fad7Smarkus }
19090c1fad7Smarkus
19190c1fad7Smarkus U32TO8_LITTLE(c + 0,x0);
19290c1fad7Smarkus U32TO8_LITTLE(c + 4,x1);
19390c1fad7Smarkus U32TO8_LITTLE(c + 8,x2);
19490c1fad7Smarkus U32TO8_LITTLE(c + 12,x3);
19590c1fad7Smarkus U32TO8_LITTLE(c + 16,x4);
19690c1fad7Smarkus U32TO8_LITTLE(c + 20,x5);
19790c1fad7Smarkus U32TO8_LITTLE(c + 24,x6);
19890c1fad7Smarkus U32TO8_LITTLE(c + 28,x7);
19990c1fad7Smarkus U32TO8_LITTLE(c + 32,x8);
20090c1fad7Smarkus U32TO8_LITTLE(c + 36,x9);
20190c1fad7Smarkus U32TO8_LITTLE(c + 40,x10);
20290c1fad7Smarkus U32TO8_LITTLE(c + 44,x11);
20390c1fad7Smarkus U32TO8_LITTLE(c + 48,x12);
20490c1fad7Smarkus U32TO8_LITTLE(c + 52,x13);
20590c1fad7Smarkus U32TO8_LITTLE(c + 56,x14);
20690c1fad7Smarkus U32TO8_LITTLE(c + 60,x15);
20790c1fad7Smarkus
20890c1fad7Smarkus if (bytes <= 64) {
20990c1fad7Smarkus if (bytes < 64) {
21090c1fad7Smarkus for (i = 0;i < bytes;++i) ctarget[i] = c[i];
21190c1fad7Smarkus }
21290c1fad7Smarkus x->input[12] = j12;
21390c1fad7Smarkus x->input[13] = j13;
21490c1fad7Smarkus return;
21590c1fad7Smarkus }
21690c1fad7Smarkus bytes -= 64;
21790c1fad7Smarkus c += 64;
21890c1fad7Smarkus #ifndef KEYSTREAM_ONLY
21990c1fad7Smarkus m += 64;
22090c1fad7Smarkus #endif
22190c1fad7Smarkus }
22290c1fad7Smarkus }
223