/* xref: /openbsd-src/lib/libcrypto/chacha/chacha-merged.c (revision ca66f509c660038c9c498650d232ce8d0fa5d470) */
/* $OpenBSD: chacha-merged.c,v 1.13 2024/06/05 19:43:06 tb Exp $ */
/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/

#include <stddef.h>
#include <stdint.h>

/* Size constants, in bytes. */
#define CHACHA_MINKEYLEN	16
#define CHACHA_NONCELEN		8
#define CHACHA_CTRLEN		8
#define CHACHA_STATELEN		(CHACHA_NONCELEN+CHACHA_CTRLEN)
#define CHACHA_BLOCKLEN		64

typedef uint8_t u8;
typedef uint32_t u32;

/*
 * Cipher context.
 *
 * input:  the sixteen 32-bit state words.  chacha_keysetup() fills words
 *         0-11 and chacha_ivsetup() fills words 12-15.
 * ks:     set by chacha_encrypt_bytes() to the keystream of a final block
 *         that was shorter than CHACHA_BLOCKLEN bytes.
 * unused: set by chacha_encrypt_bytes() to 64 minus the length of the last
 *         block it processed.
 */
struct chacha_ctx {
	u32 input[16];
	u8 ks[CHACHA_BLOCKLEN];
	u8 unused;
};

static inline void chacha_keysetup(struct chacha_ctx *x, const u8 *k, u32 kbits)
    __attribute__((__bounded__(__minbytes__, 2, CHACHA_MINKEYLEN)));
static inline void chacha_ivsetup(struct chacha_ctx *x, const u8 *iv,
    const u8 *ctr)
    __attribute__((__bounded__(__minbytes__, 2, CHACHA_NONCELEN)))
    __attribute__((__bounded__(__minbytes__, 3, CHACHA_CTRLEN)));
static inline void chacha_encrypt_bytes(struct chacha_ctx *x, const u8 *m,
    u8 *c, u32 bytes)
    __attribute__((__bounded__(__buffer__, 2, 4)))
    __attribute__((__bounded__(__buffer__, 3, 4)));

typedef struct chacha_ctx chacha_ctx;

/* Append an unsigned suffix to an integer literal. */
#define U8C(v) (v##U)
#define U32C(v) (v##U)

/* Truncate a value to 8 or 32 bits. */
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/* Rotate the 32-bit value v left by n bits; n must satisfy 0 < n < 32. */
#define ROTL32(v, n) \
  (U32V((v) << (n)) | ((v) >> (32 - (n))))

/* Assemble a 32-bit word from the four bytes at p, least significant first. */
#define U8TO32_LITTLE(p) \
  (((u32)((p)[0])) | \
   ((u32)((p)[1]) <<  8) | \
   ((u32)((p)[2]) << 16) | \
   ((u32)((p)[3]) << 24))

/* Store the 32-bit word v into the four bytes at p, least significant first. */
#define U32TO8_LITTLE(p, v) \
  do { \
    (p)[0] = U8V((v)); \
    (p)[1] = U8V((v) >>  8); \
    (p)[2] = U8V((v) >> 16); \
    (p)[3] = U8V((v) >> 24); \
  } while (0)

#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/*
 * One quarter round on four state words, updated in place.  The expansion
 * is a sequence of statements that already ends in ';', so it is invoked
 * without a trailing semicolon and must not be used as the unbraced body
 * of an if/else.
 */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/* Initialise with "expand 32-byte k". */
static const char sigma[16] = {
	0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x33,
	0x32, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b,
};

/* Initialise with "expand 16-byte k". */
static const char tau[16] = {
	0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x31,
	0x36, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b,
};

8493d23226Sjsing static inline void
chacha_keysetup(chacha_ctx * x,const u8 * k,u32 kbits)8593d23226Sjsing chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits)
8693d23226Sjsing {
8793d23226Sjsing 	const char *constants;
8893d23226Sjsing 
8993d23226Sjsing 	x->input[4] = U8TO32_LITTLE(k + 0);
9093d23226Sjsing 	x->input[5] = U8TO32_LITTLE(k + 4);
9193d23226Sjsing 	x->input[6] = U8TO32_LITTLE(k + 8);
9293d23226Sjsing 	x->input[7] = U8TO32_LITTLE(k + 12);
9393d23226Sjsing 	if (kbits == 256) { /* recommended */
9493d23226Sjsing 		k += 16;
9593d23226Sjsing 		constants = sigma;
9693d23226Sjsing 	} else { /* kbits == 128 */
9793d23226Sjsing 		constants = tau;
9893d23226Sjsing 	}
9993d23226Sjsing 	x->input[8] = U8TO32_LITTLE(k + 0);
10093d23226Sjsing 	x->input[9] = U8TO32_LITTLE(k + 4);
10193d23226Sjsing 	x->input[10] = U8TO32_LITTLE(k + 8);
10293d23226Sjsing 	x->input[11] = U8TO32_LITTLE(k + 12);
10393d23226Sjsing 	x->input[0] = U8TO32_LITTLE(constants + 0);
10493d23226Sjsing 	x->input[1] = U8TO32_LITTLE(constants + 4);
10593d23226Sjsing 	x->input[2] = U8TO32_LITTLE(constants + 8);
10693d23226Sjsing 	x->input[3] = U8TO32_LITTLE(constants + 12);
10793d23226Sjsing }
10893d23226Sjsing 
10993d23226Sjsing static inline void
chacha_ivsetup(chacha_ctx * x,const u8 * iv,const u8 * counter)11093d23226Sjsing chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
11193d23226Sjsing {
11293d23226Sjsing 	x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
11393d23226Sjsing 	x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
11493d23226Sjsing 	x->input[14] = U8TO32_LITTLE(iv + 0);
11593d23226Sjsing 	x->input[15] = U8TO32_LITTLE(iv + 4);
11693d23226Sjsing }
11793d23226Sjsing 
11893d23226Sjsing static inline void
chacha_encrypt_bytes(chacha_ctx * x,const u8 * m,u8 * c,u32 bytes)11993d23226Sjsing chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes)
12093d23226Sjsing {
12121f5cfdbSjsing 	u32 x0, x1, x2, x3, x4, x5, x6, x7;
12221f5cfdbSjsing 	u32 x8, x9, x10, x11, x12, x13, x14, x15;
12321f5cfdbSjsing 	u32 j0, j1, j2, j3, j4, j5, j6, j7;
12421f5cfdbSjsing 	u32 j8, j9, j10, j11, j12, j13, j14, j15;
12593d23226Sjsing 	u8 *ctarget = NULL;
12693d23226Sjsing 	u8 tmp[64];
1271f8d1a56Stb 	u32 i;
12893d23226Sjsing 
1295ffa517cSjsing 	if (!bytes)
1305ffa517cSjsing 		return;
13193d23226Sjsing 
13293d23226Sjsing 	j0 = x->input[0];
13393d23226Sjsing 	j1 = x->input[1];
13493d23226Sjsing 	j2 = x->input[2];
13593d23226Sjsing 	j3 = x->input[3];
13693d23226Sjsing 	j4 = x->input[4];
13793d23226Sjsing 	j5 = x->input[5];
13893d23226Sjsing 	j6 = x->input[6];
13993d23226Sjsing 	j7 = x->input[7];
14093d23226Sjsing 	j8 = x->input[8];
14193d23226Sjsing 	j9 = x->input[9];
14293d23226Sjsing 	j10 = x->input[10];
14393d23226Sjsing 	j11 = x->input[11];
14493d23226Sjsing 	j12 = x->input[12];
14593d23226Sjsing 	j13 = x->input[13];
14693d23226Sjsing 	j14 = x->input[14];
14793d23226Sjsing 	j15 = x->input[15];
14893d23226Sjsing 
14993d23226Sjsing 	for (;;) {
15093d23226Sjsing 		if (bytes < 64) {
15121f5cfdbSjsing 			for (i = 0; i < bytes; ++i)
15221f5cfdbSjsing 				tmp[i] = m[i];
15393d23226Sjsing 			m = tmp;
15493d23226Sjsing 			ctarget = c;
15593d23226Sjsing 			c = tmp;
15693d23226Sjsing 		}
15793d23226Sjsing 		x0 = j0;
15893d23226Sjsing 		x1 = j1;
15993d23226Sjsing 		x2 = j2;
16093d23226Sjsing 		x3 = j3;
16193d23226Sjsing 		x4 = j4;
16293d23226Sjsing 		x5 = j5;
16393d23226Sjsing 		x6 = j6;
16493d23226Sjsing 		x7 = j7;
16593d23226Sjsing 		x8 = j8;
16693d23226Sjsing 		x9 = j9;
16793d23226Sjsing 		x10 = j10;
16893d23226Sjsing 		x11 = j11;
16993d23226Sjsing 		x12 = j12;
17093d23226Sjsing 		x13 = j13;
17193d23226Sjsing 		x14 = j14;
17293d23226Sjsing 		x15 = j15;
17393d23226Sjsing 		for (i = 20; i > 0; i -= 2) {
17493d23226Sjsing 			QUARTERROUND(x0, x4, x8, x12)
17593d23226Sjsing 			QUARTERROUND(x1, x5, x9, x13)
17693d23226Sjsing 			QUARTERROUND(x2, x6, x10, x14)
17793d23226Sjsing 			QUARTERROUND(x3, x7, x11, x15)
17893d23226Sjsing 			QUARTERROUND(x0, x5, x10, x15)
17993d23226Sjsing 			QUARTERROUND(x1, x6, x11, x12)
18093d23226Sjsing 			QUARTERROUND(x2, x7, x8, x13)
18193d23226Sjsing 			QUARTERROUND(x3, x4, x9, x14)
18293d23226Sjsing 		}
18393d23226Sjsing 		x0 = PLUS(x0, j0);
18493d23226Sjsing 		x1 = PLUS(x1, j1);
18593d23226Sjsing 		x2 = PLUS(x2, j2);
18693d23226Sjsing 		x3 = PLUS(x3, j3);
18793d23226Sjsing 		x4 = PLUS(x4, j4);
18893d23226Sjsing 		x5 = PLUS(x5, j5);
18993d23226Sjsing 		x6 = PLUS(x6, j6);
19093d23226Sjsing 		x7 = PLUS(x7, j7);
19193d23226Sjsing 		x8 = PLUS(x8, j8);
19293d23226Sjsing 		x9 = PLUS(x9, j9);
19393d23226Sjsing 		x10 = PLUS(x10, j10);
19493d23226Sjsing 		x11 = PLUS(x11, j11);
19593d23226Sjsing 		x12 = PLUS(x12, j12);
19693d23226Sjsing 		x13 = PLUS(x13, j13);
19793d23226Sjsing 		x14 = PLUS(x14, j14);
19893d23226Sjsing 		x15 = PLUS(x15, j15);
19993d23226Sjsing 
20061bfdc17Sjsing 		if (bytes < 64) {
20161bfdc17Sjsing 			U32TO8_LITTLE(x->ks + 0, x0);
20261bfdc17Sjsing 			U32TO8_LITTLE(x->ks + 4, x1);
20361bfdc17Sjsing 			U32TO8_LITTLE(x->ks + 8, x2);
20461bfdc17Sjsing 			U32TO8_LITTLE(x->ks + 12, x3);
20561bfdc17Sjsing 			U32TO8_LITTLE(x->ks + 16, x4);
20661bfdc17Sjsing 			U32TO8_LITTLE(x->ks + 20, x5);
20761bfdc17Sjsing 			U32TO8_LITTLE(x->ks + 24, x6);
20861bfdc17Sjsing 			U32TO8_LITTLE(x->ks + 28, x7);
20961bfdc17Sjsing 			U32TO8_LITTLE(x->ks + 32, x8);
21061bfdc17Sjsing 			U32TO8_LITTLE(x->ks + 36, x9);
21161bfdc17Sjsing 			U32TO8_LITTLE(x->ks + 40, x10);
21261bfdc17Sjsing 			U32TO8_LITTLE(x->ks + 44, x11);
21361bfdc17Sjsing 			U32TO8_LITTLE(x->ks + 48, x12);
21461bfdc17Sjsing 			U32TO8_LITTLE(x->ks + 52, x13);
21561bfdc17Sjsing 			U32TO8_LITTLE(x->ks + 56, x14);
21661bfdc17Sjsing 			U32TO8_LITTLE(x->ks + 60, x15);
21761bfdc17Sjsing 		}
21861bfdc17Sjsing 
21993d23226Sjsing 		x0 = XOR(x0, U8TO32_LITTLE(m + 0));
22093d23226Sjsing 		x1 = XOR(x1, U8TO32_LITTLE(m + 4));
22193d23226Sjsing 		x2 = XOR(x2, U8TO32_LITTLE(m + 8));
22293d23226Sjsing 		x3 = XOR(x3, U8TO32_LITTLE(m + 12));
22393d23226Sjsing 		x4 = XOR(x4, U8TO32_LITTLE(m + 16));
22493d23226Sjsing 		x5 = XOR(x5, U8TO32_LITTLE(m + 20));
22593d23226Sjsing 		x6 = XOR(x6, U8TO32_LITTLE(m + 24));
22693d23226Sjsing 		x7 = XOR(x7, U8TO32_LITTLE(m + 28));
22793d23226Sjsing 		x8 = XOR(x8, U8TO32_LITTLE(m + 32));
22893d23226Sjsing 		x9 = XOR(x9, U8TO32_LITTLE(m + 36));
22993d23226Sjsing 		x10 = XOR(x10, U8TO32_LITTLE(m + 40));
23093d23226Sjsing 		x11 = XOR(x11, U8TO32_LITTLE(m + 44));
23193d23226Sjsing 		x12 = XOR(x12, U8TO32_LITTLE(m + 48));
23293d23226Sjsing 		x13 = XOR(x13, U8TO32_LITTLE(m + 52));
23393d23226Sjsing 		x14 = XOR(x14, U8TO32_LITTLE(m + 56));
23493d23226Sjsing 		x15 = XOR(x15, U8TO32_LITTLE(m + 60));
23593d23226Sjsing 
23693d23226Sjsing 		j12 = PLUSONE(j12);
23793d23226Sjsing 		if (!j12) {
23893d23226Sjsing 			j13 = PLUSONE(j13);
23921f5cfdbSjsing 			/*
24021f5cfdbSjsing 			 * Stopping at 2^70 bytes per nonce is the user's
24121f5cfdbSjsing 			 * responsibility.
24221f5cfdbSjsing 			 */
24393d23226Sjsing 		}
24493d23226Sjsing 
24593d23226Sjsing 		U32TO8_LITTLE(c + 0, x0);
24693d23226Sjsing 		U32TO8_LITTLE(c + 4, x1);
24793d23226Sjsing 		U32TO8_LITTLE(c + 8, x2);
24893d23226Sjsing 		U32TO8_LITTLE(c + 12, x3);
24993d23226Sjsing 		U32TO8_LITTLE(c + 16, x4);
25093d23226Sjsing 		U32TO8_LITTLE(c + 20, x5);
25193d23226Sjsing 		U32TO8_LITTLE(c + 24, x6);
25293d23226Sjsing 		U32TO8_LITTLE(c + 28, x7);
25393d23226Sjsing 		U32TO8_LITTLE(c + 32, x8);
25493d23226Sjsing 		U32TO8_LITTLE(c + 36, x9);
25593d23226Sjsing 		U32TO8_LITTLE(c + 40, x10);
25693d23226Sjsing 		U32TO8_LITTLE(c + 44, x11);
25793d23226Sjsing 		U32TO8_LITTLE(c + 48, x12);
25893d23226Sjsing 		U32TO8_LITTLE(c + 52, x13);
25993d23226Sjsing 		U32TO8_LITTLE(c + 56, x14);
26093d23226Sjsing 		U32TO8_LITTLE(c + 60, x15);
26193d23226Sjsing 
26293d23226Sjsing 		if (bytes <= 64) {
26393d23226Sjsing 			if (bytes < 64) {
2645ffa517cSjsing 				for (i = 0; i < bytes; ++i)
2655ffa517cSjsing 					ctarget[i] = c[i];
26693d23226Sjsing 			}
26793d23226Sjsing 			x->input[12] = j12;
26893d23226Sjsing 			x->input[13] = j13;
26961bfdc17Sjsing 			x->unused = 64 - bytes;
27093d23226Sjsing 			return;
27193d23226Sjsing 		}
27293d23226Sjsing 		bytes -= 64;
27393d23226Sjsing 		c += 64;
27493d23226Sjsing 		m += 64;
27593d23226Sjsing 	}
27693d23226Sjsing }
27764bf2397Sdlg 
27864bf2397Sdlg void
CRYPTO_hchacha_20(unsigned char subkey[32],const unsigned char key[32],const unsigned char nonce[16])27964bf2397Sdlg CRYPTO_hchacha_20(unsigned char subkey[32], const unsigned char key[32],
28064bf2397Sdlg     const unsigned char nonce[16])
28164bf2397Sdlg {
28264bf2397Sdlg 	uint32_t x[16];
28364bf2397Sdlg 	int i;
28464bf2397Sdlg 
28564bf2397Sdlg 	x[0] = U8TO32_LITTLE(sigma + 0);
28664bf2397Sdlg 	x[1] = U8TO32_LITTLE(sigma + 4);
28764bf2397Sdlg 	x[2] = U8TO32_LITTLE(sigma + 8);
28864bf2397Sdlg 	x[3] = U8TO32_LITTLE(sigma + 12);
28964bf2397Sdlg 	x[4] = U8TO32_LITTLE(key + 0);
29064bf2397Sdlg 	x[5] = U8TO32_LITTLE(key + 4);
29164bf2397Sdlg 	x[6] = U8TO32_LITTLE(key + 8);
29264bf2397Sdlg 	x[7] = U8TO32_LITTLE(key + 12);
29364bf2397Sdlg 	x[8] = U8TO32_LITTLE(key + 16);
29464bf2397Sdlg 	x[9] = U8TO32_LITTLE(key + 20);
29564bf2397Sdlg 	x[10] = U8TO32_LITTLE(key + 24);
29664bf2397Sdlg 	x[11] = U8TO32_LITTLE(key + 28);
29764bf2397Sdlg 	x[12] = U8TO32_LITTLE(nonce + 0);
29864bf2397Sdlg 	x[13] = U8TO32_LITTLE(nonce + 4);
29964bf2397Sdlg 	x[14] = U8TO32_LITTLE(nonce + 8);
30064bf2397Sdlg 	x[15] = U8TO32_LITTLE(nonce + 12);
30164bf2397Sdlg 
30264bf2397Sdlg 	for (i = 20; i > 0; i -= 2) {
30364bf2397Sdlg 		QUARTERROUND(x[0], x[4], x[8], x[12])
30464bf2397Sdlg 		QUARTERROUND(x[1], x[5], x[9], x[13])
30564bf2397Sdlg 		QUARTERROUND(x[2], x[6], x[10], x[14])
30664bf2397Sdlg 		QUARTERROUND(x[3], x[7], x[11], x[15])
30764bf2397Sdlg 		QUARTERROUND(x[0], x[5], x[10], x[15])
30864bf2397Sdlg 		QUARTERROUND(x[1], x[6], x[11], x[12])
30964bf2397Sdlg 		QUARTERROUND(x[2], x[7], x[8], x[13])
31064bf2397Sdlg 		QUARTERROUND(x[3], x[4], x[9], x[14])
31164bf2397Sdlg 	}
31264bf2397Sdlg 
31364bf2397Sdlg 	U32TO8_LITTLE(subkey + 0, x[0]);
31464bf2397Sdlg 	U32TO8_LITTLE(subkey + 4, x[1]);
31564bf2397Sdlg 	U32TO8_LITTLE(subkey + 8, x[2]);
31664bf2397Sdlg 	U32TO8_LITTLE(subkey + 12, x[3]);
31764bf2397Sdlg 
31864bf2397Sdlg 	U32TO8_LITTLE(subkey + 16, x[12]);
31964bf2397Sdlg 	U32TO8_LITTLE(subkey + 20, x[13]);
32064bf2397Sdlg 	U32TO8_LITTLE(subkey + 24, x[14]);
32164bf2397Sdlg 	U32TO8_LITTLE(subkey + 28, x[15]);
32264bf2397Sdlg }
3231ec3c770Sbeck LCRYPTO_ALIAS(CRYPTO_hchacha_20);
324