/* $OpenBSD: chacha-merged.c,v 1.13 2024/06/05 19:43:06 tb Exp $ */
/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/

#include <stddef.h>
#include <stdint.h>

/* Byte lengths used by the ChaCha implementation in this file. */
#define CHACHA_MINKEYLEN	16	/* minimum key buffer the key setup reads */
#define CHACHA_NONCELEN		8	/* nonce (IV) bytes read by the IV setup */
#define CHACHA_CTRLEN		8	/* block counter bytes read by the IV setup */
#define CHACHA_STATELEN		(CHACHA_NONCELEN+CHACHA_CTRLEN)
#define CHACHA_BLOCKLEN		64	/* keystream bytes produced per block */

/* Short fixed-width aliases used throughout this file. */
typedef uint8_t u8;
typedef uint32_t u32;

1961bfdc17Sjsing struct chacha_ctx {
201f8d1a56Stb u32 input[16];
211f8d1a56Stb u8 ks[CHACHA_BLOCKLEN];
221f8d1a56Stb u8 unused;
2361bfdc17Sjsing };
2461bfdc17Sjsing
251f8d1a56Stb static inline void chacha_keysetup(struct chacha_ctx *x, const u8 *k, u32 kbits)
2681306bffSdjm __attribute__((__bounded__(__minbytes__, 2, CHACHA_MINKEYLEN)));
271f8d1a56Stb static inline void chacha_ivsetup(struct chacha_ctx *x, const u8 *iv,
281f8d1a56Stb const u8 *ctr)
2981306bffSdjm __attribute__((__bounded__(__minbytes__, 2, CHACHA_NONCELEN)))
3081306bffSdjm __attribute__((__bounded__(__minbytes__, 3, CHACHA_CTRLEN)));
311f8d1a56Stb static inline void chacha_encrypt_bytes(struct chacha_ctx *x, const u8 *m,
321f8d1a56Stb u8 *c, u32 bytes)
3381306bffSdjm __attribute__((__bounded__(__buffer__, 2, 4)))
3481306bffSdjm __attribute__((__bounded__(__buffer__, 3, 4)));
3593d23226Sjsing
3693d23226Sjsing typedef struct chacha_ctx chacha_ctx;
3793d23226Sjsing
/* Append an unsigned suffix to an integer literal. */
#define U8C(v) (v##U)
#define U32C(v) (v##U)

/* Truncate a value to 8 / 32 bits. */
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/*
 * Rotate the 32-bit value v left by n bits.  v is evaluated twice, so it
 * must be free of side effects; n must be in the range 1..31.
 */
#define ROTL32(v, n) \
  (U32V((v) << (n)) | ((v) >> (32 - (n))))

/* Load a 32-bit little-endian word from the four bytes at p. */
#define U8TO32_LITTLE(p) \
  (((u32)((p)[0])) | \
   ((u32)((p)[1]) <<  8) | \
   ((u32)((p)[2]) << 16) | \
   ((u32)((p)[3]) << 24))

/* Store the 32-bit word v at p as four little-endian bytes. */
#define U32TO8_LITTLE(p, v) \
  do { \
    (p)[0] = U8V((v)); \
    (p)[1] = U8V((v) >>  8); \
    (p)[2] = U8V((v) >> 16); \
    (p)[3] = U8V((v) >> 24); \
  } while (0)

/* 32-bit building blocks of the quarter round. */
#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/*
 * ChaCha quarter round on four state words (add, xor, rotate by 16, 12, 8
 * and 7).  The expansion is a plain statement sequence that already ends in
 * a semicolon: callers invoke it without one, and it must not be used as the
 * unbraced body of an if/else/loop.
 */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/*
 * Constant words placed in state positions 0..3.  They are spelled as byte
 * values rather than string literals so the arrays are exactly 16 bytes with
 * no terminating NUL.
 */

/* Initialise with "expand 32-byte k". */
static const char sigma[16] = {
	0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x33,
	0x32, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b,
};

/* Initialise with "expand 16-byte k". */
static const char tau[16] = {
	0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x31,
	0x36, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b,
};

8493d23226Sjsing static inline void
chacha_keysetup(chacha_ctx * x,const u8 * k,u32 kbits)8593d23226Sjsing chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits)
8693d23226Sjsing {
8793d23226Sjsing const char *constants;
8893d23226Sjsing
8993d23226Sjsing x->input[4] = U8TO32_LITTLE(k + 0);
9093d23226Sjsing x->input[5] = U8TO32_LITTLE(k + 4);
9193d23226Sjsing x->input[6] = U8TO32_LITTLE(k + 8);
9293d23226Sjsing x->input[7] = U8TO32_LITTLE(k + 12);
9393d23226Sjsing if (kbits == 256) { /* recommended */
9493d23226Sjsing k += 16;
9593d23226Sjsing constants = sigma;
9693d23226Sjsing } else { /* kbits == 128 */
9793d23226Sjsing constants = tau;
9893d23226Sjsing }
9993d23226Sjsing x->input[8] = U8TO32_LITTLE(k + 0);
10093d23226Sjsing x->input[9] = U8TO32_LITTLE(k + 4);
10193d23226Sjsing x->input[10] = U8TO32_LITTLE(k + 8);
10293d23226Sjsing x->input[11] = U8TO32_LITTLE(k + 12);
10393d23226Sjsing x->input[0] = U8TO32_LITTLE(constants + 0);
10493d23226Sjsing x->input[1] = U8TO32_LITTLE(constants + 4);
10593d23226Sjsing x->input[2] = U8TO32_LITTLE(constants + 8);
10693d23226Sjsing x->input[3] = U8TO32_LITTLE(constants + 12);
10793d23226Sjsing }
10893d23226Sjsing
10993d23226Sjsing static inline void
chacha_ivsetup(chacha_ctx * x,const u8 * iv,const u8 * counter)11093d23226Sjsing chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
11193d23226Sjsing {
11293d23226Sjsing x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
11393d23226Sjsing x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
11493d23226Sjsing x->input[14] = U8TO32_LITTLE(iv + 0);
11593d23226Sjsing x->input[15] = U8TO32_LITTLE(iv + 4);
11693d23226Sjsing }
11793d23226Sjsing
11893d23226Sjsing static inline void
chacha_encrypt_bytes(chacha_ctx * x,const u8 * m,u8 * c,u32 bytes)11993d23226Sjsing chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes)
12093d23226Sjsing {
12121f5cfdbSjsing u32 x0, x1, x2, x3, x4, x5, x6, x7;
12221f5cfdbSjsing u32 x8, x9, x10, x11, x12, x13, x14, x15;
12321f5cfdbSjsing u32 j0, j1, j2, j3, j4, j5, j6, j7;
12421f5cfdbSjsing u32 j8, j9, j10, j11, j12, j13, j14, j15;
12593d23226Sjsing u8 *ctarget = NULL;
12693d23226Sjsing u8 tmp[64];
1271f8d1a56Stb u32 i;
12893d23226Sjsing
1295ffa517cSjsing if (!bytes)
1305ffa517cSjsing return;
13193d23226Sjsing
13293d23226Sjsing j0 = x->input[0];
13393d23226Sjsing j1 = x->input[1];
13493d23226Sjsing j2 = x->input[2];
13593d23226Sjsing j3 = x->input[3];
13693d23226Sjsing j4 = x->input[4];
13793d23226Sjsing j5 = x->input[5];
13893d23226Sjsing j6 = x->input[6];
13993d23226Sjsing j7 = x->input[7];
14093d23226Sjsing j8 = x->input[8];
14193d23226Sjsing j9 = x->input[9];
14293d23226Sjsing j10 = x->input[10];
14393d23226Sjsing j11 = x->input[11];
14493d23226Sjsing j12 = x->input[12];
14593d23226Sjsing j13 = x->input[13];
14693d23226Sjsing j14 = x->input[14];
14793d23226Sjsing j15 = x->input[15];
14893d23226Sjsing
14993d23226Sjsing for (;;) {
15093d23226Sjsing if (bytes < 64) {
15121f5cfdbSjsing for (i = 0; i < bytes; ++i)
15221f5cfdbSjsing tmp[i] = m[i];
15393d23226Sjsing m = tmp;
15493d23226Sjsing ctarget = c;
15593d23226Sjsing c = tmp;
15693d23226Sjsing }
15793d23226Sjsing x0 = j0;
15893d23226Sjsing x1 = j1;
15993d23226Sjsing x2 = j2;
16093d23226Sjsing x3 = j3;
16193d23226Sjsing x4 = j4;
16293d23226Sjsing x5 = j5;
16393d23226Sjsing x6 = j6;
16493d23226Sjsing x7 = j7;
16593d23226Sjsing x8 = j8;
16693d23226Sjsing x9 = j9;
16793d23226Sjsing x10 = j10;
16893d23226Sjsing x11 = j11;
16993d23226Sjsing x12 = j12;
17093d23226Sjsing x13 = j13;
17193d23226Sjsing x14 = j14;
17293d23226Sjsing x15 = j15;
17393d23226Sjsing for (i = 20; i > 0; i -= 2) {
17493d23226Sjsing QUARTERROUND(x0, x4, x8, x12)
17593d23226Sjsing QUARTERROUND(x1, x5, x9, x13)
17693d23226Sjsing QUARTERROUND(x2, x6, x10, x14)
17793d23226Sjsing QUARTERROUND(x3, x7, x11, x15)
17893d23226Sjsing QUARTERROUND(x0, x5, x10, x15)
17993d23226Sjsing QUARTERROUND(x1, x6, x11, x12)
18093d23226Sjsing QUARTERROUND(x2, x7, x8, x13)
18193d23226Sjsing QUARTERROUND(x3, x4, x9, x14)
18293d23226Sjsing }
18393d23226Sjsing x0 = PLUS(x0, j0);
18493d23226Sjsing x1 = PLUS(x1, j1);
18593d23226Sjsing x2 = PLUS(x2, j2);
18693d23226Sjsing x3 = PLUS(x3, j3);
18793d23226Sjsing x4 = PLUS(x4, j4);
18893d23226Sjsing x5 = PLUS(x5, j5);
18993d23226Sjsing x6 = PLUS(x6, j6);
19093d23226Sjsing x7 = PLUS(x7, j7);
19193d23226Sjsing x8 = PLUS(x8, j8);
19293d23226Sjsing x9 = PLUS(x9, j9);
19393d23226Sjsing x10 = PLUS(x10, j10);
19493d23226Sjsing x11 = PLUS(x11, j11);
19593d23226Sjsing x12 = PLUS(x12, j12);
19693d23226Sjsing x13 = PLUS(x13, j13);
19793d23226Sjsing x14 = PLUS(x14, j14);
19893d23226Sjsing x15 = PLUS(x15, j15);
19993d23226Sjsing
20061bfdc17Sjsing if (bytes < 64) {
20161bfdc17Sjsing U32TO8_LITTLE(x->ks + 0, x0);
20261bfdc17Sjsing U32TO8_LITTLE(x->ks + 4, x1);
20361bfdc17Sjsing U32TO8_LITTLE(x->ks + 8, x2);
20461bfdc17Sjsing U32TO8_LITTLE(x->ks + 12, x3);
20561bfdc17Sjsing U32TO8_LITTLE(x->ks + 16, x4);
20661bfdc17Sjsing U32TO8_LITTLE(x->ks + 20, x5);
20761bfdc17Sjsing U32TO8_LITTLE(x->ks + 24, x6);
20861bfdc17Sjsing U32TO8_LITTLE(x->ks + 28, x7);
20961bfdc17Sjsing U32TO8_LITTLE(x->ks + 32, x8);
21061bfdc17Sjsing U32TO8_LITTLE(x->ks + 36, x9);
21161bfdc17Sjsing U32TO8_LITTLE(x->ks + 40, x10);
21261bfdc17Sjsing U32TO8_LITTLE(x->ks + 44, x11);
21361bfdc17Sjsing U32TO8_LITTLE(x->ks + 48, x12);
21461bfdc17Sjsing U32TO8_LITTLE(x->ks + 52, x13);
21561bfdc17Sjsing U32TO8_LITTLE(x->ks + 56, x14);
21661bfdc17Sjsing U32TO8_LITTLE(x->ks + 60, x15);
21761bfdc17Sjsing }
21861bfdc17Sjsing
21993d23226Sjsing x0 = XOR(x0, U8TO32_LITTLE(m + 0));
22093d23226Sjsing x1 = XOR(x1, U8TO32_LITTLE(m + 4));
22193d23226Sjsing x2 = XOR(x2, U8TO32_LITTLE(m + 8));
22293d23226Sjsing x3 = XOR(x3, U8TO32_LITTLE(m + 12));
22393d23226Sjsing x4 = XOR(x4, U8TO32_LITTLE(m + 16));
22493d23226Sjsing x5 = XOR(x5, U8TO32_LITTLE(m + 20));
22593d23226Sjsing x6 = XOR(x6, U8TO32_LITTLE(m + 24));
22693d23226Sjsing x7 = XOR(x7, U8TO32_LITTLE(m + 28));
22793d23226Sjsing x8 = XOR(x8, U8TO32_LITTLE(m + 32));
22893d23226Sjsing x9 = XOR(x9, U8TO32_LITTLE(m + 36));
22993d23226Sjsing x10 = XOR(x10, U8TO32_LITTLE(m + 40));
23093d23226Sjsing x11 = XOR(x11, U8TO32_LITTLE(m + 44));
23193d23226Sjsing x12 = XOR(x12, U8TO32_LITTLE(m + 48));
23293d23226Sjsing x13 = XOR(x13, U8TO32_LITTLE(m + 52));
23393d23226Sjsing x14 = XOR(x14, U8TO32_LITTLE(m + 56));
23493d23226Sjsing x15 = XOR(x15, U8TO32_LITTLE(m + 60));
23593d23226Sjsing
23693d23226Sjsing j12 = PLUSONE(j12);
23793d23226Sjsing if (!j12) {
23893d23226Sjsing j13 = PLUSONE(j13);
23921f5cfdbSjsing /*
24021f5cfdbSjsing * Stopping at 2^70 bytes per nonce is the user's
24121f5cfdbSjsing * responsibility.
24221f5cfdbSjsing */
24393d23226Sjsing }
24493d23226Sjsing
24593d23226Sjsing U32TO8_LITTLE(c + 0, x0);
24693d23226Sjsing U32TO8_LITTLE(c + 4, x1);
24793d23226Sjsing U32TO8_LITTLE(c + 8, x2);
24893d23226Sjsing U32TO8_LITTLE(c + 12, x3);
24993d23226Sjsing U32TO8_LITTLE(c + 16, x4);
25093d23226Sjsing U32TO8_LITTLE(c + 20, x5);
25193d23226Sjsing U32TO8_LITTLE(c + 24, x6);
25293d23226Sjsing U32TO8_LITTLE(c + 28, x7);
25393d23226Sjsing U32TO8_LITTLE(c + 32, x8);
25493d23226Sjsing U32TO8_LITTLE(c + 36, x9);
25593d23226Sjsing U32TO8_LITTLE(c + 40, x10);
25693d23226Sjsing U32TO8_LITTLE(c + 44, x11);
25793d23226Sjsing U32TO8_LITTLE(c + 48, x12);
25893d23226Sjsing U32TO8_LITTLE(c + 52, x13);
25993d23226Sjsing U32TO8_LITTLE(c + 56, x14);
26093d23226Sjsing U32TO8_LITTLE(c + 60, x15);
26193d23226Sjsing
26293d23226Sjsing if (bytes <= 64) {
26393d23226Sjsing if (bytes < 64) {
2645ffa517cSjsing for (i = 0; i < bytes; ++i)
2655ffa517cSjsing ctarget[i] = c[i];
26693d23226Sjsing }
26793d23226Sjsing x->input[12] = j12;
26893d23226Sjsing x->input[13] = j13;
26961bfdc17Sjsing x->unused = 64 - bytes;
27093d23226Sjsing return;
27193d23226Sjsing }
27293d23226Sjsing bytes -= 64;
27393d23226Sjsing c += 64;
27493d23226Sjsing m += 64;
27593d23226Sjsing }
27693d23226Sjsing }
27764bf2397Sdlg
27864bf2397Sdlg void
CRYPTO_hchacha_20(unsigned char subkey[32],const unsigned char key[32],const unsigned char nonce[16])27964bf2397Sdlg CRYPTO_hchacha_20(unsigned char subkey[32], const unsigned char key[32],
28064bf2397Sdlg const unsigned char nonce[16])
28164bf2397Sdlg {
28264bf2397Sdlg uint32_t x[16];
28364bf2397Sdlg int i;
28464bf2397Sdlg
28564bf2397Sdlg x[0] = U8TO32_LITTLE(sigma + 0);
28664bf2397Sdlg x[1] = U8TO32_LITTLE(sigma + 4);
28764bf2397Sdlg x[2] = U8TO32_LITTLE(sigma + 8);
28864bf2397Sdlg x[3] = U8TO32_LITTLE(sigma + 12);
28964bf2397Sdlg x[4] = U8TO32_LITTLE(key + 0);
29064bf2397Sdlg x[5] = U8TO32_LITTLE(key + 4);
29164bf2397Sdlg x[6] = U8TO32_LITTLE(key + 8);
29264bf2397Sdlg x[7] = U8TO32_LITTLE(key + 12);
29364bf2397Sdlg x[8] = U8TO32_LITTLE(key + 16);
29464bf2397Sdlg x[9] = U8TO32_LITTLE(key + 20);
29564bf2397Sdlg x[10] = U8TO32_LITTLE(key + 24);
29664bf2397Sdlg x[11] = U8TO32_LITTLE(key + 28);
29764bf2397Sdlg x[12] = U8TO32_LITTLE(nonce + 0);
29864bf2397Sdlg x[13] = U8TO32_LITTLE(nonce + 4);
29964bf2397Sdlg x[14] = U8TO32_LITTLE(nonce + 8);
30064bf2397Sdlg x[15] = U8TO32_LITTLE(nonce + 12);
30164bf2397Sdlg
30264bf2397Sdlg for (i = 20; i > 0; i -= 2) {
30364bf2397Sdlg QUARTERROUND(x[0], x[4], x[8], x[12])
30464bf2397Sdlg QUARTERROUND(x[1], x[5], x[9], x[13])
30564bf2397Sdlg QUARTERROUND(x[2], x[6], x[10], x[14])
30664bf2397Sdlg QUARTERROUND(x[3], x[7], x[11], x[15])
30764bf2397Sdlg QUARTERROUND(x[0], x[5], x[10], x[15])
30864bf2397Sdlg QUARTERROUND(x[1], x[6], x[11], x[12])
30964bf2397Sdlg QUARTERROUND(x[2], x[7], x[8], x[13])
31064bf2397Sdlg QUARTERROUND(x[3], x[4], x[9], x[14])
31164bf2397Sdlg }
31264bf2397Sdlg
31364bf2397Sdlg U32TO8_LITTLE(subkey + 0, x[0]);
31464bf2397Sdlg U32TO8_LITTLE(subkey + 4, x[1]);
31564bf2397Sdlg U32TO8_LITTLE(subkey + 8, x[2]);
31664bf2397Sdlg U32TO8_LITTLE(subkey + 12, x[3]);
31764bf2397Sdlg
31864bf2397Sdlg U32TO8_LITTLE(subkey + 16, x[12]);
31964bf2397Sdlg U32TO8_LITTLE(subkey + 20, x[13]);
32064bf2397Sdlg U32TO8_LITTLE(subkey + 24, x[14]);
32164bf2397Sdlg U32TO8_LITTLE(subkey + 28, x[15]);
32264bf2397Sdlg }
3231ec3c770Sbeck LCRYPTO_ALIAS(CRYPTO_hchacha_20);
324