xref: /netbsd-src/crypto/external/bsd/openssh/dist/chacha.c (revision a629fefc36f2e87b36355a611e948fafe62680b4)
1 /* $OpenBSD: chacha.c,v 1.2 2023/07/17 05:26:38 djm Exp $ */
2 /*
3 chacha-merged.c version 20080118
4 D. J. Bernstein
5 Public domain.
6 */
7 
8 #include "includes.h"
9 __RCSID("$NetBSD: chacha.c,v 1.6 2023/10/25 20:19:57 christos Exp $");
10 
11 #include <stdio.h>	/* for NULL */
12 #include "chacha.h"
13 
/* Short aliases for the byte and word types used by the cipher core. */
typedef unsigned char u8;
typedef unsigned int u32;

typedef struct chacha_ctx chacha_ctx;

/* Append an unsigned suffix to an integer literal. */
#define U8C(v) (v##U)
#define U32C(v) (v##U)

/* Mask a value down to its low 8 / low 32 bits. */
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/* Rotate v left by n bits; the left-shifted half is masked to 32 bits. */
#define ROTL32(v, n) (U32V((v) << (n)) | ((v) >> (32 - (n))))

/* Assemble a 32-bit word from four bytes at p, least significant byte first. */
#define U8TO32_LITTLE(p)     \
  (((u32)((p)[3]) << 24) |   \
   ((u32)((p)[2]) << 16) |   \
   ((u32)((p)[1]) <<  8) |   \
   ((u32)((p)[0])      ))

/* Store the 32-bit word v into four bytes at p, least significant byte first. */
#define U32TO8_LITTLE(p, v)      \
  do {                           \
    (p)[0] = U8V((v)      );     \
    (p)[1] = U8V((v) >>  8);     \
    (p)[2] = U8V((v) >> 16);     \
    (p)[3] = U8V((v) >> 24);     \
  } while (0)

/* Word-level primitives: rotation, xor and addition truncated to 32 bits. */
#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/*
 * One quarter round over four state words (rotation amounts 16, 12, 8, 7).
 * Expands to a plain statement list that already ends in ';', so call
 * sites write it without a trailing semicolon and must not use it as the
 * unbraced body of an if/for/while.
 */
#define QUARTERROUND(a,b,c,d)               \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16);   \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12);   \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8);   \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/*
 * 16-byte constants loaded into state words 0..3 by the key setup.
 * Each array is exactly 16 chars wide, so there is no terminating NUL.
 */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
55 
56 void
chacha_keysetup(chacha_ctx * x,const u8 * k,u32 kbits)57 chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
58 {
59   const char *constants;
60 
61   x->input[4] = U8TO32_LITTLE(k + 0);
62   x->input[5] = U8TO32_LITTLE(k + 4);
63   x->input[6] = U8TO32_LITTLE(k + 8);
64   x->input[7] = U8TO32_LITTLE(k + 12);
65   if (kbits == 256) { /* recommended */
66     k += 16;
67     constants = sigma;
68   } else { /* kbits == 128 */
69     constants = tau;
70   }
71   x->input[8] = U8TO32_LITTLE(k + 0);
72   x->input[9] = U8TO32_LITTLE(k + 4);
73   x->input[10] = U8TO32_LITTLE(k + 8);
74   x->input[11] = U8TO32_LITTLE(k + 12);
75   x->input[0] = U8TO32_LITTLE(constants + 0);
76   x->input[1] = U8TO32_LITTLE(constants + 4);
77   x->input[2] = U8TO32_LITTLE(constants + 8);
78   x->input[3] = U8TO32_LITTLE(constants + 12);
79 }
80 
81 void
chacha_ivsetup(chacha_ctx * x,const u8 * iv,const u8 * counter)82 chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
83 {
84   x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
85   x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
86   x->input[14] = U8TO32_LITTLE(iv + 0);
87   x->input[15] = U8TO32_LITTLE(iv + 4);
88 }
89 
90 void
chacha_encrypt_bytes(chacha_ctx * x,const u8 * m,u8 * c,u32 bytes)91 chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
92 {
93   u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
94   u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
95   u8 *ctarget = NULL;
96   u8 tmp[64];
97   u_int i;
98 
99   if (!bytes) return;
100 
101   j0 = x->input[0];
102   j1 = x->input[1];
103   j2 = x->input[2];
104   j3 = x->input[3];
105   j4 = x->input[4];
106   j5 = x->input[5];
107   j6 = x->input[6];
108   j7 = x->input[7];
109   j8 = x->input[8];
110   j9 = x->input[9];
111   j10 = x->input[10];
112   j11 = x->input[11];
113   j12 = x->input[12];
114   j13 = x->input[13];
115   j14 = x->input[14];
116   j15 = x->input[15];
117 
118   for (;;) {
119     if (bytes < 64) {
120       for (i = 0;i < bytes;++i) tmp[i] = m[i];
121       m = tmp;
122       ctarget = c;
123       c = tmp;
124     }
125     x0 = j0;
126     x1 = j1;
127     x2 = j2;
128     x3 = j3;
129     x4 = j4;
130     x5 = j5;
131     x6 = j6;
132     x7 = j7;
133     x8 = j8;
134     x9 = j9;
135     x10 = j10;
136     x11 = j11;
137     x12 = j12;
138     x13 = j13;
139     x14 = j14;
140     x15 = j15;
141     for (i = 20;i > 0;i -= 2) {
142       QUARTERROUND( x0, x4, x8,x12)
143       QUARTERROUND( x1, x5, x9,x13)
144       QUARTERROUND( x2, x6,x10,x14)
145       QUARTERROUND( x3, x7,x11,x15)
146       QUARTERROUND( x0, x5,x10,x15)
147       QUARTERROUND( x1, x6,x11,x12)
148       QUARTERROUND( x2, x7, x8,x13)
149       QUARTERROUND( x3, x4, x9,x14)
150     }
151     x0 = PLUS(x0,j0);
152     x1 = PLUS(x1,j1);
153     x2 = PLUS(x2,j2);
154     x3 = PLUS(x3,j3);
155     x4 = PLUS(x4,j4);
156     x5 = PLUS(x5,j5);
157     x6 = PLUS(x6,j6);
158     x7 = PLUS(x7,j7);
159     x8 = PLUS(x8,j8);
160     x9 = PLUS(x9,j9);
161     x10 = PLUS(x10,j10);
162     x11 = PLUS(x11,j11);
163     x12 = PLUS(x12,j12);
164     x13 = PLUS(x13,j13);
165     x14 = PLUS(x14,j14);
166     x15 = PLUS(x15,j15);
167 
168     x0 = XOR(x0,U8TO32_LITTLE(m + 0));
169     x1 = XOR(x1,U8TO32_LITTLE(m + 4));
170     x2 = XOR(x2,U8TO32_LITTLE(m + 8));
171     x3 = XOR(x3,U8TO32_LITTLE(m + 12));
172     x4 = XOR(x4,U8TO32_LITTLE(m + 16));
173     x5 = XOR(x5,U8TO32_LITTLE(m + 20));
174     x6 = XOR(x6,U8TO32_LITTLE(m + 24));
175     x7 = XOR(x7,U8TO32_LITTLE(m + 28));
176     x8 = XOR(x8,U8TO32_LITTLE(m + 32));
177     x9 = XOR(x9,U8TO32_LITTLE(m + 36));
178     x10 = XOR(x10,U8TO32_LITTLE(m + 40));
179     x11 = XOR(x11,U8TO32_LITTLE(m + 44));
180     x12 = XOR(x12,U8TO32_LITTLE(m + 48));
181     x13 = XOR(x13,U8TO32_LITTLE(m + 52));
182     x14 = XOR(x14,U8TO32_LITTLE(m + 56));
183     x15 = XOR(x15,U8TO32_LITTLE(m + 60));
184 
185     j12 = PLUSONE(j12);
186     if (!j12) {
187       j13 = PLUSONE(j13);
188       /* stopping at 2^70 bytes per nonce is user's responsibility */
189     }
190 
191     U32TO8_LITTLE(c + 0,x0);
192     U32TO8_LITTLE(c + 4,x1);
193     U32TO8_LITTLE(c + 8,x2);
194     U32TO8_LITTLE(c + 12,x3);
195     U32TO8_LITTLE(c + 16,x4);
196     U32TO8_LITTLE(c + 20,x5);
197     U32TO8_LITTLE(c + 24,x6);
198     U32TO8_LITTLE(c + 28,x7);
199     U32TO8_LITTLE(c + 32,x8);
200     U32TO8_LITTLE(c + 36,x9);
201     U32TO8_LITTLE(c + 40,x10);
202     U32TO8_LITTLE(c + 44,x11);
203     U32TO8_LITTLE(c + 48,x12);
204     U32TO8_LITTLE(c + 52,x13);
205     U32TO8_LITTLE(c + 56,x14);
206     U32TO8_LITTLE(c + 60,x15);
207 
208     if (bytes <= 64) {
209       if (bytes < 64) {
210         for (i = 0;i < bytes;++i) ctarget[i] = c[i];
211       }
212       x->input[12] = j12;
213       x->input[13] = j13;
214       return;
215     }
216     bytes -= 64;
217     c += 64;
218     m += 64;
219   }
220 }
221