xref: /netbsd-src/sys/crypto/chacha/chacha_ref.c (revision fa79152618fc8fd4c1019d2f19f11c1e3d1b3013)
1 /*	$NetBSD: chacha_ref.c,v 1.1 2020/07/25 22:46:34 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2020 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * ChaCha pseudorandom function family and stream cipher portable C
31  * implementation.  Derived from the specification,
32  *
33  *	Daniel J. Bernstein, `ChaCha, a variant of Salsa20', Workshop
34  *	Record of the State of the Art in Stream Ciphers -- SASC 2008.
35  *	https://cr.yp.to/papers.html#chacha
36  *
37  * which in turn builds on the specification of Salsa20 available at
38  * <https://cr.yp.to/snuffle.html>.  The particular parametrization of
39  * the stream cipher, with a 32-bit block counter and 96-bit nonce, is
40  * described in
41  *
42  *	Y. Nir and A. Langley, `ChaCha20 and Poly1305 for IETF
43  *	Protocols', IETF RFC 8439, June 2018.
44  *	https://tools.ietf.org/html/rfc8439
45  */
46 
47 #include "chacha_ref.h"
48 
/*
 * rol32(u, c)
 *
 *	Rotate the 32-bit word u left by c bit positions and return the
 *	result.
 *
 *	Both shift counts are reduced modulo 32.  Without the reduction a
 *	count of zero would shift right by the full word width, which is
 *	undefined behaviour in C; with it, c = 0 (or any multiple of 32)
 *	simply returns u.  Results for counts 1..31 are unchanged.
 */
static uint32_t
rol32(uint32_t u, unsigned c)
{

	return (u << (c & 31)) | (u >> ((32 - c) & 31));
}
55 
/*
 * CHACHA_QUARTERROUND(a, b, c, d)
 *
 *	ChaCha quarter-round: update the four 32-bit words a, b, c, d in
 *	place with four add/xor/rotate steps, rotating left by 16, 12, 8
 *	and 7 bits in that order.
 *
 *	Every argument is expanded more than once, so each must be a
 *	side-effect-free lvalue.
 */
#define	CHACHA_QUARTERROUND(a, b, c, d) do				      \
{									      \
	(a) += (b);							      \
	(d) = rol32((d) ^ (a), 16);					      \
	(c) += (d);							      \
	(b) = rol32((b) ^ (c), 12);					      \
	(a) += (b);							      \
	(d) = rol32((d) ^ (a),  8);					      \
	(c) += (d);							      \
	(b) = rol32((b) ^ (c),  7);					      \
} while (/*CONSTCOND*/0)
63 
/*
 * The 16 ASCII bytes "expand 32-byte k", with no terminating NUL.
 * The stream functions in this file load them as the first four
 * words of the ChaCha state.
 */
const uint8_t chacha_const32[16] = {
	'e', 'x', 'p', 'a', 'n', 'd', ' ', '3',
	'2', '-', 'b', 'y', 't', 'e', ' ', 'k',
};
65 
/*
 * chacha_core_ref(out, in, k, c, nr)
 *
 *	Compute one 64-byte ChaCha block.
 *
 *	The sixteen-word state is loaded little-endian: words 0-3 from
 *	the 16-byte constant c, words 4-11 from the 32-byte key k, and
 *	words 12-15 from the 16-byte input in.  A working copy is run
 *	through nr rounds, then added word by word (mod 2^32) to the
 *	initial state and stored little-endian in out.
 *
 *	nr must be even: each pass of the loop performs two rounds and
 *	the loop stops only when the count reaches exactly zero.
 */
static void
chacha_core_ref(uint8_t out[restrict static 64], const uint8_t in[static 16],
    const uint8_t k[static 32], const uint8_t c[static 16], unsigned nr)
{
	uint32_t init[16], work[16];
	unsigned w;

	/* Assemble the initial state: constant, key, input. */
	for (w = 0; w < 4; w++)
		init[w] = le32dec(c + 4*w);
	for (w = 0; w < 8; w++)
		init[4 + w] = le32dec(k + 4*w);
	for (w = 0; w < 4; w++)
		init[12 + w] = le32dec(in + 4*w);

	for (w = 0; w < 16; w++)
		work[w] = init[w];

	while (nr > 0) {
		/* Round on the four columns of the 4x4 word matrix. */
		CHACHA_QUARTERROUND(work[0], work[4], work[8], work[12]);
		CHACHA_QUARTERROUND(work[1], work[5], work[9], work[13]);
		CHACHA_QUARTERROUND(work[2], work[6], work[10], work[14]);
		CHACHA_QUARTERROUND(work[3], work[7], work[11], work[15]);
		/* Round on the four diagonals. */
		CHACHA_QUARTERROUND(work[0], work[5], work[10], work[15]);
		CHACHA_QUARTERROUND(work[1], work[6], work[11], work[12]);
		CHACHA_QUARTERROUND(work[2], work[7], work[8], work[13]);
		CHACHA_QUARTERROUND(work[3], work[4], work[9], work[14]);
		nr -= 2;
	}

	/* Feed the initial state forward and serialize. */
	for (w = 0; w < 16; w++)
		le32enc(out + 4*w, init[w] + work[w]);
}
118 
119 /* ChaCha stream cipher (IETF style, 96-bit nonce and 32-bit block counter) */
120 
121 static void
chacha_stream_ref(uint8_t * restrict s,size_t nbytes,uint32_t blkno,const uint8_t nonce[static12],const uint8_t k[static32],unsigned nr)122 chacha_stream_ref(uint8_t *restrict s, size_t nbytes,
123     uint32_t blkno,
124     const uint8_t nonce[static 12],
125     const uint8_t k[static 32],
126     unsigned nr)
127 {
128 	const uint8_t *c = chacha_const32;
129 	uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
130 	uint32_t y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15;
131 	unsigned i;
132 
133 	x0 = le32dec(c + 0);
134 	x1 = le32dec(c + 4);
135 	x2 = le32dec(c + 8);
136 	x3 = le32dec(c + 12);
137 	x4 = le32dec(k + 0);
138 	x5 = le32dec(k + 4);
139 	x6 = le32dec(k + 8);
140 	x7 = le32dec(k + 12);
141 	x8 = le32dec(k + 16);
142 	x9 = le32dec(k + 20);
143 	x10 = le32dec(k + 24);
144 	x11 = le32dec(k + 28);
145 	/* x12 = blkno */
146 	x13 = le32dec(nonce + 0);
147 	x14 = le32dec(nonce + 4);
148 	x15 = le32dec(nonce + 8);
149 
150 	for (; nbytes >= 64; nbytes -= 64, s += 64, blkno++) {
151 		y0 = x0;
152 		y1 = x1;
153 		y2 = x2;
154 		y3 = x3;
155 		y4 = x4;
156 		y5 = x5;
157 		y6 = x6;
158 		y7 = x7;
159 		y8 = x8;
160 		y9 = x9;
161 		y10 = x10;
162 		y11 = x11;
163 		y12 = x12 = blkno;
164 		y13 = x13;
165 		y14 = x14;
166 		y15 = x15;
167 		for (i = nr; i > 0; i -= 2) {
168 			CHACHA_QUARTERROUND( y0, y4, y8,y12);
169 			CHACHA_QUARTERROUND( y1, y5, y9,y13);
170 			CHACHA_QUARTERROUND( y2, y6,y10,y14);
171 			CHACHA_QUARTERROUND( y3, y7,y11,y15);
172 			CHACHA_QUARTERROUND( y0, y5,y10,y15);
173 			CHACHA_QUARTERROUND( y1, y6,y11,y12);
174 			CHACHA_QUARTERROUND( y2, y7, y8,y13);
175 			CHACHA_QUARTERROUND( y3, y4, y9,y14);
176 		}
177 		le32enc(s + 0, x0 + y0);
178 		le32enc(s + 4, x1 + y1);
179 		le32enc(s + 8, x2 + y2);
180 		le32enc(s + 12, x3 + y3);
181 		le32enc(s + 16, x4 + y4);
182 		le32enc(s + 20, x5 + y5);
183 		le32enc(s + 24, x6 + y6);
184 		le32enc(s + 28, x7 + y7);
185 		le32enc(s + 32, x8 + y8);
186 		le32enc(s + 36, x9 + y9);
187 		le32enc(s + 40, x10 + y10);
188 		le32enc(s + 44, x11 + y11);
189 		le32enc(s + 48, x12 + y12);
190 		le32enc(s + 52, x13 + y13);
191 		le32enc(s + 56, x14 + y14);
192 		le32enc(s + 60, x15 + y15);
193 	}
194 
195 	if (nbytes) {
196 		uint8_t buf[64];
197 
198 		y0 = x0;
199 		y1 = x1;
200 		y2 = x2;
201 		y3 = x3;
202 		y4 = x4;
203 		y5 = x5;
204 		y6 = x6;
205 		y7 = x7;
206 		y8 = x8;
207 		y9 = x9;
208 		y10 = x10;
209 		y11 = x11;
210 		y12 = x12 = blkno;
211 		y13 = x13;
212 		y14 = x14;
213 		y15 = x15;
214 		for (i = nr; i > 0; i -= 2) {
215 			CHACHA_QUARTERROUND( y0, y4, y8,y12);
216 			CHACHA_QUARTERROUND( y1, y5, y9,y13);
217 			CHACHA_QUARTERROUND( y2, y6,y10,y14);
218 			CHACHA_QUARTERROUND( y3, y7,y11,y15);
219 			CHACHA_QUARTERROUND( y0, y5,y10,y15);
220 			CHACHA_QUARTERROUND( y1, y6,y11,y12);
221 			CHACHA_QUARTERROUND( y2, y7, y8,y13);
222 			CHACHA_QUARTERROUND( y3, y4, y9,y14);
223 		}
224 		le32enc(buf + 0, x0 + y0);
225 		le32enc(buf + 4, x1 + y1);
226 		le32enc(buf + 8, x2 + y2);
227 		le32enc(buf + 12, x3 + y3);
228 		le32enc(buf + 16, x4 + y4);
229 		le32enc(buf + 20, x5 + y5);
230 		le32enc(buf + 24, x6 + y6);
231 		le32enc(buf + 28, x7 + y7);
232 		le32enc(buf + 32, x8 + y8);
233 		le32enc(buf + 36, x9 + y9);
234 		le32enc(buf + 40, x10 + y10);
235 		le32enc(buf + 44, x11 + y11);
236 		le32enc(buf + 48, x12 + y12);
237 		le32enc(buf + 52, x13 + y13);
238 		le32enc(buf + 56, x14 + y14);
239 		le32enc(buf + 60, x15 + y15);
240 		memcpy(s, buf, nbytes);
241 	}
242 }
243 
244 static void
chacha_stream_xor_ref(uint8_t * s,const uint8_t * p,size_t nbytes,uint32_t blkno,const uint8_t nonce[static12],const uint8_t k[static32],unsigned nr)245 chacha_stream_xor_ref(uint8_t *s, const uint8_t *p, size_t nbytes,
246     uint32_t blkno,
247     const uint8_t nonce[static 12],
248     const uint8_t k[static 32],
249     unsigned nr)
250 {
251 	const uint8_t *c = chacha_const32;
252 	uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
253 	uint32_t y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15;
254 	unsigned i;
255 
256 	x0 = le32dec(c + 0);
257 	x1 = le32dec(c + 4);
258 	x2 = le32dec(c + 8);
259 	x3 = le32dec(c + 12);
260 	x4 = le32dec(k + 0);
261 	x5 = le32dec(k + 4);
262 	x6 = le32dec(k + 8);
263 	x7 = le32dec(k + 12);
264 	x8 = le32dec(k + 16);
265 	x9 = le32dec(k + 20);
266 	x10 = le32dec(k + 24);
267 	x11 = le32dec(k + 28);
268 	/* x12 = blkno */
269 	x13 = le32dec(nonce + 0);
270 	x14 = le32dec(nonce + 4);
271 	x15 = le32dec(nonce + 8);
272 
273 	for (; nbytes >= 64; nbytes -= 64, s += 64, p += 64, blkno++) {
274 		y0 = x0;
275 		y1 = x1;
276 		y2 = x2;
277 		y3 = x3;
278 		y4 = x4;
279 		y5 = x5;
280 		y6 = x6;
281 		y7 = x7;
282 		y8 = x8;
283 		y9 = x9;
284 		y10 = x10;
285 		y11 = x11;
286 		y12 = x12 = blkno;
287 		y13 = x13;
288 		y14 = x14;
289 		y15 = x15;
290 		for (i = nr; i > 0; i -= 2) {
291 			CHACHA_QUARTERROUND( y0, y4, y8,y12);
292 			CHACHA_QUARTERROUND( y1, y5, y9,y13);
293 			CHACHA_QUARTERROUND( y2, y6,y10,y14);
294 			CHACHA_QUARTERROUND( y3, y7,y11,y15);
295 			CHACHA_QUARTERROUND( y0, y5,y10,y15);
296 			CHACHA_QUARTERROUND( y1, y6,y11,y12);
297 			CHACHA_QUARTERROUND( y2, y7, y8,y13);
298 			CHACHA_QUARTERROUND( y3, y4, y9,y14);
299 		}
300 		le32enc(s + 0, (x0 + y0) ^ le32dec(p + 0));
301 		le32enc(s + 4, (x1 + y1) ^ le32dec(p + 4));
302 		le32enc(s + 8, (x2 + y2) ^ le32dec(p + 8));
303 		le32enc(s + 12, (x3 + y3) ^ le32dec(p + 12));
304 		le32enc(s + 16, (x4 + y4) ^ le32dec(p + 16));
305 		le32enc(s + 20, (x5 + y5) ^ le32dec(p + 20));
306 		le32enc(s + 24, (x6 + y6) ^ le32dec(p + 24));
307 		le32enc(s + 28, (x7 + y7) ^ le32dec(p + 28));
308 		le32enc(s + 32, (x8 + y8) ^ le32dec(p + 32));
309 		le32enc(s + 36, (x9 + y9) ^ le32dec(p + 36));
310 		le32enc(s + 40, (x10 + y10) ^ le32dec(p + 40));
311 		le32enc(s + 44, (x11 + y11) ^ le32dec(p + 44));
312 		le32enc(s + 48, (x12 + y12) ^ le32dec(p + 48));
313 		le32enc(s + 52, (x13 + y13) ^ le32dec(p + 52));
314 		le32enc(s + 56, (x14 + y14) ^ le32dec(p + 56));
315 		le32enc(s + 60, (x15 + y15) ^ le32dec(p + 60));
316 	}
317 
318 	if (nbytes) {
319 		uint8_t buf[64];
320 
321 		y0 = x0;
322 		y1 = x1;
323 		y2 = x2;
324 		y3 = x3;
325 		y4 = x4;
326 		y5 = x5;
327 		y6 = x6;
328 		y7 = x7;
329 		y8 = x8;
330 		y9 = x9;
331 		y10 = x10;
332 		y11 = x11;
333 		y12 = x12 = blkno;
334 		y13 = x13;
335 		y14 = x14;
336 		y15 = x15;
337 		for (i = nr; i > 0; i -= 2) {
338 			CHACHA_QUARTERROUND( y0, y4, y8,y12);
339 			CHACHA_QUARTERROUND( y1, y5, y9,y13);
340 			CHACHA_QUARTERROUND( y2, y6,y10,y14);
341 			CHACHA_QUARTERROUND( y3, y7,y11,y15);
342 			CHACHA_QUARTERROUND( y0, y5,y10,y15);
343 			CHACHA_QUARTERROUND( y1, y6,y11,y12);
344 			CHACHA_QUARTERROUND( y2, y7, y8,y13);
345 			CHACHA_QUARTERROUND( y3, y4, y9,y14);
346 		}
347 		le32enc(buf + 0, x0 + y0);
348 		le32enc(buf + 4, x1 + y1);
349 		le32enc(buf + 8, x2 + y2);
350 		le32enc(buf + 12, x3 + y3);
351 		le32enc(buf + 16, x4 + y4);
352 		le32enc(buf + 20, x5 + y5);
353 		le32enc(buf + 24, x6 + y6);
354 		le32enc(buf + 28, x7 + y7);
355 		le32enc(buf + 32, x8 + y8);
356 		le32enc(buf + 36, x9 + y9);
357 		le32enc(buf + 40, x10 + y10);
358 		le32enc(buf + 44, x11 + y11);
359 		le32enc(buf + 48, x12 + y12);
360 		le32enc(buf + 52, x13 + y13);
361 		le32enc(buf + 56, x14 + y14);
362 		le32enc(buf + 60, x15 + y15);
363 		for (i = 0; i < nbytes - nbytes%4; i += 4)
364 			le32enc(s + i, le32dec(p + i) ^ le32dec(buf + i));
365 		for (; i < nbytes; i++)
366 			s[i] = p[i] ^ buf[i];
367 	}
368 }
369 
370 /* HChaCha */
371 
/*
 * hchacha_ref(out, in, k, c, nr)
 *
 *	Load the sixteen-word state little-endian (words 0-3 from the
 *	16-byte constant c, words 4-11 from the 32-byte key k, words
 *	12-15 from the 16-byte input in), run nr rounds over it, and
 *	store words 0-3 followed by words 12-15 little-endian as the
 *	32-byte output.  Unlike chacha_core_ref, the initial state is
 *	not added back in.
 *
 *	nr must be even: each pass of the loop performs two rounds and
 *	the loop stops only when the count reaches exactly zero.
 */
static void
hchacha_ref(uint8_t out[restrict static 32], const uint8_t in[static 16],
    const uint8_t k[static 32], const uint8_t c[static 16], unsigned nr)
{
	uint32_t st[16];
	unsigned w;

	for (w = 0; w < 4; w++)
		st[w] = le32dec(c + 4*w);
	for (w = 0; w < 8; w++)
		st[4 + w] = le32dec(k + 4*w);
	for (w = 0; w < 4; w++)
		st[12 + w] = le32dec(in + 4*w);

	while (nr > 0) {
		CHACHA_QUARTERROUND(st[0], st[4], st[8], st[12]);
		CHACHA_QUARTERROUND(st[1], st[5], st[9], st[13]);
		CHACHA_QUARTERROUND(st[2], st[6], st[10], st[14]);
		CHACHA_QUARTERROUND(st[3], st[7], st[11], st[15]);
		CHACHA_QUARTERROUND(st[0], st[5], st[10], st[15]);
		CHACHA_QUARTERROUND(st[1], st[6], st[11], st[12]);
		CHACHA_QUARTERROUND(st[2], st[7], st[8], st[13]);
		CHACHA_QUARTERROUND(st[3], st[4], st[9], st[14]);
		nr -= 2;
	}

	/* Output is the first and last rows of the permuted state. */
	for (w = 0; w < 4; w++)
		le32enc(out + 4*w, st[w]);
	for (w = 0; w < 4; w++)
		le32enc(out + 16 + 4*w, st[12 + w]);
}
415 
416 /* XChaCha stream cipher */
417 
418 /* https://tools.ietf.org/html/draft-irtf-cfrg-xchacha-03 */
419 
420 static void
xchacha_stream_ref(uint8_t * restrict s,size_t nbytes,uint32_t blkno,const uint8_t nonce[static24],const uint8_t k[static32],unsigned nr)421 xchacha_stream_ref(uint8_t *restrict s, size_t nbytes, uint32_t blkno,
422     const uint8_t nonce[static 24], const uint8_t k[static 32], unsigned nr)
423 {
424 	uint8_t subkey[32];
425 	uint8_t subnonce[12];
426 
427 	hchacha_ref(subkey, nonce/*[0:16)*/, k, chacha_const32, nr);
428 	memset(subnonce, 0, 4);
429 	memcpy(subnonce + 4, nonce + 16, 8);
430 	chacha_stream_ref(s, nbytes, blkno, subnonce, subkey, nr);
431 }
432 
433 static void
xchacha_stream_xor_ref(uint8_t * restrict c,const uint8_t * p,size_t nbytes,uint32_t blkno,const uint8_t nonce[static24],const uint8_t k[static32],unsigned nr)434 xchacha_stream_xor_ref(uint8_t *restrict c, const uint8_t *p, size_t nbytes,
435     uint32_t blkno,
436     const uint8_t nonce[static 24],
437     const uint8_t k[static 32],
438     unsigned nr)
439 {
440 	uint8_t subkey[32];
441 	uint8_t subnonce[12];
442 
443 	hchacha_ref(subkey, nonce/*[0:16)*/, k, chacha_const32, nr);
444 	memset(subnonce, 0, 4);
445 	memcpy(subnonce + 4, nonce + 16, 8);
446 	chacha_stream_xor_ref(c, p, nbytes, blkno, subnonce, subkey, nr);
447 }
448 
/*
 * chacha_probe_ref()
 *
 *	Probe routine for the portable C implementation.  It has nothing
 *	to detect, so it unconditionally returns 0: the reference
 *	implementation is always available.
 */
static int
chacha_probe_ref(void)
{
	enum { CHACHA_REF_AVAILABLE = 0 };

	return CHACHA_REF_AVAILABLE;
}
456 
457 const struct chacha_impl chacha_ref_impl = {
458 	.ci_name = "Portable C ChaCha",
459 	.ci_probe = chacha_probe_ref,
460 	.ci_chacha_core = chacha_core_ref,
461 	.ci_hchacha = hchacha_ref,
462 	.ci_chacha_stream = chacha_stream_ref,
463 	.ci_chacha_stream_xor = chacha_stream_xor_ref,
464 	.ci_xchacha_stream = xchacha_stream_ref,
465 	.ci_xchacha_stream_xor = xchacha_stream_xor_ref,
466 };
467