1 /* $NetBSD: chacha_ref.c,v 1.1 2020/07/25 22:46:34 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * ChaCha pseudorandom function family and stream cipher portable C
31 * implementation. Derived from the specification,
32 *
33 * Daniel J. Bernstein, `ChaCha, a variant of Salsa20', Workshop
34 * Record of the State of the Art in Stream Ciphers -- SASC 2008.
35 * https://cr.yp.to/papers.html#chacha
36 *
37 * which in turn builds on the specification of Salsa20 available at
38 * <https://cr.yp.to/snuffle.html>. The particular parametrization of
39 * the stream cipher, with a 32-bit block counter and 96-bit nonce, is
40 * described in
41 *
42 * Y. Nir and A. Langley, `ChaCha20 and Poly1305 for IETF
43 * Protocols', IETF RFC 8439, June 2018.
44 * https://tools.ietf.org/html/rfc8439
45 */
46
47 #include "chacha_ref.h"
48
/*
 * rol32(u, c)
 *
 *	Rotate the 32-bit word u left by c bit positions and return the
 *	result.  The count is reduced modulo 32, so c = 0 (or any
 *	multiple of 32) returns u unchanged.
 */
static uint32_t
rol32(uint32_t u, unsigned c)
{

	/*
	 * Mask both shift counts into [0, 31].  Shifting a 32-bit
	 * value by 32 is undefined behaviour in C, which the unmasked
	 * form u >> (32 - c) would hit for c = 0.
	 */
	return (u << (c & 31)) | (u >> (-c & 31));
}
55
/*
 * CHACHA_QUARTERROUND(a, b, c, d)
 *
 *	Mix four 32-bit state words in place.  Each of the four steps
 *	adds one word into another, XORs the sum into a third word, and
 *	rotates that word left; the rotation amounts are 16, 12, 8 and
 *	7 bits, in that order.
 *
 *	Every argument is expanded more than once and is assigned to,
 *	so each must be a side-effect-free uint32_t lvalue.
 */
#define CHACHA_QUARTERROUND(a, b, c, d) do \
{ \
	(a) += (b); \
	(d) = rol32((d) ^ (a), 16); \
	(c) += (d); \
	(b) = rol32((b) ^ (c), 12); \
	(a) += (b); \
	(d) = rol32((d) ^ (a), 8); \
	(c) += (d); \
	(b) = rol32((b) ^ (c), 7); \
} while (/*CONSTCOND*/0)
63
/*
 * The 16 ASCII bytes "expand 32-byte k" with no terminating NUL,
 * written out in four groups of four bytes.  The code in this file
 * reads them as the first four little-endian 32-bit words of the
 * ChaCha state.
 */
const uint8_t chacha_const32[16] = {
	'e', 'x', 'p', 'a',
	'n', 'd', ' ', '3',
	'2', '-', 'b', 'y',
	't', 'e', ' ', 'k',
};
65
/*
 * chacha_core_ref(out, in, k, c, nr)
 *
 *	Compute one 64-byte ChaCha block.  The 16-word initial state is
 *	read as little-endian 32-bit words: words 0-3 from the 16-byte
 *	constant c, words 4-11 from the 32-byte key k, and words 12-15
 *	from the 16-byte input in.  A copy of the state is put through
 *	nr rounds, taken two at a time, the initial state is added back
 *	word by word, and the 16 sums are stored little-endian in out.
 *
 *	nr is counted down in steps of two until it reaches zero, so it
 *	must be even; an odd value never reaches zero.
 */
static void
chacha_core_ref(uint8_t out[restrict static 64], const uint8_t in[static 16],
    const uint8_t k[static 32], const uint8_t c[static 16], unsigned nr)
{
	uint32_t state[16];	/* initial state, kept for the final add */
	uint32_t mix[16];	/* working copy transformed by the rounds */
	unsigned i;

	for (i = 0; i < 4; i++)
		state[i] = le32dec(c + 4*i);
	for (i = 0; i < 8; i++)
		state[4 + i] = le32dec(k + 4*i);
	for (i = 0; i < 4; i++)
		state[12 + i] = le32dec(in + 4*i);

	for (i = 0; i < 16; i++)
		mix[i] = state[i];

	while (nr > 0) {
		/* Column round. */
		CHACHA_QUARTERROUND(mix[0], mix[4], mix[8], mix[12]);
		CHACHA_QUARTERROUND(mix[1], mix[5], mix[9], mix[13]);
		CHACHA_QUARTERROUND(mix[2], mix[6], mix[10], mix[14]);
		CHACHA_QUARTERROUND(mix[3], mix[7], mix[11], mix[15]);
		/* Diagonal round. */
		CHACHA_QUARTERROUND(mix[0], mix[5], mix[10], mix[15]);
		CHACHA_QUARTERROUND(mix[1], mix[6], mix[11], mix[12]);
		CHACHA_QUARTERROUND(mix[2], mix[7], mix[8], mix[13]);
		CHACHA_QUARTERROUND(mix[3], mix[4], mix[9], mix[14]);
		nr -= 2;
	}

	for (i = 0; i < 16; i++)
		le32enc(out + 4*i, state[i] + mix[i]);
}
118
119 /* ChaCha stream cipher (IETF style, 96-bit nonce and 32-bit block counter) */
120
121 static void
chacha_stream_ref(uint8_t * restrict s,size_t nbytes,uint32_t blkno,const uint8_t nonce[static12],const uint8_t k[static32],unsigned nr)122 chacha_stream_ref(uint8_t *restrict s, size_t nbytes,
123 uint32_t blkno,
124 const uint8_t nonce[static 12],
125 const uint8_t k[static 32],
126 unsigned nr)
127 {
128 const uint8_t *c = chacha_const32;
129 uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
130 uint32_t y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15;
131 unsigned i;
132
133 x0 = le32dec(c + 0);
134 x1 = le32dec(c + 4);
135 x2 = le32dec(c + 8);
136 x3 = le32dec(c + 12);
137 x4 = le32dec(k + 0);
138 x5 = le32dec(k + 4);
139 x6 = le32dec(k + 8);
140 x7 = le32dec(k + 12);
141 x8 = le32dec(k + 16);
142 x9 = le32dec(k + 20);
143 x10 = le32dec(k + 24);
144 x11 = le32dec(k + 28);
145 /* x12 = blkno */
146 x13 = le32dec(nonce + 0);
147 x14 = le32dec(nonce + 4);
148 x15 = le32dec(nonce + 8);
149
150 for (; nbytes >= 64; nbytes -= 64, s += 64, blkno++) {
151 y0 = x0;
152 y1 = x1;
153 y2 = x2;
154 y3 = x3;
155 y4 = x4;
156 y5 = x5;
157 y6 = x6;
158 y7 = x7;
159 y8 = x8;
160 y9 = x9;
161 y10 = x10;
162 y11 = x11;
163 y12 = x12 = blkno;
164 y13 = x13;
165 y14 = x14;
166 y15 = x15;
167 for (i = nr; i > 0; i -= 2) {
168 CHACHA_QUARTERROUND( y0, y4, y8,y12);
169 CHACHA_QUARTERROUND( y1, y5, y9,y13);
170 CHACHA_QUARTERROUND( y2, y6,y10,y14);
171 CHACHA_QUARTERROUND( y3, y7,y11,y15);
172 CHACHA_QUARTERROUND( y0, y5,y10,y15);
173 CHACHA_QUARTERROUND( y1, y6,y11,y12);
174 CHACHA_QUARTERROUND( y2, y7, y8,y13);
175 CHACHA_QUARTERROUND( y3, y4, y9,y14);
176 }
177 le32enc(s + 0, x0 + y0);
178 le32enc(s + 4, x1 + y1);
179 le32enc(s + 8, x2 + y2);
180 le32enc(s + 12, x3 + y3);
181 le32enc(s + 16, x4 + y4);
182 le32enc(s + 20, x5 + y5);
183 le32enc(s + 24, x6 + y6);
184 le32enc(s + 28, x7 + y7);
185 le32enc(s + 32, x8 + y8);
186 le32enc(s + 36, x9 + y9);
187 le32enc(s + 40, x10 + y10);
188 le32enc(s + 44, x11 + y11);
189 le32enc(s + 48, x12 + y12);
190 le32enc(s + 52, x13 + y13);
191 le32enc(s + 56, x14 + y14);
192 le32enc(s + 60, x15 + y15);
193 }
194
195 if (nbytes) {
196 uint8_t buf[64];
197
198 y0 = x0;
199 y1 = x1;
200 y2 = x2;
201 y3 = x3;
202 y4 = x4;
203 y5 = x5;
204 y6 = x6;
205 y7 = x7;
206 y8 = x8;
207 y9 = x9;
208 y10 = x10;
209 y11 = x11;
210 y12 = x12 = blkno;
211 y13 = x13;
212 y14 = x14;
213 y15 = x15;
214 for (i = nr; i > 0; i -= 2) {
215 CHACHA_QUARTERROUND( y0, y4, y8,y12);
216 CHACHA_QUARTERROUND( y1, y5, y9,y13);
217 CHACHA_QUARTERROUND( y2, y6,y10,y14);
218 CHACHA_QUARTERROUND( y3, y7,y11,y15);
219 CHACHA_QUARTERROUND( y0, y5,y10,y15);
220 CHACHA_QUARTERROUND( y1, y6,y11,y12);
221 CHACHA_QUARTERROUND( y2, y7, y8,y13);
222 CHACHA_QUARTERROUND( y3, y4, y9,y14);
223 }
224 le32enc(buf + 0, x0 + y0);
225 le32enc(buf + 4, x1 + y1);
226 le32enc(buf + 8, x2 + y2);
227 le32enc(buf + 12, x3 + y3);
228 le32enc(buf + 16, x4 + y4);
229 le32enc(buf + 20, x5 + y5);
230 le32enc(buf + 24, x6 + y6);
231 le32enc(buf + 28, x7 + y7);
232 le32enc(buf + 32, x8 + y8);
233 le32enc(buf + 36, x9 + y9);
234 le32enc(buf + 40, x10 + y10);
235 le32enc(buf + 44, x11 + y11);
236 le32enc(buf + 48, x12 + y12);
237 le32enc(buf + 52, x13 + y13);
238 le32enc(buf + 56, x14 + y14);
239 le32enc(buf + 60, x15 + y15);
240 memcpy(s, buf, nbytes);
241 }
242 }
243
244 static void
chacha_stream_xor_ref(uint8_t * s,const uint8_t * p,size_t nbytes,uint32_t blkno,const uint8_t nonce[static12],const uint8_t k[static32],unsigned nr)245 chacha_stream_xor_ref(uint8_t *s, const uint8_t *p, size_t nbytes,
246 uint32_t blkno,
247 const uint8_t nonce[static 12],
248 const uint8_t k[static 32],
249 unsigned nr)
250 {
251 const uint8_t *c = chacha_const32;
252 uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
253 uint32_t y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15;
254 unsigned i;
255
256 x0 = le32dec(c + 0);
257 x1 = le32dec(c + 4);
258 x2 = le32dec(c + 8);
259 x3 = le32dec(c + 12);
260 x4 = le32dec(k + 0);
261 x5 = le32dec(k + 4);
262 x6 = le32dec(k + 8);
263 x7 = le32dec(k + 12);
264 x8 = le32dec(k + 16);
265 x9 = le32dec(k + 20);
266 x10 = le32dec(k + 24);
267 x11 = le32dec(k + 28);
268 /* x12 = blkno */
269 x13 = le32dec(nonce + 0);
270 x14 = le32dec(nonce + 4);
271 x15 = le32dec(nonce + 8);
272
273 for (; nbytes >= 64; nbytes -= 64, s += 64, p += 64, blkno++) {
274 y0 = x0;
275 y1 = x1;
276 y2 = x2;
277 y3 = x3;
278 y4 = x4;
279 y5 = x5;
280 y6 = x6;
281 y7 = x7;
282 y8 = x8;
283 y9 = x9;
284 y10 = x10;
285 y11 = x11;
286 y12 = x12 = blkno;
287 y13 = x13;
288 y14 = x14;
289 y15 = x15;
290 for (i = nr; i > 0; i -= 2) {
291 CHACHA_QUARTERROUND( y0, y4, y8,y12);
292 CHACHA_QUARTERROUND( y1, y5, y9,y13);
293 CHACHA_QUARTERROUND( y2, y6,y10,y14);
294 CHACHA_QUARTERROUND( y3, y7,y11,y15);
295 CHACHA_QUARTERROUND( y0, y5,y10,y15);
296 CHACHA_QUARTERROUND( y1, y6,y11,y12);
297 CHACHA_QUARTERROUND( y2, y7, y8,y13);
298 CHACHA_QUARTERROUND( y3, y4, y9,y14);
299 }
300 le32enc(s + 0, (x0 + y0) ^ le32dec(p + 0));
301 le32enc(s + 4, (x1 + y1) ^ le32dec(p + 4));
302 le32enc(s + 8, (x2 + y2) ^ le32dec(p + 8));
303 le32enc(s + 12, (x3 + y3) ^ le32dec(p + 12));
304 le32enc(s + 16, (x4 + y4) ^ le32dec(p + 16));
305 le32enc(s + 20, (x5 + y5) ^ le32dec(p + 20));
306 le32enc(s + 24, (x6 + y6) ^ le32dec(p + 24));
307 le32enc(s + 28, (x7 + y7) ^ le32dec(p + 28));
308 le32enc(s + 32, (x8 + y8) ^ le32dec(p + 32));
309 le32enc(s + 36, (x9 + y9) ^ le32dec(p + 36));
310 le32enc(s + 40, (x10 + y10) ^ le32dec(p + 40));
311 le32enc(s + 44, (x11 + y11) ^ le32dec(p + 44));
312 le32enc(s + 48, (x12 + y12) ^ le32dec(p + 48));
313 le32enc(s + 52, (x13 + y13) ^ le32dec(p + 52));
314 le32enc(s + 56, (x14 + y14) ^ le32dec(p + 56));
315 le32enc(s + 60, (x15 + y15) ^ le32dec(p + 60));
316 }
317
318 if (nbytes) {
319 uint8_t buf[64];
320
321 y0 = x0;
322 y1 = x1;
323 y2 = x2;
324 y3 = x3;
325 y4 = x4;
326 y5 = x5;
327 y6 = x6;
328 y7 = x7;
329 y8 = x8;
330 y9 = x9;
331 y10 = x10;
332 y11 = x11;
333 y12 = x12 = blkno;
334 y13 = x13;
335 y14 = x14;
336 y15 = x15;
337 for (i = nr; i > 0; i -= 2) {
338 CHACHA_QUARTERROUND( y0, y4, y8,y12);
339 CHACHA_QUARTERROUND( y1, y5, y9,y13);
340 CHACHA_QUARTERROUND( y2, y6,y10,y14);
341 CHACHA_QUARTERROUND( y3, y7,y11,y15);
342 CHACHA_QUARTERROUND( y0, y5,y10,y15);
343 CHACHA_QUARTERROUND( y1, y6,y11,y12);
344 CHACHA_QUARTERROUND( y2, y7, y8,y13);
345 CHACHA_QUARTERROUND( y3, y4, y9,y14);
346 }
347 le32enc(buf + 0, x0 + y0);
348 le32enc(buf + 4, x1 + y1);
349 le32enc(buf + 8, x2 + y2);
350 le32enc(buf + 12, x3 + y3);
351 le32enc(buf + 16, x4 + y4);
352 le32enc(buf + 20, x5 + y5);
353 le32enc(buf + 24, x6 + y6);
354 le32enc(buf + 28, x7 + y7);
355 le32enc(buf + 32, x8 + y8);
356 le32enc(buf + 36, x9 + y9);
357 le32enc(buf + 40, x10 + y10);
358 le32enc(buf + 44, x11 + y11);
359 le32enc(buf + 48, x12 + y12);
360 le32enc(buf + 52, x13 + y13);
361 le32enc(buf + 56, x14 + y14);
362 le32enc(buf + 60, x15 + y15);
363 for (i = 0; i < nbytes - nbytes%4; i += 4)
364 le32enc(s + i, le32dec(p + i) ^ le32dec(buf + i));
365 for (; i < nbytes; i++)
366 s[i] = p[i] ^ buf[i];
367 }
368 }
369
370 /* HChaCha */
371
/*
 * hchacha_ref(out, in, k, c, nr)
 *
 *	Load a 16-word state exactly as chacha_core_ref does (words 0-3
 *	from c, 4-11 from k, 12-15 from in, little-endian) and apply nr
 *	rounds, two per iteration, so nr must be even.  Unlike
 *	chacha_core_ref, the initial state is not added back: the 32
 *	output bytes are words 0-3 followed by words 12-15 of the
 *	transformed state, stored little-endian.
 */
static void
hchacha_ref(uint8_t out[restrict static 32], const uint8_t in[static 16],
    const uint8_t k[static 32], const uint8_t c[static 16], unsigned nr)
{
	uint32_t mix[16];
	unsigned i;

	for (i = 0; i < 4; i++)
		mix[i] = le32dec(c + 4*i);
	for (i = 0; i < 8; i++)
		mix[4 + i] = le32dec(k + 4*i);
	for (i = 0; i < 4; i++)
		mix[12 + i] = le32dec(in + 4*i);

	while (nr > 0) {
		/* Column round. */
		CHACHA_QUARTERROUND(mix[0], mix[4], mix[8], mix[12]);
		CHACHA_QUARTERROUND(mix[1], mix[5], mix[9], mix[13]);
		CHACHA_QUARTERROUND(mix[2], mix[6], mix[10], mix[14]);
		CHACHA_QUARTERROUND(mix[3], mix[7], mix[11], mix[15]);
		/* Diagonal round. */
		CHACHA_QUARTERROUND(mix[0], mix[5], mix[10], mix[15]);
		CHACHA_QUARTERROUND(mix[1], mix[6], mix[11], mix[12]);
		CHACHA_QUARTERROUND(mix[2], mix[7], mix[8], mix[13]);
		CHACHA_QUARTERROUND(mix[3], mix[4], mix[9], mix[14]);
		nr -= 2;
	}

	/* First and last rows of the state only. */
	for (i = 0; i < 4; i++)
		le32enc(out + 4*i, mix[i]);
	for (i = 0; i < 4; i++)
		le32enc(out + 16 + 4*i, mix[12 + i]);
}
415
416 /* XChaCha stream cipher */
417
418 /* https://tools.ietf.org/html/draft-irtf-cfrg-xchacha-03 */
419
420 static void
xchacha_stream_ref(uint8_t * restrict s,size_t nbytes,uint32_t blkno,const uint8_t nonce[static24],const uint8_t k[static32],unsigned nr)421 xchacha_stream_ref(uint8_t *restrict s, size_t nbytes, uint32_t blkno,
422 const uint8_t nonce[static 24], const uint8_t k[static 32], unsigned nr)
423 {
424 uint8_t subkey[32];
425 uint8_t subnonce[12];
426
427 hchacha_ref(subkey, nonce/*[0:16)*/, k, chacha_const32, nr);
428 memset(subnonce, 0, 4);
429 memcpy(subnonce + 4, nonce + 16, 8);
430 chacha_stream_ref(s, nbytes, blkno, subnonce, subkey, nr);
431 }
432
433 static void
xchacha_stream_xor_ref(uint8_t * restrict c,const uint8_t * p,size_t nbytes,uint32_t blkno,const uint8_t nonce[static24],const uint8_t k[static32],unsigned nr)434 xchacha_stream_xor_ref(uint8_t *restrict c, const uint8_t *p, size_t nbytes,
435 uint32_t blkno,
436 const uint8_t nonce[static 24],
437 const uint8_t k[static 32],
438 unsigned nr)
439 {
440 uint8_t subkey[32];
441 uint8_t subnonce[12];
442
443 hchacha_ref(subkey, nonce/*[0:16)*/, k, chacha_const32, nr);
444 memset(subnonce, 0, 4);
445 memcpy(subnonce + 4, nonce + 16, 8);
446 chacha_stream_xor_ref(c, p, nbytes, blkno, subnonce, subkey, nr);
447 }
448
/*
 * chacha_probe_ref()
 *
 *	Probe routine for the portable C implementation.  There is
 *	nothing to detect, since the reference implementation is always
 *	available, so this unconditionally returns 0.
 */
static int
chacha_probe_ref(void)
{
	const int available = 0;

	return available;
}
456
457 const struct chacha_impl chacha_ref_impl = {
458 .ci_name = "Portable C ChaCha",
459 .ci_probe = chacha_probe_ref,
460 .ci_chacha_core = chacha_core_ref,
461 .ci_hchacha = hchacha_ref,
462 .ci_chacha_stream = chacha_stream_ref,
463 .ci_chacha_stream_xor = chacha_stream_xor_ref,
464 .ci_xchacha_stream = xchacha_stream_ref,
465 .ci_xchacha_stream_xor = xchacha_stream_xor_ref,
466 };
467