xref: /netbsd-src/sys/crypto/chacha/chacha_ref.c (revision fa79152618fc8fd4c1019d2f19f11c1e3d1b3013)
1*fa791526Sriastradh /*	$NetBSD: chacha_ref.c,v 1.1 2020/07/25 22:46:34 riastradh Exp $	*/
2*fa791526Sriastradh 
3*fa791526Sriastradh /*-
4*fa791526Sriastradh  * Copyright (c) 2020 The NetBSD Foundation, Inc.
5*fa791526Sriastradh  * All rights reserved.
6*fa791526Sriastradh  *
7*fa791526Sriastradh  * Redistribution and use in source and binary forms, with or without
8*fa791526Sriastradh  * modification, are permitted provided that the following conditions
9*fa791526Sriastradh  * are met:
10*fa791526Sriastradh  * 1. Redistributions of source code must retain the above copyright
11*fa791526Sriastradh  *    notice, this list of conditions and the following disclaimer.
12*fa791526Sriastradh  * 2. Redistributions in binary form must reproduce the above copyright
13*fa791526Sriastradh  *    notice, this list of conditions and the following disclaimer in the
14*fa791526Sriastradh  *    documentation and/or other materials provided with the distribution.
15*fa791526Sriastradh  *
16*fa791526Sriastradh  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17*fa791526Sriastradh  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18*fa791526Sriastradh  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19*fa791526Sriastradh  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20*fa791526Sriastradh  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21*fa791526Sriastradh  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22*fa791526Sriastradh  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23*fa791526Sriastradh  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24*fa791526Sriastradh  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25*fa791526Sriastradh  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26*fa791526Sriastradh  * POSSIBILITY OF SUCH DAMAGE.
27*fa791526Sriastradh  */
28*fa791526Sriastradh 
29*fa791526Sriastradh /*
30*fa791526Sriastradh  * ChaCha pseudorandom function family and stream cipher portable C
31*fa791526Sriastradh  * implementation.  Derived from the specification,
32*fa791526Sriastradh  *
33*fa791526Sriastradh  *	Daniel J. Bernstein, `ChaCha, a variant of Salsa20', Workshop
34*fa791526Sriastradh  *	Record of the State of the Art in Stream Ciphers -- SASC 2008.
35*fa791526Sriastradh  *	https://cr.yp.to/papers.html#chacha
36*fa791526Sriastradh  *
37*fa791526Sriastradh  * which in turn builds on the specification of Salsa20 available at
38*fa791526Sriastradh  * <https://cr.yp.to/snuffle.html>.  The particular parametrization of
39*fa791526Sriastradh  * the stream cipher, with a 32-bit block counter and 96-bit nonce, is
40*fa791526Sriastradh  * described in
41*fa791526Sriastradh  *
42*fa791526Sriastradh  *	Y. Nir and A. Langley, `ChaCha20 and Poly1305 for IETF
43*fa791526Sriastradh  *	Protocols', IETF RFC 8439, June 2018.
44*fa791526Sriastradh  *	https://tools.ietf.org/html/rfc8439
45*fa791526Sriastradh  */
46*fa791526Sriastradh 
47*fa791526Sriastradh #include "chacha_ref.h"
48*fa791526Sriastradh 
/*
 * rol32(u, c)
 *
 *	Rotate the 32-bit word u left by c bit positions and return the
 *	result.
 *
 *	Both shift counts are reduced modulo 32, so c = 0 (or any
 *	multiple of 32) returns u unchanged.  The unmasked form
 *	`u >> (32 - c)' would shift a 32-bit value by 32 when c = 0,
 *	which is undefined behaviour in C.  For 0 < c < 32 the result is
 *	the same as the unmasked form.
 */
static uint32_t
rol32(uint32_t u, unsigned c)
{

	/* -c is computed in unsigned arithmetic; (-c & 31) == (32 - c) % 32 */
	return (u << (c & 31)) | (u >> (-c & 31));
}
55*fa791526Sriastradh 
/*
 * CHACHA_QUARTERROUND(a, b, c, d)
 *
 *	ChaCha quarter-round: update the four words a, b, c, d in place
 *	with four add / xor / rotate-left steps, using rotation counts
 *	16, 12, 8 and 7 in that order.
 *
 *	Each argument is expanded several times, so callers must pass
 *	plain lvalues without side effects.  Every use in this file
 *	passes uint32_t variables.
 */
#define	CHACHA_QUARTERROUND(a, b, c, d) do				      \
{									      \
	(a) += (b);							      \
	(d) = rol32((d) ^ (a), 16);					      \
	(c) += (d);							      \
	(b) = rol32((b) ^ (c), 12);					      \
	(a) += (b);							      \
	(d) = rol32((d) ^ (a),  8);					      \
	(c) += (d);							      \
	(b) = rol32((b) ^ (c),  7);					      \
} while (/*CONSTCOND*/0)
63*fa791526Sriastradh 
/*
 * The 16 ASCII bytes "expand 32-byte k", with no terminating NUL.  The
 * stream routines in this file use it for the first four words of the
 * ChaCha state and pass it to hchacha_ref as the constant.
 */
const uint8_t chacha_const32[16] = {
	'e', 'x', 'p', 'a', 'n', 'd', ' ', '3',
	'2', '-', 'b', 'y', 't', 'e', ' ', 'k',
};
65*fa791526Sriastradh 
/*
 * chacha_core_ref(out, in, k, c, nr)
 *
 *	Compute one 64-byte ChaCha block.
 *
 *	The 16-word state is loaded little-endian from the constant c
 *	(words 0-3), the key k (words 4-11) and the input in (words
 *	12-15).  A working copy is put through nr rounds, two per loop
 *	iteration: four quarter-rounds on the columns followed by four
 *	on the diagonals.  The working copy is then added word-wise to
 *	the initial state and stored little-endian in out.
 *
 *	The round loop counts nr down by two until it reaches zero, so
 *	nr must be even; an odd value would wrap the unsigned counter
 *	instead of stopping.
 */
static void
chacha_core_ref(uint8_t out[restrict static 64], const uint8_t in[static 16],
    const uint8_t k[static 32], const uint8_t c[static 16], unsigned nr)
{
	uint32_t init[16];	/* state as loaded */
	uint32_t work[16];	/* state being permuted */
	unsigned w;

	for (w = 0; w < 4; w++)
		init[w] = le32dec(c + 4*w);
	for (w = 0; w < 8; w++)
		init[4 + w] = le32dec(k + 4*w);
	for (w = 0; w < 4; w++)
		init[12 + w] = le32dec(in + 4*w);

	for (w = 0; w < 16; w++)
		work[w] = init[w];

	for (; nr > 0; nr -= 2) {
		/* columns */
		CHACHA_QUARTERROUND(work[0], work[4], work[ 8], work[12]);
		CHACHA_QUARTERROUND(work[1], work[5], work[ 9], work[13]);
		CHACHA_QUARTERROUND(work[2], work[6], work[10], work[14]);
		CHACHA_QUARTERROUND(work[3], work[7], work[11], work[15]);
		/* diagonals */
		CHACHA_QUARTERROUND(work[0], work[5], work[10], work[15]);
		CHACHA_QUARTERROUND(work[1], work[6], work[11], work[12]);
		CHACHA_QUARTERROUND(work[2], work[7], work[ 8], work[13]);
		CHACHA_QUARTERROUND(work[3], work[4], work[ 9], work[14]);
	}

	/* feed-forward: output word = initial word + permuted word */
	for (w = 0; w < 16; w++)
		le32enc(out + 4*w, init[w] + work[w]);
}
118*fa791526Sriastradh 
119*fa791526Sriastradh /* ChaCha stream cipher (IETF style, 96-bit nonce and 32-bit block counter) */
120*fa791526Sriastradh 
121*fa791526Sriastradh static void
chacha_stream_ref(uint8_t * restrict s,size_t nbytes,uint32_t blkno,const uint8_t nonce[static12],const uint8_t k[static32],unsigned nr)122*fa791526Sriastradh chacha_stream_ref(uint8_t *restrict s, size_t nbytes,
123*fa791526Sriastradh     uint32_t blkno,
124*fa791526Sriastradh     const uint8_t nonce[static 12],
125*fa791526Sriastradh     const uint8_t k[static 32],
126*fa791526Sriastradh     unsigned nr)
127*fa791526Sriastradh {
128*fa791526Sriastradh 	const uint8_t *c = chacha_const32;
129*fa791526Sriastradh 	uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
130*fa791526Sriastradh 	uint32_t y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15;
131*fa791526Sriastradh 	unsigned i;
132*fa791526Sriastradh 
133*fa791526Sriastradh 	x0 = le32dec(c + 0);
134*fa791526Sriastradh 	x1 = le32dec(c + 4);
135*fa791526Sriastradh 	x2 = le32dec(c + 8);
136*fa791526Sriastradh 	x3 = le32dec(c + 12);
137*fa791526Sriastradh 	x4 = le32dec(k + 0);
138*fa791526Sriastradh 	x5 = le32dec(k + 4);
139*fa791526Sriastradh 	x6 = le32dec(k + 8);
140*fa791526Sriastradh 	x7 = le32dec(k + 12);
141*fa791526Sriastradh 	x8 = le32dec(k + 16);
142*fa791526Sriastradh 	x9 = le32dec(k + 20);
143*fa791526Sriastradh 	x10 = le32dec(k + 24);
144*fa791526Sriastradh 	x11 = le32dec(k + 28);
145*fa791526Sriastradh 	/* x12 = blkno */
146*fa791526Sriastradh 	x13 = le32dec(nonce + 0);
147*fa791526Sriastradh 	x14 = le32dec(nonce + 4);
148*fa791526Sriastradh 	x15 = le32dec(nonce + 8);
149*fa791526Sriastradh 
150*fa791526Sriastradh 	for (; nbytes >= 64; nbytes -= 64, s += 64, blkno++) {
151*fa791526Sriastradh 		y0 = x0;
152*fa791526Sriastradh 		y1 = x1;
153*fa791526Sriastradh 		y2 = x2;
154*fa791526Sriastradh 		y3 = x3;
155*fa791526Sriastradh 		y4 = x4;
156*fa791526Sriastradh 		y5 = x5;
157*fa791526Sriastradh 		y6 = x6;
158*fa791526Sriastradh 		y7 = x7;
159*fa791526Sriastradh 		y8 = x8;
160*fa791526Sriastradh 		y9 = x9;
161*fa791526Sriastradh 		y10 = x10;
162*fa791526Sriastradh 		y11 = x11;
163*fa791526Sriastradh 		y12 = x12 = blkno;
164*fa791526Sriastradh 		y13 = x13;
165*fa791526Sriastradh 		y14 = x14;
166*fa791526Sriastradh 		y15 = x15;
167*fa791526Sriastradh 		for (i = nr; i > 0; i -= 2) {
168*fa791526Sriastradh 			CHACHA_QUARTERROUND( y0, y4, y8,y12);
169*fa791526Sriastradh 			CHACHA_QUARTERROUND( y1, y5, y9,y13);
170*fa791526Sriastradh 			CHACHA_QUARTERROUND( y2, y6,y10,y14);
171*fa791526Sriastradh 			CHACHA_QUARTERROUND( y3, y7,y11,y15);
172*fa791526Sriastradh 			CHACHA_QUARTERROUND( y0, y5,y10,y15);
173*fa791526Sriastradh 			CHACHA_QUARTERROUND( y1, y6,y11,y12);
174*fa791526Sriastradh 			CHACHA_QUARTERROUND( y2, y7, y8,y13);
175*fa791526Sriastradh 			CHACHA_QUARTERROUND( y3, y4, y9,y14);
176*fa791526Sriastradh 		}
177*fa791526Sriastradh 		le32enc(s + 0, x0 + y0);
178*fa791526Sriastradh 		le32enc(s + 4, x1 + y1);
179*fa791526Sriastradh 		le32enc(s + 8, x2 + y2);
180*fa791526Sriastradh 		le32enc(s + 12, x3 + y3);
181*fa791526Sriastradh 		le32enc(s + 16, x4 + y4);
182*fa791526Sriastradh 		le32enc(s + 20, x5 + y5);
183*fa791526Sriastradh 		le32enc(s + 24, x6 + y6);
184*fa791526Sriastradh 		le32enc(s + 28, x7 + y7);
185*fa791526Sriastradh 		le32enc(s + 32, x8 + y8);
186*fa791526Sriastradh 		le32enc(s + 36, x9 + y9);
187*fa791526Sriastradh 		le32enc(s + 40, x10 + y10);
188*fa791526Sriastradh 		le32enc(s + 44, x11 + y11);
189*fa791526Sriastradh 		le32enc(s + 48, x12 + y12);
190*fa791526Sriastradh 		le32enc(s + 52, x13 + y13);
191*fa791526Sriastradh 		le32enc(s + 56, x14 + y14);
192*fa791526Sriastradh 		le32enc(s + 60, x15 + y15);
193*fa791526Sriastradh 	}
194*fa791526Sriastradh 
195*fa791526Sriastradh 	if (nbytes) {
196*fa791526Sriastradh 		uint8_t buf[64];
197*fa791526Sriastradh 
198*fa791526Sriastradh 		y0 = x0;
199*fa791526Sriastradh 		y1 = x1;
200*fa791526Sriastradh 		y2 = x2;
201*fa791526Sriastradh 		y3 = x3;
202*fa791526Sriastradh 		y4 = x4;
203*fa791526Sriastradh 		y5 = x5;
204*fa791526Sriastradh 		y6 = x6;
205*fa791526Sriastradh 		y7 = x7;
206*fa791526Sriastradh 		y8 = x8;
207*fa791526Sriastradh 		y9 = x9;
208*fa791526Sriastradh 		y10 = x10;
209*fa791526Sriastradh 		y11 = x11;
210*fa791526Sriastradh 		y12 = x12 = blkno;
211*fa791526Sriastradh 		y13 = x13;
212*fa791526Sriastradh 		y14 = x14;
213*fa791526Sriastradh 		y15 = x15;
214*fa791526Sriastradh 		for (i = nr; i > 0; i -= 2) {
215*fa791526Sriastradh 			CHACHA_QUARTERROUND( y0, y4, y8,y12);
216*fa791526Sriastradh 			CHACHA_QUARTERROUND( y1, y5, y9,y13);
217*fa791526Sriastradh 			CHACHA_QUARTERROUND( y2, y6,y10,y14);
218*fa791526Sriastradh 			CHACHA_QUARTERROUND( y3, y7,y11,y15);
219*fa791526Sriastradh 			CHACHA_QUARTERROUND( y0, y5,y10,y15);
220*fa791526Sriastradh 			CHACHA_QUARTERROUND( y1, y6,y11,y12);
221*fa791526Sriastradh 			CHACHA_QUARTERROUND( y2, y7, y8,y13);
222*fa791526Sriastradh 			CHACHA_QUARTERROUND( y3, y4, y9,y14);
223*fa791526Sriastradh 		}
224*fa791526Sriastradh 		le32enc(buf + 0, x0 + y0);
225*fa791526Sriastradh 		le32enc(buf + 4, x1 + y1);
226*fa791526Sriastradh 		le32enc(buf + 8, x2 + y2);
227*fa791526Sriastradh 		le32enc(buf + 12, x3 + y3);
228*fa791526Sriastradh 		le32enc(buf + 16, x4 + y4);
229*fa791526Sriastradh 		le32enc(buf + 20, x5 + y5);
230*fa791526Sriastradh 		le32enc(buf + 24, x6 + y6);
231*fa791526Sriastradh 		le32enc(buf + 28, x7 + y7);
232*fa791526Sriastradh 		le32enc(buf + 32, x8 + y8);
233*fa791526Sriastradh 		le32enc(buf + 36, x9 + y9);
234*fa791526Sriastradh 		le32enc(buf + 40, x10 + y10);
235*fa791526Sriastradh 		le32enc(buf + 44, x11 + y11);
236*fa791526Sriastradh 		le32enc(buf + 48, x12 + y12);
237*fa791526Sriastradh 		le32enc(buf + 52, x13 + y13);
238*fa791526Sriastradh 		le32enc(buf + 56, x14 + y14);
239*fa791526Sriastradh 		le32enc(buf + 60, x15 + y15);
240*fa791526Sriastradh 		memcpy(s, buf, nbytes);
241*fa791526Sriastradh 	}
242*fa791526Sriastradh }
243*fa791526Sriastradh 
244*fa791526Sriastradh static void
chacha_stream_xor_ref(uint8_t * s,const uint8_t * p,size_t nbytes,uint32_t blkno,const uint8_t nonce[static12],const uint8_t k[static32],unsigned nr)245*fa791526Sriastradh chacha_stream_xor_ref(uint8_t *s, const uint8_t *p, size_t nbytes,
246*fa791526Sriastradh     uint32_t blkno,
247*fa791526Sriastradh     const uint8_t nonce[static 12],
248*fa791526Sriastradh     const uint8_t k[static 32],
249*fa791526Sriastradh     unsigned nr)
250*fa791526Sriastradh {
251*fa791526Sriastradh 	const uint8_t *c = chacha_const32;
252*fa791526Sriastradh 	uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
253*fa791526Sriastradh 	uint32_t y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15;
254*fa791526Sriastradh 	unsigned i;
255*fa791526Sriastradh 
256*fa791526Sriastradh 	x0 = le32dec(c + 0);
257*fa791526Sriastradh 	x1 = le32dec(c + 4);
258*fa791526Sriastradh 	x2 = le32dec(c + 8);
259*fa791526Sriastradh 	x3 = le32dec(c + 12);
260*fa791526Sriastradh 	x4 = le32dec(k + 0);
261*fa791526Sriastradh 	x5 = le32dec(k + 4);
262*fa791526Sriastradh 	x6 = le32dec(k + 8);
263*fa791526Sriastradh 	x7 = le32dec(k + 12);
264*fa791526Sriastradh 	x8 = le32dec(k + 16);
265*fa791526Sriastradh 	x9 = le32dec(k + 20);
266*fa791526Sriastradh 	x10 = le32dec(k + 24);
267*fa791526Sriastradh 	x11 = le32dec(k + 28);
268*fa791526Sriastradh 	/* x12 = blkno */
269*fa791526Sriastradh 	x13 = le32dec(nonce + 0);
270*fa791526Sriastradh 	x14 = le32dec(nonce + 4);
271*fa791526Sriastradh 	x15 = le32dec(nonce + 8);
272*fa791526Sriastradh 
273*fa791526Sriastradh 	for (; nbytes >= 64; nbytes -= 64, s += 64, p += 64, blkno++) {
274*fa791526Sriastradh 		y0 = x0;
275*fa791526Sriastradh 		y1 = x1;
276*fa791526Sriastradh 		y2 = x2;
277*fa791526Sriastradh 		y3 = x3;
278*fa791526Sriastradh 		y4 = x4;
279*fa791526Sriastradh 		y5 = x5;
280*fa791526Sriastradh 		y6 = x6;
281*fa791526Sriastradh 		y7 = x7;
282*fa791526Sriastradh 		y8 = x8;
283*fa791526Sriastradh 		y9 = x9;
284*fa791526Sriastradh 		y10 = x10;
285*fa791526Sriastradh 		y11 = x11;
286*fa791526Sriastradh 		y12 = x12 = blkno;
287*fa791526Sriastradh 		y13 = x13;
288*fa791526Sriastradh 		y14 = x14;
289*fa791526Sriastradh 		y15 = x15;
290*fa791526Sriastradh 		for (i = nr; i > 0; i -= 2) {
291*fa791526Sriastradh 			CHACHA_QUARTERROUND( y0, y4, y8,y12);
292*fa791526Sriastradh 			CHACHA_QUARTERROUND( y1, y5, y9,y13);
293*fa791526Sriastradh 			CHACHA_QUARTERROUND( y2, y6,y10,y14);
294*fa791526Sriastradh 			CHACHA_QUARTERROUND( y3, y7,y11,y15);
295*fa791526Sriastradh 			CHACHA_QUARTERROUND( y0, y5,y10,y15);
296*fa791526Sriastradh 			CHACHA_QUARTERROUND( y1, y6,y11,y12);
297*fa791526Sriastradh 			CHACHA_QUARTERROUND( y2, y7, y8,y13);
298*fa791526Sriastradh 			CHACHA_QUARTERROUND( y3, y4, y9,y14);
299*fa791526Sriastradh 		}
300*fa791526Sriastradh 		le32enc(s + 0, (x0 + y0) ^ le32dec(p + 0));
301*fa791526Sriastradh 		le32enc(s + 4, (x1 + y1) ^ le32dec(p + 4));
302*fa791526Sriastradh 		le32enc(s + 8, (x2 + y2) ^ le32dec(p + 8));
303*fa791526Sriastradh 		le32enc(s + 12, (x3 + y3) ^ le32dec(p + 12));
304*fa791526Sriastradh 		le32enc(s + 16, (x4 + y4) ^ le32dec(p + 16));
305*fa791526Sriastradh 		le32enc(s + 20, (x5 + y5) ^ le32dec(p + 20));
306*fa791526Sriastradh 		le32enc(s + 24, (x6 + y6) ^ le32dec(p + 24));
307*fa791526Sriastradh 		le32enc(s + 28, (x7 + y7) ^ le32dec(p + 28));
308*fa791526Sriastradh 		le32enc(s + 32, (x8 + y8) ^ le32dec(p + 32));
309*fa791526Sriastradh 		le32enc(s + 36, (x9 + y9) ^ le32dec(p + 36));
310*fa791526Sriastradh 		le32enc(s + 40, (x10 + y10) ^ le32dec(p + 40));
311*fa791526Sriastradh 		le32enc(s + 44, (x11 + y11) ^ le32dec(p + 44));
312*fa791526Sriastradh 		le32enc(s + 48, (x12 + y12) ^ le32dec(p + 48));
313*fa791526Sriastradh 		le32enc(s + 52, (x13 + y13) ^ le32dec(p + 52));
314*fa791526Sriastradh 		le32enc(s + 56, (x14 + y14) ^ le32dec(p + 56));
315*fa791526Sriastradh 		le32enc(s + 60, (x15 + y15) ^ le32dec(p + 60));
316*fa791526Sriastradh 	}
317*fa791526Sriastradh 
318*fa791526Sriastradh 	if (nbytes) {
319*fa791526Sriastradh 		uint8_t buf[64];
320*fa791526Sriastradh 
321*fa791526Sriastradh 		y0 = x0;
322*fa791526Sriastradh 		y1 = x1;
323*fa791526Sriastradh 		y2 = x2;
324*fa791526Sriastradh 		y3 = x3;
325*fa791526Sriastradh 		y4 = x4;
326*fa791526Sriastradh 		y5 = x5;
327*fa791526Sriastradh 		y6 = x6;
328*fa791526Sriastradh 		y7 = x7;
329*fa791526Sriastradh 		y8 = x8;
330*fa791526Sriastradh 		y9 = x9;
331*fa791526Sriastradh 		y10 = x10;
332*fa791526Sriastradh 		y11 = x11;
333*fa791526Sriastradh 		y12 = x12 = blkno;
334*fa791526Sriastradh 		y13 = x13;
335*fa791526Sriastradh 		y14 = x14;
336*fa791526Sriastradh 		y15 = x15;
337*fa791526Sriastradh 		for (i = nr; i > 0; i -= 2) {
338*fa791526Sriastradh 			CHACHA_QUARTERROUND( y0, y4, y8,y12);
339*fa791526Sriastradh 			CHACHA_QUARTERROUND( y1, y5, y9,y13);
340*fa791526Sriastradh 			CHACHA_QUARTERROUND( y2, y6,y10,y14);
341*fa791526Sriastradh 			CHACHA_QUARTERROUND( y3, y7,y11,y15);
342*fa791526Sriastradh 			CHACHA_QUARTERROUND( y0, y5,y10,y15);
343*fa791526Sriastradh 			CHACHA_QUARTERROUND( y1, y6,y11,y12);
344*fa791526Sriastradh 			CHACHA_QUARTERROUND( y2, y7, y8,y13);
345*fa791526Sriastradh 			CHACHA_QUARTERROUND( y3, y4, y9,y14);
346*fa791526Sriastradh 		}
347*fa791526Sriastradh 		le32enc(buf + 0, x0 + y0);
348*fa791526Sriastradh 		le32enc(buf + 4, x1 + y1);
349*fa791526Sriastradh 		le32enc(buf + 8, x2 + y2);
350*fa791526Sriastradh 		le32enc(buf + 12, x3 + y3);
351*fa791526Sriastradh 		le32enc(buf + 16, x4 + y4);
352*fa791526Sriastradh 		le32enc(buf + 20, x5 + y5);
353*fa791526Sriastradh 		le32enc(buf + 24, x6 + y6);
354*fa791526Sriastradh 		le32enc(buf + 28, x7 + y7);
355*fa791526Sriastradh 		le32enc(buf + 32, x8 + y8);
356*fa791526Sriastradh 		le32enc(buf + 36, x9 + y9);
357*fa791526Sriastradh 		le32enc(buf + 40, x10 + y10);
358*fa791526Sriastradh 		le32enc(buf + 44, x11 + y11);
359*fa791526Sriastradh 		le32enc(buf + 48, x12 + y12);
360*fa791526Sriastradh 		le32enc(buf + 52, x13 + y13);
361*fa791526Sriastradh 		le32enc(buf + 56, x14 + y14);
362*fa791526Sriastradh 		le32enc(buf + 60, x15 + y15);
363*fa791526Sriastradh 		for (i = 0; i < nbytes - nbytes%4; i += 4)
364*fa791526Sriastradh 			le32enc(s + i, le32dec(p + i) ^ le32dec(buf + i));
365*fa791526Sriastradh 		for (; i < nbytes; i++)
366*fa791526Sriastradh 			s[i] = p[i] ^ buf[i];
367*fa791526Sriastradh 	}
368*fa791526Sriastradh }
369*fa791526Sriastradh 
370*fa791526Sriastradh /* HChaCha */
371*fa791526Sriastradh 
/*
 * hchacha_ref(out, in, k, c, nr)
 *
 *	HChaCha: load the 16-word state little-endian from the constant
 *	c (words 0-3), the key k (words 4-11) and the input in (words
 *	12-15), apply nr rounds two at a time (column quarter-rounds,
 *	then diagonal ones), and store words 0-3 followed by words
 *	12-15 little-endian as the 32-byte output.  Unlike
 *	chacha_core_ref, the initial state is not added back in.
 *
 *	The round loop counts nr down by two until it reaches zero, so
 *	nr must be even.
 */
static void
hchacha_ref(uint8_t out[restrict static 32], const uint8_t in[static 16],
    const uint8_t k[static 32], const uint8_t c[static 16], unsigned nr)
{
	uint32_t st[16];
	unsigned w;

	for (w = 0; w < 4; w++) {
		st[w] = le32dec(c + 4*w);
		st[12 + w] = le32dec(in + 4*w);
	}
	for (w = 0; w < 8; w++)
		st[4 + w] = le32dec(k + 4*w);

	for (; nr > 0; nr -= 2) {
		/* columns */
		CHACHA_QUARTERROUND(st[0], st[4], st[ 8], st[12]);
		CHACHA_QUARTERROUND(st[1], st[5], st[ 9], st[13]);
		CHACHA_QUARTERROUND(st[2], st[6], st[10], st[14]);
		CHACHA_QUARTERROUND(st[3], st[7], st[11], st[15]);
		/* diagonals */
		CHACHA_QUARTERROUND(st[0], st[5], st[10], st[15]);
		CHACHA_QUARTERROUND(st[1], st[6], st[11], st[12]);
		CHACHA_QUARTERROUND(st[2], st[7], st[ 8], st[13]);
		CHACHA_QUARTERROUND(st[3], st[4], st[ 9], st[14]);
	}

	/* out[0..16) = words 0-3, out[16..32) = words 12-15 */
	for (w = 0; w < 4; w++) {
		le32enc(out + 4*w, st[w]);
		le32enc(out + 16 + 4*w, st[12 + w]);
	}
}
415*fa791526Sriastradh 
416*fa791526Sriastradh /* XChaCha stream cipher */
417*fa791526Sriastradh 
418*fa791526Sriastradh /* https://tools.ietf.org/html/draft-irtf-cfrg-xchacha-03 */
419*fa791526Sriastradh 
420*fa791526Sriastradh static void
xchacha_stream_ref(uint8_t * restrict s,size_t nbytes,uint32_t blkno,const uint8_t nonce[static24],const uint8_t k[static32],unsigned nr)421*fa791526Sriastradh xchacha_stream_ref(uint8_t *restrict s, size_t nbytes, uint32_t blkno,
422*fa791526Sriastradh     const uint8_t nonce[static 24], const uint8_t k[static 32], unsigned nr)
423*fa791526Sriastradh {
424*fa791526Sriastradh 	uint8_t subkey[32];
425*fa791526Sriastradh 	uint8_t subnonce[12];
426*fa791526Sriastradh 
427*fa791526Sriastradh 	hchacha_ref(subkey, nonce/*[0:16)*/, k, chacha_const32, nr);
428*fa791526Sriastradh 	memset(subnonce, 0, 4);
429*fa791526Sriastradh 	memcpy(subnonce + 4, nonce + 16, 8);
430*fa791526Sriastradh 	chacha_stream_ref(s, nbytes, blkno, subnonce, subkey, nr);
431*fa791526Sriastradh }
432*fa791526Sriastradh 
433*fa791526Sriastradh static void
xchacha_stream_xor_ref(uint8_t * restrict c,const uint8_t * p,size_t nbytes,uint32_t blkno,const uint8_t nonce[static24],const uint8_t k[static32],unsigned nr)434*fa791526Sriastradh xchacha_stream_xor_ref(uint8_t *restrict c, const uint8_t *p, size_t nbytes,
435*fa791526Sriastradh     uint32_t blkno,
436*fa791526Sriastradh     const uint8_t nonce[static 24],
437*fa791526Sriastradh     const uint8_t k[static 32],
438*fa791526Sriastradh     unsigned nr)
439*fa791526Sriastradh {
440*fa791526Sriastradh 	uint8_t subkey[32];
441*fa791526Sriastradh 	uint8_t subnonce[12];
442*fa791526Sriastradh 
443*fa791526Sriastradh 	hchacha_ref(subkey, nonce/*[0:16)*/, k, chacha_const32, nr);
444*fa791526Sriastradh 	memset(subnonce, 0, 4);
445*fa791526Sriastradh 	memcpy(subnonce + 4, nonce + 16, 8);
446*fa791526Sriastradh 	chacha_stream_xor_ref(c, p, nbytes, blkno, subnonce, subkey, nr);
447*fa791526Sriastradh }
448*fa791526Sriastradh 
/*
 * chacha_probe_ref()
 *
 *	Probe routine for the portable C implementation.  There is
 *	nothing to detect: this code is always available, so the
 *	result is unconditionally 0.
 */
static int
chacha_probe_ref(void)
{
	const int available = 0;

	return available;
}
456*fa791526Sriastradh 
457*fa791526Sriastradh const struct chacha_impl chacha_ref_impl = {
458*fa791526Sriastradh 	.ci_name = "Portable C ChaCha",
459*fa791526Sriastradh 	.ci_probe = chacha_probe_ref,
460*fa791526Sriastradh 	.ci_chacha_core = chacha_core_ref,
461*fa791526Sriastradh 	.ci_hchacha = hchacha_ref,
462*fa791526Sriastradh 	.ci_chacha_stream = chacha_stream_ref,
463*fa791526Sriastradh 	.ci_chacha_stream_xor = chacha_stream_xor_ref,
464*fa791526Sriastradh 	.ci_xchacha_stream = xchacha_stream_ref,
465*fa791526Sriastradh 	.ci_xchacha_stream_xor = xchacha_stream_xor_ref,
466*fa791526Sriastradh };
467