1 /* $OpenBSD: chacha.c,v 1.2 2023/07/17 05:26:38 djm Exp $ */
2 /*
3 chacha-merged.c version 20080118
4 D. J. Bernstein
5 Public domain.
6 */
7
8 #include "includes.h"
9 __RCSID("$NetBSD: chacha.c,v 1.6 2023/10/25 20:19:57 christos Exp $");
10
11 #include <stdio.h> /* for NULL */
12 #include "chacha.h"
13
/* Short aliases for the byte and word types used throughout this file. */
typedef unsigned char u8;
typedef unsigned int u32;

typedef struct chacha_ctx chacha_ctx;

/* Append an unsigned suffix to an integer literal. */
#define U8C(v) (v##U)
#define U32C(v) (v##U)

/*
 * Truncate a value to 8 / 32 bits.  The explicit mask keeps the result
 * within range even if the underlying C type is wider than the name
 * suggests.
 */
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/*
 * Rotate the 32-bit value v left by n bits.  n must be in 1..31: a shift
 * count of 32 on a 32-bit operand is undefined in C.  v is evaluated twice.
 */
#define ROTL32(v, n) \
	(U32V((v) << (n)) | ((v) >> (32 - (n))))

/* Assemble a 32-bit word from the four bytes at p, least significant first. */
#define U8TO32_LITTLE(p) \
	(((u32)((p)[0]) ) | \
	((u32)((p)[1]) << 8) | \
	((u32)((p)[2]) << 16) | \
	((u32)((p)[3]) << 24))

/*
 * Store the 32-bit word v into the four bytes at p, least significant
 * first.  Both p and v are evaluated four times.
 */
#define U32TO8_LITTLE(p, v) \
	do { \
		(p)[0] = U8V((v) ); \
		(p)[1] = U8V((v) >> 8); \
		(p)[2] = U8V((v) >> 16); \
		(p)[3] = U8V((v) >> 24); \
	} while (0)

/* Primitive word operations; PLUS wraps modulo 2^32 via U32V. */
#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/*
 * One quarter round: four add / xor / rotate steps with rotation amounts
 * 16, 12, 8 and 7.  All four arguments are assigned to, so they must be
 * plain lvalues without side effects.  The expansion is a sequence of
 * statements that already ends in ';' and is NOT wrapped in do/while, so
 * it must not be used as the unbraced body of an if/else/loop.
 */
#define QUARTERROUND(a,b,c,d) \
	a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
	c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
	a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
	c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/*
 * 16-byte constants loaded into state words 0..3 by chacha_keysetup():
 * sigma for 256-bit keys, tau otherwise.  The arrays are exactly 16 bytes,
 * so the string literals' terminating NUL is not stored; do not treat
 * them as C strings.
 */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
55
56 void
chacha_keysetup(chacha_ctx * x,const u8 * k,u32 kbits)57 chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
58 {
59 const char *constants;
60
61 x->input[4] = U8TO32_LITTLE(k + 0);
62 x->input[5] = U8TO32_LITTLE(k + 4);
63 x->input[6] = U8TO32_LITTLE(k + 8);
64 x->input[7] = U8TO32_LITTLE(k + 12);
65 if (kbits == 256) { /* recommended */
66 k += 16;
67 constants = sigma;
68 } else { /* kbits == 128 */
69 constants = tau;
70 }
71 x->input[8] = U8TO32_LITTLE(k + 0);
72 x->input[9] = U8TO32_LITTLE(k + 4);
73 x->input[10] = U8TO32_LITTLE(k + 8);
74 x->input[11] = U8TO32_LITTLE(k + 12);
75 x->input[0] = U8TO32_LITTLE(constants + 0);
76 x->input[1] = U8TO32_LITTLE(constants + 4);
77 x->input[2] = U8TO32_LITTLE(constants + 8);
78 x->input[3] = U8TO32_LITTLE(constants + 12);
79 }
80
81 void
chacha_ivsetup(chacha_ctx * x,const u8 * iv,const u8 * counter)82 chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
83 {
84 x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
85 x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
86 x->input[14] = U8TO32_LITTLE(iv + 0);
87 x->input[15] = U8TO32_LITTLE(iv + 4);
88 }
89
90 void
chacha_encrypt_bytes(chacha_ctx * x,const u8 * m,u8 * c,u32 bytes)91 chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
92 {
93 u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
94 u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
95 u8 *ctarget = NULL;
96 u8 tmp[64];
97 u_int i;
98
99 if (!bytes) return;
100
101 j0 = x->input[0];
102 j1 = x->input[1];
103 j2 = x->input[2];
104 j3 = x->input[3];
105 j4 = x->input[4];
106 j5 = x->input[5];
107 j6 = x->input[6];
108 j7 = x->input[7];
109 j8 = x->input[8];
110 j9 = x->input[9];
111 j10 = x->input[10];
112 j11 = x->input[11];
113 j12 = x->input[12];
114 j13 = x->input[13];
115 j14 = x->input[14];
116 j15 = x->input[15];
117
118 for (;;) {
119 if (bytes < 64) {
120 for (i = 0;i < bytes;++i) tmp[i] = m[i];
121 m = tmp;
122 ctarget = c;
123 c = tmp;
124 }
125 x0 = j0;
126 x1 = j1;
127 x2 = j2;
128 x3 = j3;
129 x4 = j4;
130 x5 = j5;
131 x6 = j6;
132 x7 = j7;
133 x8 = j8;
134 x9 = j9;
135 x10 = j10;
136 x11 = j11;
137 x12 = j12;
138 x13 = j13;
139 x14 = j14;
140 x15 = j15;
141 for (i = 20;i > 0;i -= 2) {
142 QUARTERROUND( x0, x4, x8,x12)
143 QUARTERROUND( x1, x5, x9,x13)
144 QUARTERROUND( x2, x6,x10,x14)
145 QUARTERROUND( x3, x7,x11,x15)
146 QUARTERROUND( x0, x5,x10,x15)
147 QUARTERROUND( x1, x6,x11,x12)
148 QUARTERROUND( x2, x7, x8,x13)
149 QUARTERROUND( x3, x4, x9,x14)
150 }
151 x0 = PLUS(x0,j0);
152 x1 = PLUS(x1,j1);
153 x2 = PLUS(x2,j2);
154 x3 = PLUS(x3,j3);
155 x4 = PLUS(x4,j4);
156 x5 = PLUS(x5,j5);
157 x6 = PLUS(x6,j6);
158 x7 = PLUS(x7,j7);
159 x8 = PLUS(x8,j8);
160 x9 = PLUS(x9,j9);
161 x10 = PLUS(x10,j10);
162 x11 = PLUS(x11,j11);
163 x12 = PLUS(x12,j12);
164 x13 = PLUS(x13,j13);
165 x14 = PLUS(x14,j14);
166 x15 = PLUS(x15,j15);
167
168 x0 = XOR(x0,U8TO32_LITTLE(m + 0));
169 x1 = XOR(x1,U8TO32_LITTLE(m + 4));
170 x2 = XOR(x2,U8TO32_LITTLE(m + 8));
171 x3 = XOR(x3,U8TO32_LITTLE(m + 12));
172 x4 = XOR(x4,U8TO32_LITTLE(m + 16));
173 x5 = XOR(x5,U8TO32_LITTLE(m + 20));
174 x6 = XOR(x6,U8TO32_LITTLE(m + 24));
175 x7 = XOR(x7,U8TO32_LITTLE(m + 28));
176 x8 = XOR(x8,U8TO32_LITTLE(m + 32));
177 x9 = XOR(x9,U8TO32_LITTLE(m + 36));
178 x10 = XOR(x10,U8TO32_LITTLE(m + 40));
179 x11 = XOR(x11,U8TO32_LITTLE(m + 44));
180 x12 = XOR(x12,U8TO32_LITTLE(m + 48));
181 x13 = XOR(x13,U8TO32_LITTLE(m + 52));
182 x14 = XOR(x14,U8TO32_LITTLE(m + 56));
183 x15 = XOR(x15,U8TO32_LITTLE(m + 60));
184
185 j12 = PLUSONE(j12);
186 if (!j12) {
187 j13 = PLUSONE(j13);
188 /* stopping at 2^70 bytes per nonce is user's responsibility */
189 }
190
191 U32TO8_LITTLE(c + 0,x0);
192 U32TO8_LITTLE(c + 4,x1);
193 U32TO8_LITTLE(c + 8,x2);
194 U32TO8_LITTLE(c + 12,x3);
195 U32TO8_LITTLE(c + 16,x4);
196 U32TO8_LITTLE(c + 20,x5);
197 U32TO8_LITTLE(c + 24,x6);
198 U32TO8_LITTLE(c + 28,x7);
199 U32TO8_LITTLE(c + 32,x8);
200 U32TO8_LITTLE(c + 36,x9);
201 U32TO8_LITTLE(c + 40,x10);
202 U32TO8_LITTLE(c + 44,x11);
203 U32TO8_LITTLE(c + 48,x12);
204 U32TO8_LITTLE(c + 52,x13);
205 U32TO8_LITTLE(c + 56,x14);
206 U32TO8_LITTLE(c + 60,x15);
207
208 if (bytes <= 64) {
209 if (bytes < 64) {
210 for (i = 0;i < bytes;++i) ctarget[i] = c[i];
211 }
212 x->input[12] = j12;
213 x->input[13] = j13;
214 return;
215 }
216 bytes -= 64;
217 c += 64;
218 m += 64;
219 }
220 }
221