xref: /dflybsd-src/sys/crypto/chacha20/chacha.c (revision 04a3b05a06faf941aa6ec3a48b7ad31d19e75bc0)
1  /*
2  chacha-merged.c version 20080118
3  D. J. Bernstein
4  Public domain.
5  */
6  
7  /* $OpenBSD: chacha.c,v 1.1 2013/11/21 00:45:44 djm Exp $ */
8  
9  #include <sys/param.h>
10  
11  #include "chacha.h"
12  
/* Short aliases for the fixed-width byte and word types used throughout. */
13  typedef uint8_t u8;
14  typedef uint32_t u32;
15  
16  typedef struct chacha_ctx chacha_ctx;
17  
/* Append the unsigned suffix to an integer literal. */
18  #define U8C(v) (v##U)
19  #define U32C(v) (v##U)
20  
/* Reduce a value to its low 8 / low 32 bits. */
21  #define U8V(v) ((u8)(v) & U8C(0xFF))
22  #define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
23  
/*
 * Rotate v left by n bits.  For a 32-bit v, n must be in 1..31: n == 0 would
 * make the right-hand term shift by the full width of the type.
 * v and n are each evaluated twice.
 */
24  #define ROTL32(v, n) \
25    (U32V((v) << (n)) | ((v) >> (32 - (n))))
26  
/* Assemble a 32-bit word from the four bytes p[0..3], least significant first. */
27  #define U8TO32_LITTLE(p) \
28    (((u32)((p)[0])      ) | \
29     ((u32)((p)[1]) <<  8) | \
30     ((u32)((p)[2]) << 16) | \
31     ((u32)((p)[3]) << 24))
32  
/*
 * Store the 32-bit value v into the four bytes p[0..3], least significant
 * first.  Statement macro: p and v are each evaluated four times.
 */
33  #define U32TO8_LITTLE(p, v) \
34    do { \
35      (p)[0] = U8V((v)      ); \
36      (p)[1] = U8V((v) >>  8); \
37      (p)[2] = U8V((v) >> 16); \
38      (p)[3] = U8V((v) >> 24); \
39    } while (0)
40  
/* Word primitives used by the round function: rotate, xor, add mod 2^32. */
41  #define ROTATE(v,c) (ROTL32(v,c))
42  #define XOR(v,w) ((v) ^ (w))
43  #define PLUS(v,w) (U32V((v) + (w)))
44  #define PLUSONE(v) (PLUS((v),1))
45  
/*
 * One quarter round: updates the four lvalues a, b, c, d in place using
 * add / xor / rotate with rotation amounts 16, 12, 8 and 7.
 *
 * The expansion is a bare sequence of eight assignments, each already
 * terminated by ';' and NOT wrapped in do { } while (0).  It is therefore
 * invoked without a trailing semicolon and must not be used as the unbraced
 * body of an if/for/while.
 */
46  #define QUARTERROUND(a,b,c,d) \
47    a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
48    c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
49    a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
50    c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
51  
/*
 * 16-byte constants loaded into state words 0..3 by chacha_keysetup():
 * sigma when kbits == 256, tau otherwise.  Each literal is exactly 16
 * characters long, so the arrays hold no terminating NUL.
 */
52  static const char sigma[16] = "expand 32-byte k";
53  static const char tau[16] = "expand 16-byte k";
54  
55  LOCAL void
56  chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits)
57  {
58    const char *constants;
59  
60    x->input[4] = U8TO32_LITTLE(k + 0);
61    x->input[5] = U8TO32_LITTLE(k + 4);
62    x->input[6] = U8TO32_LITTLE(k + 8);
63    x->input[7] = U8TO32_LITTLE(k + 12);
64    if (kbits == 256) { /* recommended */
65      k += 16;
66      constants = sigma;
67    } else { /* kbits == 128 */
68      constants = tau;
69    }
70    x->input[8] = U8TO32_LITTLE(k + 0);
71    x->input[9] = U8TO32_LITTLE(k + 4);
72    x->input[10] = U8TO32_LITTLE(k + 8);
73    x->input[11] = U8TO32_LITTLE(k + 12);
74    x->input[0] = U8TO32_LITTLE(constants + 0);
75    x->input[1] = U8TO32_LITTLE(constants + 4);
76    x->input[2] = U8TO32_LITTLE(constants + 8);
77    x->input[3] = U8TO32_LITTLE(constants + 12);
78  }
79  
80  LOCAL void
81  chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
82  {
83  #ifdef CHACHA_NONCE0_CTR128
84    /* 128-bit counter without IV */
85    (void)iv;
86    x->input[12] = U8TO32_LITTLE(counter + 0);
87    x->input[13] = U8TO32_LITTLE(counter + 4);
88    x->input[14] = U8TO32_LITTLE(counter + 8);
89    x->input[15] = U8TO32_LITTLE(counter + 12);
90  #else
91    /* 64-bit IV and 64-bit counter */
92    x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
93    x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
94    x->input[14] = U8TO32_LITTLE(iv + 0);
95    x->input[15] = U8TO32_LITTLE(iv + 4);
96  #endif
97  }
98  
#ifdef CHACHA_NONCE0_CTR128
/*
 * Write state words 12..15 (the 128-bit counter in this configuration) to
 * the 16 bytes at counter, each word stored little-endian.  Only compiled
 * when CHACHA_NONCE0_CTR128 is defined.
 */
LOCAL void
chacha_ctrsave(const chacha_ctx *x, u8 *counter)
{
  u32 w;

  for (w = 0; w < 4; ++w) {
    U32TO8_LITTLE(counter + 4 * w, x->input[12 + w]);
  }
}
#endif
109  
110  LOCAL void
111  chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes)
112  {
113    u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
114    u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
115    u8 *ctarget = NULL;
116    u8 tmp[64];
117    u32 i;
118  
119    if (!bytes) return;
120  
121    j0 = x->input[0];
122    j1 = x->input[1];
123    j2 = x->input[2];
124    j3 = x->input[3];
125    j4 = x->input[4];
126    j5 = x->input[5];
127    j6 = x->input[6];
128    j7 = x->input[7];
129    j8 = x->input[8];
130    j9 = x->input[9];
131    j10 = x->input[10];
132    j11 = x->input[11];
133    j12 = x->input[12];
134    j13 = x->input[13];
135    j14 = x->input[14];
136    j15 = x->input[15];
137  
138    for (;;) {
139      if (bytes < 64) {
140  #ifndef KEYSTREAM_ONLY
141        for (i = 0;i < bytes;++i) tmp[i] = m[i];
142        m = tmp;
143  #else
144        (void)m; /* suppress compiler warning */
145  #endif
146        ctarget = c;
147        c = tmp;
148      }
149      x0 = j0;
150      x1 = j1;
151      x2 = j2;
152      x3 = j3;
153      x4 = j4;
154      x5 = j5;
155      x6 = j6;
156      x7 = j7;
157      x8 = j8;
158      x9 = j9;
159      x10 = j10;
160      x11 = j11;
161      x12 = j12;
162      x13 = j13;
163      x14 = j14;
164      x15 = j15;
165      for (i = 20;i > 0;i -= 2) {
166        QUARTERROUND( x0, x4, x8,x12)
167        QUARTERROUND( x1, x5, x9,x13)
168        QUARTERROUND( x2, x6,x10,x14)
169        QUARTERROUND( x3, x7,x11,x15)
170        QUARTERROUND( x0, x5,x10,x15)
171        QUARTERROUND( x1, x6,x11,x12)
172        QUARTERROUND( x2, x7, x8,x13)
173        QUARTERROUND( x3, x4, x9,x14)
174      }
175      x0 = PLUS(x0,j0);
176      x1 = PLUS(x1,j1);
177      x2 = PLUS(x2,j2);
178      x3 = PLUS(x3,j3);
179      x4 = PLUS(x4,j4);
180      x5 = PLUS(x5,j5);
181      x6 = PLUS(x6,j6);
182      x7 = PLUS(x7,j7);
183      x8 = PLUS(x8,j8);
184      x9 = PLUS(x9,j9);
185      x10 = PLUS(x10,j10);
186      x11 = PLUS(x11,j11);
187      x12 = PLUS(x12,j12);
188      x13 = PLUS(x13,j13);
189      x14 = PLUS(x14,j14);
190      x15 = PLUS(x15,j15);
191  
192  #ifndef KEYSTREAM_ONLY
193      x0 = XOR(x0,U8TO32_LITTLE(m + 0));
194      x1 = XOR(x1,U8TO32_LITTLE(m + 4));
195      x2 = XOR(x2,U8TO32_LITTLE(m + 8));
196      x3 = XOR(x3,U8TO32_LITTLE(m + 12));
197      x4 = XOR(x4,U8TO32_LITTLE(m + 16));
198      x5 = XOR(x5,U8TO32_LITTLE(m + 20));
199      x6 = XOR(x6,U8TO32_LITTLE(m + 24));
200      x7 = XOR(x7,U8TO32_LITTLE(m + 28));
201      x8 = XOR(x8,U8TO32_LITTLE(m + 32));
202      x9 = XOR(x9,U8TO32_LITTLE(m + 36));
203      x10 = XOR(x10,U8TO32_LITTLE(m + 40));
204      x11 = XOR(x11,U8TO32_LITTLE(m + 44));
205      x12 = XOR(x12,U8TO32_LITTLE(m + 48));
206      x13 = XOR(x13,U8TO32_LITTLE(m + 52));
207      x14 = XOR(x14,U8TO32_LITTLE(m + 56));
208      x15 = XOR(x15,U8TO32_LITTLE(m + 60));
209  #endif
210  
211      j12 = PLUSONE(j12);
212      if (!j12) {
213        j13 = PLUSONE(j13);
214  #ifdef CHACHA_NONCE0_CTR128
215        if (!j13) {
216          j14 = PLUSONE(j14);
217          if (!j14) {
218            j15 = PLUSONE(j15);
219          }
220        }
221  #else
222        /* stopping at 2^70 bytes per nonce is user's responsibility */
223  #endif
224      }
225  
226      U32TO8_LITTLE(c + 0,x0);
227      U32TO8_LITTLE(c + 4,x1);
228      U32TO8_LITTLE(c + 8,x2);
229      U32TO8_LITTLE(c + 12,x3);
230      U32TO8_LITTLE(c + 16,x4);
231      U32TO8_LITTLE(c + 20,x5);
232      U32TO8_LITTLE(c + 24,x6);
233      U32TO8_LITTLE(c + 28,x7);
234      U32TO8_LITTLE(c + 32,x8);
235      U32TO8_LITTLE(c + 36,x9);
236      U32TO8_LITTLE(c + 40,x10);
237      U32TO8_LITTLE(c + 44,x11);
238      U32TO8_LITTLE(c + 48,x12);
239      U32TO8_LITTLE(c + 52,x13);
240      U32TO8_LITTLE(c + 56,x14);
241      U32TO8_LITTLE(c + 60,x15);
242  
243      if (bytes <= 64) {
244        if (bytes < 64) {
245          for (i = 0;i < bytes;++i) ctarget[i] = c[i];
246        }
247        x->input[12] = j12;
248        x->input[13] = j13;
249  #ifdef CHACHA_NONCE0_CTR128
250        x->input[14] = j14;
251        x->input[15] = j15;
252  #endif
253        return;
254      }
255      bytes -= 64;
256      c += 64;
257  #ifndef KEYSTREAM_ONLY
258      m += 64;
259  #endif
260    }
261  }
262