/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/

#include "includes.h"
__RCSID("$NetBSD: chacha.c,v 1.5 2017/04/18 18:41:46 christos Exp $");

#include <stdio.h> /* for NULL */
#include "chacha.h"

/* $OpenBSD: chacha.c,v 1.1 2013/11/21 00:45:44 djm Exp $ */

typedef unsigned char u8;
typedef unsigned int u32;

typedef struct chacha_ctx chacha_ctx;

#define U8C(v) (v##U)
#define U32C(v) (v##U)

#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

#define ROTL32(v, n) \
  (U32V((v) << (n)) | ((v) >> (32 - (n))))

#define U8TO32_LITTLE(p) \
  (((u32)((p)[0])      ) | \
   ((u32)((p)[1]) <<  8) | \
   ((u32)((p)[2]) << 16) | \
   ((u32)((p)[3]) << 24))

#define U32TO8_LITTLE(p, v) \
  do { \
    (p)[0] = U8V((v)      ); \
    (p)[1] = U8V((v) >>  8); \
    (p)[2] = U8V((v) >> 16); \
    (p)[3] = U8V((v) >> 24); \
  } while (0)

#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/* Constants fill state words 0..3: "expand 32-byte k" for 256-bit keys,
   "expand 16-byte k" for 128-bit keys. */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";

/* Load the key into state words 4..11 and the matching constant into 0..3. */
void
chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
{
  const char *constants;

  x->input[4] = U8TO32_LITTLE(k + 0);
  x->input[5] = U8TO32_LITTLE(k + 4);
  x->input[6] = U8TO32_LITTLE(k + 8);
  x->input[7] = U8TO32_LITTLE(k + 12);
  if (kbits == 256) { /* recommended */
    k += 16;
    constants = sigma;
  } else { /* kbits == 128 */
    constants = tau;
  }
  x->input[8] = U8TO32_LITTLE(k + 0);
  x->input[9] = U8TO32_LITTLE(k + 4);
  x->input[10] = U8TO32_LITTLE(k + 8);
  x->input[11] = U8TO32_LITTLE(k + 12);
  x->input[0] = U8TO32_LITTLE(constants + 0);
  x->input[1] = U8TO32_LITTLE(constants + 4);
  x->input[2] = U8TO32_LITTLE(constants + 8);
  x->input[3] = U8TO32_LITTLE(constants + 12);
}

/* Load the 64-bit block counter into words 12..13 (zero if counter is NULL)
   and the 64-bit IV into words 14..15. */
void
chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
{
  x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
  x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
  x->input[14] = U8TO32_LITTLE(iv + 0);
  x->input[15] = U8TO32_LITTLE(iv + 4);
}

/* XOR the keystream with m into c, 64 bytes per block; a trailing partial
   block is staged through tmp. */
void
chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
{
  u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
  u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
  u8 *ctarget = NULL;
  u8 tmp[64];
  u_int i;

  if (!bytes) return;

  j0 = x->input[0];
  j1 = x->input[1];
  j2 = x->input[2];
  j3 = x->input[3];
  j4 = x->input[4];
  j5 = x->input[5];
  j6 = x->input[6];
  j7 = x->input[7];
  j8 = x->input[8];
  j9 = x->input[9];
  j10 = x->input[10];
  j11 = x->input[11];
  j12 = x->input[12];
  j13 = x->input[13];
  j14 = x->input[14];
  j15 = x->input[15];

  for (;;) {
    if (bytes < 64) {
      /* final partial block: operate on a copy in tmp, copy out below */
      for (i = 0;i < bytes;++i) tmp[i] = m[i];
      m = tmp;
      ctarget = c;
      c = tmp;
    }
    x0 = j0;
    x1 = j1;
    x2 = j2;
    x3 = j3;
    x4 = j4;
    x5 = j5;
    x6 = j6;
    x7 = j7;
    x8 = j8;
    x9 = j9;
    x10 = j10;
    x11 = j11;
    x12 = j12;
    x13 = j13;
    x14 = j14;
    x15 = j15;
    /* 20 rounds: alternating column and diagonal quarter-rounds */
    for (i = 20;i > 0;i -= 2) {
      QUARTERROUND( x0, x4, x8,x12)
      QUARTERROUND( x1, x5, x9,x13)
      QUARTERROUND( x2, x6,x10,x14)
      QUARTERROUND( x3, x7,x11,x15)
      QUARTERROUND( x0, x5,x10,x15)
      QUARTERROUND( x1, x6,x11,x12)
      QUARTERROUND( x2, x7, x8,x13)
      QUARTERROUND( x3, x4, x9,x14)
    }
    x0 = PLUS(x0,j0);
    x1 = PLUS(x1,j1);
    x2 = PLUS(x2,j2);
    x3 = PLUS(x3,j3);
    x4 = PLUS(x4,j4);
    x5 = PLUS(x5,j5);
    x6 = PLUS(x6,j6);
    x7 = PLUS(x7,j7);
    x8 = PLUS(x8,j8);
    x9 = PLUS(x9,j9);
    x10 = PLUS(x10,j10);
    x11 = PLUS(x11,j11);
    x12 = PLUS(x12,j12);
    x13 = PLUS(x13,j13);
    x14 = PLUS(x14,j14);
    x15 = PLUS(x15,j15);

    x0 = XOR(x0,U8TO32_LITTLE(m + 0));
    x1 = XOR(x1,U8TO32_LITTLE(m + 4));
    x2 = XOR(x2,U8TO32_LITTLE(m + 8));
    x3 = XOR(x3,U8TO32_LITTLE(m + 12));
    x4 = XOR(x4,U8TO32_LITTLE(m + 16));
    x5 = XOR(x5,U8TO32_LITTLE(m + 20));
    x6 = XOR(x6,U8TO32_LITTLE(m + 24));
    x7 = XOR(x7,U8TO32_LITTLE(m + 28));
    x8 = XOR(x8,U8TO32_LITTLE(m + 32));
    x9 = XOR(x9,U8TO32_LITTLE(m + 36));
    x10 = XOR(x10,U8TO32_LITTLE(m + 40));
    x11 = XOR(x11,U8TO32_LITTLE(m + 44));
    x12 = XOR(x12,U8TO32_LITTLE(m + 48));
    x13 = XOR(x13,U8TO32_LITTLE(m + 52));
    x14 = XOR(x14,U8TO32_LITTLE(m + 56));
    x15 = XOR(x15,U8TO32_LITTLE(m + 60));

    j12 = PLUSONE(j12);
    if (!j12) {
      j13 = PLUSONE(j13);
      /* stopping at 2^70 bytes per nonce is user's responsibility */
    }

    U32TO8_LITTLE(c + 0,x0);
    U32TO8_LITTLE(c + 4,x1);
    U32TO8_LITTLE(c + 8,x2);
    U32TO8_LITTLE(c + 12,x3);
    U32TO8_LITTLE(c + 16,x4);
    U32TO8_LITTLE(c + 20,x5);
    U32TO8_LITTLE(c + 24,x6);
    U32TO8_LITTLE(c + 28,x7);
    U32TO8_LITTLE(c + 32,x8);
    U32TO8_LITTLE(c + 36,x9);
    U32TO8_LITTLE(c + 40,x10);
    U32TO8_LITTLE(c + 44,x11);
    U32TO8_LITTLE(c + 48,x12);
    U32TO8_LITTLE(c + 52,x13);
    U32TO8_LITTLE(c + 56,x14);
    U32TO8_LITTLE(c + 60,x15);

    if (bytes <= 64) {
      if (bytes < 64) {
        for (i = 0;i < bytes;++i) ctarget[i] = c[i];
      }
      x->input[12] = j12;
      x->input[13] = j13;
      return;
    }
    bytes -= 64;
    c += 64;
    m += 64;
  }
}
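
/*
 * A minimal usage sketch, not part of the upstream file and compiled out
 * here; it assumes the struct chacha_ctx definition and the prototypes
 * declared in chacha.h. The function name chacha_example and the all-zero
 * key/nonce values are purely illustrative.
 */
#if 0
static void
chacha_example(void)
{
  struct chacha_ctx ctx;
  u8 key[32] = {0};      /* 256-bit key (all-zero only for illustration) */
  u8 nonce[8] = {0};     /* 64-bit IV/nonce */
  u8 counter[8] = {0};   /* 64-bit block counter, starting at 0 */
  u8 buf[128] = {0};     /* plaintext in, ciphertext out (in place) */

  chacha_keysetup(&ctx, key, 256);       /* kbits = 256 (recommended) */
  chacha_ivsetup(&ctx, nonce, counter);  /* counter may also be NULL for 0 */
  chacha_encrypt_bytes(&ctx, buf, buf, sizeof(buf));
}
#endif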