1 /* 2 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved. 3 * 4 * Licensed under the OpenSSL license (the "License"). You may not use 5 * this file except in compliance with the License. You can obtain a copy 6 * in the file LICENSE in the source distribution or at 7 * https://www.openssl.org/source/license.html 8 */ 9 10 #include <openssl/crypto.h> 11 #include "modes_local.h" 12 #include <string.h> 13 14 #if defined(__GNUC__) && !defined(STRICT_ALIGNMENT) 15 typedef size_t size_t_aX __attribute((__aligned__(1))); 16 #else 17 typedef size_t size_t_aX; 18 #endif 19 20 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT) 21 /* redefine, because alignment is ensured */ 22 # undef GETU32 23 # define GETU32(p) BSWAP4(*(const u32 *)(p)) 24 # undef PUTU32 25 # define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v) 26 #endif 27 28 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16)) 29 #define REDUCE1BIT(V) do { \ 30 if (sizeof(size_t)==8) { \ 31 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \ 32 V.lo = (V.hi<<63)|(V.lo>>1); \ 33 V.hi = (V.hi>>1 )^T; \ 34 } \ 35 else { \ 36 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \ 37 V.lo = (V.hi<<63)|(V.lo>>1); \ 38 V.hi = (V.hi>>1 )^((u64)T<<32); \ 39 } \ 40 } while(0) 41 42 /*- 43 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should 44 * never be set to 8. 8 is effectively reserved for testing purposes. 45 * TABLE_BITS>1 are lookup-table-driven implementations referred to as 46 * "Shoup's" in GCM specification. In other words OpenSSL does not cover 47 * whole spectrum of possible table driven implementations. Why? In 48 * non-"Shoup's" case memory access pattern is segmented in such manner, 49 * that it's trivial to see that cache timing information can reveal 50 * fair portion of intermediate hash value. Given that ciphertext is 51 * always available to attacker, it's possible for him to attempt to 52 * deduce secret parameter H and if successful, tamper with messages 53 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's 54 * not as trivial, but there is no reason to believe that it's resistant 55 * to cache-timing attack. And the thing about "8-bit" implementation is 56 * that it consumes 16 (sixteen) times more memory, 4KB per individual 57 * key + 1KB shared. Well, on pros side it should be twice as fast as 58 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version 59 * was observed to run ~75% faster, closer to 100% for commercial 60 * compilers... Yet "4-bit" procedure is preferred, because it's 61 * believed to provide better security-performance balance and adequate 62 * all-round performance. "All-round" refers to things like: 63 * 64 * - shorter setup time effectively improves overall timing for 65 * handling short messages; 66 * - larger table allocation can become unbearable because of VM 67 * subsystem penalties (for example on Windows large enough free 68 * results in VM working set trimming, meaning that consequent 69 * malloc would immediately incur working set expansion); 70 * - larger table has larger cache footprint, which can affect 71 * performance of other code paths (not necessarily even from same 72 * thread in Hyper-Threading world); 73 * 74 * Value of 1 is not appropriate for performance reasons. 75 */ 76 #if TABLE_BITS==8 77 78 static void gcm_init_8bit(u128 Htable[256], u64 H[2]) 79 { 80 int i, j; 81 u128 V; 82 83 Htable[0].hi = 0; 84 Htable[0].lo = 0; 85 V.hi = H[0]; 86 V.lo = H[1]; 87 88 for (Htable[128] = V, i = 64; i > 0; i >>= 1) { 89 REDUCE1BIT(V); 90 Htable[i] = V; 91 } 92 93 for (i = 2; i < 256; i <<= 1) { 94 u128 *Hi = Htable + i, H0 = *Hi; 95 for (j = 1; j < i; ++j) { 96 Hi[j].hi = H0.hi ^ Htable[j].hi; 97 Hi[j].lo = H0.lo ^ Htable[j].lo; 98 } 99 } 100 } 101 102 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256]) 103 { 104 u128 Z = { 0, 0 }; 105 const u8 *xi = (const u8 *)Xi + 15; 106 size_t rem, n = *xi; 107 const union { 108 long one; 109 char little; 110 } is_endian = { 1 }; 111 static const size_t rem_8bit[256] = { 112 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246), 113 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E), 114 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56), 115 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E), 116 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66), 117 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E), 118 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076), 119 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E), 120 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06), 121 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E), 122 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416), 123 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E), 124 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626), 125 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E), 126 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836), 127 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E), 128 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6), 129 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE), 130 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6), 131 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE), 132 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6), 133 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE), 134 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6), 135 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE), 136 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86), 137 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E), 138 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496), 139 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E), 140 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6), 141 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE), 142 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6), 143 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE), 144 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346), 145 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E), 146 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56), 147 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E), 148 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66), 149 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E), 150 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176), 151 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E), 152 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06), 153 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E), 154 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516), 155 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E), 156 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726), 157 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E), 158 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936), 159 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E), 160 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6), 161 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE), 162 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6), 163 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE), 164 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6), 165 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE), 166 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6), 167 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE), 168 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86), 169 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E), 170 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596), 171 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E), 172 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6), 173 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE), 174 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6), 175 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) 176 }; 177 178 while (1) { 179 Z.hi ^= Htable[n].hi; 180 Z.lo ^= Htable[n].lo; 181 182 if ((u8 *)Xi == xi) 183 break; 184 185 n = *(--xi); 186 187 rem = (size_t)Z.lo & 0xff; 188 Z.lo = (Z.hi << 56) | (Z.lo >> 8); 189 Z.hi = (Z.hi >> 8); 190 if (sizeof(size_t) == 8) 191 Z.hi ^= rem_8bit[rem]; 192 else 193 Z.hi ^= (u64)rem_8bit[rem] << 32; 194 } 195 196 if (is_endian.little) { 197 # ifdef BSWAP8 198 Xi[0] = BSWAP8(Z.hi); 199 Xi[1] = BSWAP8(Z.lo); 200 # else 201 u8 *p = (u8 *)Xi; 202 u32 v; 203 v = (u32)(Z.hi >> 32); 204 PUTU32(p, v); 205 v = (u32)(Z.hi); 206 PUTU32(p + 4, v); 207 v = (u32)(Z.lo >> 32); 208 PUTU32(p + 8, v); 209 v = (u32)(Z.lo); 210 PUTU32(p + 12, v); 211 # endif 212 } else { 213 Xi[0] = Z.hi; 214 Xi[1] = Z.lo; 215 } 216 } 217 218 # define GCM_MUL(ctx) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable) 219 220 #elif TABLE_BITS==4 221 222 static void gcm_init_4bit(u128 Htable[16], u64 H[2]) 223 { 224 u128 V; 225 # if defined(OPENSSL_SMALL_FOOTPRINT) 226 int i; 227 # endif 228 229 Htable[0].hi = 0; 230 Htable[0].lo = 0; 231 V.hi = H[0]; 232 V.lo = H[1]; 233 234 # if defined(OPENSSL_SMALL_FOOTPRINT) 235 for (Htable[8] = V, i = 4; i > 0; i >>= 1) { 236 REDUCE1BIT(V); 237 Htable[i] = V; 238 } 239 240 for (i = 2; i < 16; i <<= 1) { 241 u128 *Hi = Htable + i; 242 int j; 243 for (V = *Hi, j = 1; j < i; ++j) { 244 Hi[j].hi = V.hi ^ Htable[j].hi; 245 Hi[j].lo = V.lo ^ Htable[j].lo; 246 } 247 } 248 # else 249 Htable[8] = V; 250 REDUCE1BIT(V); 251 Htable[4] = V; 252 REDUCE1BIT(V); 253 Htable[2] = V; 254 REDUCE1BIT(V); 255 Htable[1] = V; 256 Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo; 257 V = Htable[4]; 258 Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo; 259 Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo; 260 Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo; 261 V = Htable[8]; 262 Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo; 263 Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo; 264 Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo; 265 Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo; 266 Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo; 267 Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo; 268 Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo; 269 # endif 270 # if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm)) 271 /* 272 * ARM assembler expects specific dword order in Htable. 273 */ 274 { 275 int j; 276 const union { 277 long one; 278 char little; 279 } is_endian = { 1 }; 280 281 if (is_endian.little) 282 for (j = 0; j < 16; ++j) { 283 V = Htable[j]; 284 Htable[j].hi = V.lo; 285 Htable[j].lo = V.hi; 286 } else 287 for (j = 0; j < 16; ++j) { 288 V = Htable[j]; 289 Htable[j].hi = V.lo << 32 | V.lo >> 32; 290 Htable[j].lo = V.hi << 32 | V.hi >> 32; 291 } 292 } 293 # endif 294 } 295 296 # ifndef GHASH_ASM 297 static const size_t rem_4bit[16] = { 298 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460), 299 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0), 300 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560), 301 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) 302 }; 303 304 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]) 305 { 306 u128 Z; 307 int cnt = 15; 308 size_t rem, nlo, nhi; 309 const union { 310 long one; 311 char little; 312 } is_endian = { 1 }; 313 314 nlo = ((const u8 *)Xi)[15]; 315 nhi = nlo >> 4; 316 nlo &= 0xf; 317 318 Z.hi = Htable[nlo].hi; 319 Z.lo = Htable[nlo].lo; 320 321 while (1) { 322 rem = (size_t)Z.lo & 0xf; 323 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 324 Z.hi = (Z.hi >> 4); 325 if (sizeof(size_t) == 8) 326 Z.hi ^= rem_4bit[rem]; 327 else 328 Z.hi ^= (u64)rem_4bit[rem] << 32; 329 330 Z.hi ^= Htable[nhi].hi; 331 Z.lo ^= Htable[nhi].lo; 332 333 if (--cnt < 0) 334 break; 335 336 nlo = ((const u8 *)Xi)[cnt]; 337 nhi = nlo >> 4; 338 nlo &= 0xf; 339 340 rem = (size_t)Z.lo & 0xf; 341 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 342 Z.hi = (Z.hi >> 4); 343 if (sizeof(size_t) == 8) 344 Z.hi ^= rem_4bit[rem]; 345 else 346 Z.hi ^= (u64)rem_4bit[rem] << 32; 347 348 Z.hi ^= Htable[nlo].hi; 349 Z.lo ^= Htable[nlo].lo; 350 } 351 352 if (is_endian.little) { 353 # ifdef BSWAP8 354 Xi[0] = BSWAP8(Z.hi); 355 Xi[1] = BSWAP8(Z.lo); 356 # else 357 u8 *p = (u8 *)Xi; 358 u32 v; 359 v = (u32)(Z.hi >> 32); 360 PUTU32(p, v); 361 v = (u32)(Z.hi); 362 PUTU32(p + 4, v); 363 v = (u32)(Z.lo >> 32); 364 PUTU32(p + 8, v); 365 v = (u32)(Z.lo); 366 PUTU32(p + 12, v); 367 # endif 368 } else { 369 Xi[0] = Z.hi; 370 Xi[1] = Z.lo; 371 } 372 } 373 374 # if !defined(OPENSSL_SMALL_FOOTPRINT) 375 /* 376 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for 377 * details... Compiler-generated code doesn't seem to give any 378 * performance improvement, at least not on x86[_64]. It's here 379 * mostly as reference and a placeholder for possible future 380 * non-trivial optimization[s]... 381 */ 382 static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], 383 const u8 *inp, size_t len) 384 { 385 u128 Z; 386 int cnt; 387 size_t rem, nlo, nhi; 388 const union { 389 long one; 390 char little; 391 } is_endian = { 1 }; 392 393 # if 1 394 do { 395 cnt = 15; 396 nlo = ((const u8 *)Xi)[15]; 397 nlo ^= inp[15]; 398 nhi = nlo >> 4; 399 nlo &= 0xf; 400 401 Z.hi = Htable[nlo].hi; 402 Z.lo = Htable[nlo].lo; 403 404 while (1) { 405 rem = (size_t)Z.lo & 0xf; 406 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 407 Z.hi = (Z.hi >> 4); 408 if (sizeof(size_t) == 8) 409 Z.hi ^= rem_4bit[rem]; 410 else 411 Z.hi ^= (u64)rem_4bit[rem] << 32; 412 413 Z.hi ^= Htable[nhi].hi; 414 Z.lo ^= Htable[nhi].lo; 415 416 if (--cnt < 0) 417 break; 418 419 nlo = ((const u8 *)Xi)[cnt]; 420 nlo ^= inp[cnt]; 421 nhi = nlo >> 4; 422 nlo &= 0xf; 423 424 rem = (size_t)Z.lo & 0xf; 425 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 426 Z.hi = (Z.hi >> 4); 427 if (sizeof(size_t) == 8) 428 Z.hi ^= rem_4bit[rem]; 429 else 430 Z.hi ^= (u64)rem_4bit[rem] << 32; 431 432 Z.hi ^= Htable[nlo].hi; 433 Z.lo ^= Htable[nlo].lo; 434 } 435 # else 436 /* 437 * Extra 256+16 bytes per-key plus 512 bytes shared tables 438 * [should] give ~50% improvement... One could have PACK()-ed 439 * the rem_8bit even here, but the priority is to minimize 440 * cache footprint... 441 */ 442 u128 Hshr4[16]; /* Htable shifted right by 4 bits */ 443 u8 Hshl4[16]; /* Htable shifted left by 4 bits */ 444 static const unsigned short rem_8bit[256] = { 445 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E, 446 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E, 447 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E, 448 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E, 449 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E, 450 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E, 451 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E, 452 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E, 453 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE, 454 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE, 455 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE, 456 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE, 457 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E, 458 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E, 459 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE, 460 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE, 461 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E, 462 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E, 463 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E, 464 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E, 465 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E, 466 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E, 467 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E, 468 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E, 469 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE, 470 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE, 471 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE, 472 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE, 473 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E, 474 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E, 475 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE, 476 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE 477 }; 478 /* 479 * This pre-processing phase slows down procedure by approximately 480 * same time as it makes each loop spin faster. In other words 481 * single block performance is approximately same as straightforward 482 * "4-bit" implementation, and then it goes only faster... 483 */ 484 for (cnt = 0; cnt < 16; ++cnt) { 485 Z.hi = Htable[cnt].hi; 486 Z.lo = Htable[cnt].lo; 487 Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4); 488 Hshr4[cnt].hi = (Z.hi >> 4); 489 Hshl4[cnt] = (u8)(Z.lo << 4); 490 } 491 492 do { 493 for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) { 494 nlo = ((const u8 *)Xi)[cnt]; 495 nlo ^= inp[cnt]; 496 nhi = nlo >> 4; 497 nlo &= 0xf; 498 499 Z.hi ^= Htable[nlo].hi; 500 Z.lo ^= Htable[nlo].lo; 501 502 rem = (size_t)Z.lo & 0xff; 503 504 Z.lo = (Z.hi << 56) | (Z.lo >> 8); 505 Z.hi = (Z.hi >> 8); 506 507 Z.hi ^= Hshr4[nhi].hi; 508 Z.lo ^= Hshr4[nhi].lo; 509 Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48; 510 } 511 512 nlo = ((const u8 *)Xi)[0]; 513 nlo ^= inp[0]; 514 nhi = nlo >> 4; 515 nlo &= 0xf; 516 517 Z.hi ^= Htable[nlo].hi; 518 Z.lo ^= Htable[nlo].lo; 519 520 rem = (size_t)Z.lo & 0xf; 521 522 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 523 Z.hi = (Z.hi >> 4); 524 525 Z.hi ^= Htable[nhi].hi; 526 Z.lo ^= Htable[nhi].lo; 527 Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48; 528 # endif 529 530 if (is_endian.little) { 531 # ifdef BSWAP8 532 Xi[0] = BSWAP8(Z.hi); 533 Xi[1] = BSWAP8(Z.lo); 534 # else 535 u8 *p = (u8 *)Xi; 536 u32 v; 537 v = (u32)(Z.hi >> 32); 538 PUTU32(p, v); 539 v = (u32)(Z.hi); 540 PUTU32(p + 4, v); 541 v = (u32)(Z.lo >> 32); 542 PUTU32(p + 8, v); 543 v = (u32)(Z.lo); 544 PUTU32(p + 12, v); 545 # endif 546 } else { 547 Xi[0] = Z.hi; 548 Xi[1] = Z.lo; 549 } 550 } while (inp += 16, len -= 16); 551 } 552 # endif 553 # else 554 void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]); 555 void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp, 556 size_t len); 557 # endif 558 559 # define GCM_MUL(ctx) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable) 560 # if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT) 561 # define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len) 562 /* 563 * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing 564 * effect. In other words idea is to hash data while it's still in L1 cache 565 * after encryption pass... 566 */ 567 # define GHASH_CHUNK (3*1024) 568 # endif 569 570 #else /* TABLE_BITS */ 571 572 static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2]) 573 { 574 u128 V, Z = { 0, 0 }; 575 long X; 576 int i, j; 577 const long *xi = (const long *)Xi; 578 const union { 579 long one; 580 char little; 581 } is_endian = { 1 }; 582 583 V.hi = H[0]; /* H is in host byte order, no byte swapping */ 584 V.lo = H[1]; 585 586 for (j = 0; j < 16 / sizeof(long); ++j) { 587 if (is_endian.little) { 588 if (sizeof(long) == 8) { 589 # ifdef BSWAP8 590 X = (long)(BSWAP8(xi[j])); 591 # else 592 const u8 *p = (const u8 *)(xi + j); 593 X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4)); 594 # endif 595 } else { 596 const u8 *p = (const u8 *)(xi + j); 597 X = (long)GETU32(p); 598 } 599 } else 600 X = xi[j]; 601 602 for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) { 603 u64 M = (u64)(X >> (8 * sizeof(long) - 1)); 604 Z.hi ^= V.hi & M; 605 Z.lo ^= V.lo & M; 606 607 REDUCE1BIT(V); 608 } 609 } 610 611 if (is_endian.little) { 612 # ifdef BSWAP8 613 Xi[0] = BSWAP8(Z.hi); 614 Xi[1] = BSWAP8(Z.lo); 615 # else 616 u8 *p = (u8 *)Xi; 617 u32 v; 618 v = (u32)(Z.hi >> 32); 619 PUTU32(p, v); 620 v = (u32)(Z.hi); 621 PUTU32(p + 4, v); 622 v = (u32)(Z.lo >> 32); 623 PUTU32(p + 8, v); 624 v = (u32)(Z.lo); 625 PUTU32(p + 12, v); 626 # endif 627 } else { 628 Xi[0] = Z.hi; 629 Xi[1] = Z.lo; 630 } 631 } 632 633 # define GCM_MUL(ctx) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u) 634 635 #endif 636 637 #if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ)) 638 # if !defined(I386_ONLY) && \ 639 (defined(__i386) || defined(__i386__) || \ 640 defined(__x86_64) || defined(__x86_64__) || \ 641 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) 642 # define GHASH_ASM_X86_OR_64 643 # define GCM_FUNCREF_4BIT 644 extern unsigned int OPENSSL_ia32cap_P[]; 645 646 void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]); 647 void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]); 648 void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp, 649 size_t len); 650 651 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) 652 # define gcm_init_avx gcm_init_clmul 653 # define gcm_gmult_avx gcm_gmult_clmul 654 # define gcm_ghash_avx gcm_ghash_clmul 655 # else 656 void gcm_init_avx(u128 Htable[16], const u64 Xi[2]); 657 void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]); 658 void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp, 659 size_t len); 660 # endif 661 662 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) 663 # define GHASH_ASM_X86 664 void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]); 665 void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp, 666 size_t len); 667 668 void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]); 669 void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp, 670 size_t len); 671 # endif 672 # elif (defined(__arm__) || defined(__arm) || defined(__aarch64__)) && defined(GHASH_ASM) 673 # include "arm_arch.h" 674 # if __ARM_MAX_ARCH__>=7 675 # define GHASH_ASM_ARM 676 # define GCM_FUNCREF_4BIT 677 # define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL) 678 # if defined(__arm__) || defined(__arm) 679 # define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) 680 # endif 681 void gcm_init_neon(u128 Htable[16], const u64 Xi[2]); 682 void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]); 683 void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp, 684 size_t len); 685 void gcm_init_v8(u128 Htable[16], const u64 Xi[2]); 686 void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]); 687 void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp, 688 size_t len); 689 # endif 690 # elif defined(__sparc__) || defined(__sparc) 691 # include "sparc_arch.h" 692 # if defined(__arch64__) 693 # define GHASH_ASM_SPARC 694 # define GCM_FUNCREF_4BIT 695 extern unsigned int OPENSSL_sparcv9cap_P[]; 696 void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]); 697 void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]); 698 void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp, 699 size_t len); 700 # endif /* __arch64__ */ 701 # elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC)) 702 # include "ppc_arch.h" 703 # define GHASH_ASM_PPC 704 # define GCM_FUNCREF_4BIT 705 void gcm_init_p8(u128 Htable[16], const u64 Xi[2]); 706 void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]); 707 void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp, 708 size_t len); 709 # endif 710 #endif 711 712 #ifdef GCM_FUNCREF_4BIT 713 # undef GCM_MUL 714 # define GCM_MUL(ctx) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable) 715 # ifdef GHASH 716 # undef GHASH 717 # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len) 718 # endif 719 #endif 720 721 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) 722 { 723 const union { 724 long one; 725 char little; 726 } is_endian = { 1 }; 727 728 memset(ctx, 0, sizeof(*ctx)); 729 ctx->block = block; 730 ctx->key = key; 731 732 (*block) (ctx->H.c, ctx->H.c, key); 733 734 if (is_endian.little) { 735 /* H is stored in host byte order */ 736 #ifdef BSWAP8 737 ctx->H.u[0] = BSWAP8(ctx->H.u[0]); 738 ctx->H.u[1] = BSWAP8(ctx->H.u[1]); 739 #else 740 u8 *p = ctx->H.c; 741 u64 hi, lo; 742 hi = (u64)GETU32(p) << 32 | GETU32(p + 4); 743 lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12); 744 ctx->H.u[0] = hi; 745 ctx->H.u[1] = lo; 746 #endif 747 } 748 #if TABLE_BITS==8 749 gcm_init_8bit(ctx->Htable, ctx->H.u); 750 #elif TABLE_BITS==4 751 # if defined(GHASH) 752 # define CTX__GHASH(f) (ctx->ghash = (f)) 753 # else 754 # define CTX__GHASH(f) (ctx->ghash = NULL) 755 # endif 756 # if defined(GHASH_ASM_X86_OR_64) 757 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2) 758 if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */ 759 if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */ 760 gcm_init_avx(ctx->Htable, ctx->H.u); 761 ctx->gmult = gcm_gmult_avx; 762 CTX__GHASH(gcm_ghash_avx); 763 } else { 764 gcm_init_clmul(ctx->Htable, ctx->H.u); 765 ctx->gmult = gcm_gmult_clmul; 766 CTX__GHASH(gcm_ghash_clmul); 767 } 768 return; 769 } 770 # endif 771 gcm_init_4bit(ctx->Htable, ctx->H.u); 772 # if defined(GHASH_ASM_X86) /* x86 only */ 773 # if defined(OPENSSL_IA32_SSE2) 774 if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */ 775 # else 776 if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */ 777 # endif 778 ctx->gmult = gcm_gmult_4bit_mmx; 779 CTX__GHASH(gcm_ghash_4bit_mmx); 780 } else { 781 ctx->gmult = gcm_gmult_4bit_x86; 782 CTX__GHASH(gcm_ghash_4bit_x86); 783 } 784 # else 785 ctx->gmult = gcm_gmult_4bit; 786 CTX__GHASH(gcm_ghash_4bit); 787 # endif 788 # elif defined(GHASH_ASM_ARM) 789 # ifdef PMULL_CAPABLE 790 if (PMULL_CAPABLE) { 791 gcm_init_v8(ctx->Htable, ctx->H.u); 792 ctx->gmult = gcm_gmult_v8; 793 CTX__GHASH(gcm_ghash_v8); 794 } else 795 # endif 796 # ifdef NEON_CAPABLE 797 if (NEON_CAPABLE) { 798 gcm_init_neon(ctx->Htable, ctx->H.u); 799 ctx->gmult = gcm_gmult_neon; 800 CTX__GHASH(gcm_ghash_neon); 801 } else 802 # endif 803 { 804 gcm_init_4bit(ctx->Htable, ctx->H.u); 805 ctx->gmult = gcm_gmult_4bit; 806 CTX__GHASH(gcm_ghash_4bit); 807 } 808 # elif defined(GHASH_ASM_SPARC) 809 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) { 810 gcm_init_vis3(ctx->Htable, ctx->H.u); 811 ctx->gmult = gcm_gmult_vis3; 812 CTX__GHASH(gcm_ghash_vis3); 813 } else { 814 gcm_init_4bit(ctx->Htable, ctx->H.u); 815 ctx->gmult = gcm_gmult_4bit; 816 CTX__GHASH(gcm_ghash_4bit); 817 } 818 # elif defined(GHASH_ASM_PPC) 819 if (OPENSSL_ppccap_P & PPC_CRYPTO207) { 820 gcm_init_p8(ctx->Htable, ctx->H.u); 821 ctx->gmult = gcm_gmult_p8; 822 CTX__GHASH(gcm_ghash_p8); 823 } else { 824 gcm_init_4bit(ctx->Htable, ctx->H.u); 825 ctx->gmult = gcm_gmult_4bit; 826 CTX__GHASH(gcm_ghash_4bit); 827 } 828 # else 829 gcm_init_4bit(ctx->Htable, ctx->H.u); 830 # endif 831 # undef CTX__GHASH 832 #endif 833 } 834 835 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, 836 size_t len) 837 { 838 const union { 839 long one; 840 char little; 841 } is_endian = { 1 }; 842 unsigned int ctr; 843 #ifdef GCM_FUNCREF_4BIT 844 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 845 #endif 846 847 ctx->len.u[0] = 0; /* AAD length */ 848 ctx->len.u[1] = 0; /* message length */ 849 ctx->ares = 0; 850 ctx->mres = 0; 851 852 if (len == 12) { 853 memcpy(ctx->Yi.c, iv, 12); 854 ctx->Yi.c[12] = 0; 855 ctx->Yi.c[13] = 0; 856 ctx->Yi.c[14] = 0; 857 ctx->Yi.c[15] = 1; 858 ctr = 1; 859 } else { 860 size_t i; 861 u64 len0 = len; 862 863 /* Borrow ctx->Xi to calculate initial Yi */ 864 ctx->Xi.u[0] = 0; 865 ctx->Xi.u[1] = 0; 866 867 while (len >= 16) { 868 for (i = 0; i < 16; ++i) 869 ctx->Xi.c[i] ^= iv[i]; 870 GCM_MUL(ctx); 871 iv += 16; 872 len -= 16; 873 } 874 if (len) { 875 for (i = 0; i < len; ++i) 876 ctx->Xi.c[i] ^= iv[i]; 877 GCM_MUL(ctx); 878 } 879 len0 <<= 3; 880 if (is_endian.little) { 881 #ifdef BSWAP8 882 ctx->Xi.u[1] ^= BSWAP8(len0); 883 #else 884 ctx->Xi.c[8] ^= (u8)(len0 >> 56); 885 ctx->Xi.c[9] ^= (u8)(len0 >> 48); 886 ctx->Xi.c[10] ^= (u8)(len0 >> 40); 887 ctx->Xi.c[11] ^= (u8)(len0 >> 32); 888 ctx->Xi.c[12] ^= (u8)(len0 >> 24); 889 ctx->Xi.c[13] ^= (u8)(len0 >> 16); 890 ctx->Xi.c[14] ^= (u8)(len0 >> 8); 891 ctx->Xi.c[15] ^= (u8)(len0); 892 #endif 893 } else { 894 ctx->Xi.u[1] ^= len0; 895 } 896 897 GCM_MUL(ctx); 898 899 if (is_endian.little) 900 #ifdef BSWAP4 901 ctr = BSWAP4(ctx->Xi.d[3]); 902 #else 903 ctr = GETU32(ctx->Xi.c + 12); 904 #endif 905 else 906 ctr = ctx->Xi.d[3]; 907 908 /* Copy borrowed Xi to Yi */ 909 ctx->Yi.u[0] = ctx->Xi.u[0]; 910 ctx->Yi.u[1] = ctx->Xi.u[1]; 911 } 912 913 ctx->Xi.u[0] = 0; 914 ctx->Xi.u[1] = 0; 915 916 (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key); 917 ++ctr; 918 if (is_endian.little) 919 #ifdef BSWAP4 920 ctx->Yi.d[3] = BSWAP4(ctr); 921 #else 922 PUTU32(ctx->Yi.c + 12, ctr); 923 #endif 924 else 925 ctx->Yi.d[3] = ctr; 926 } 927 928 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, 929 size_t len) 930 { 931 size_t i; 932 unsigned int n; 933 u64 alen = ctx->len.u[0]; 934 #ifdef GCM_FUNCREF_4BIT 935 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 936 # ifdef GHASH 937 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], 938 const u8 *inp, size_t len) = ctx->ghash; 939 # endif 940 #endif 941 942 if (ctx->len.u[1]) 943 return -2; 944 945 alen += len; 946 if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len)) 947 return -1; 948 ctx->len.u[0] = alen; 949 950 n = ctx->ares; 951 if (n) { 952 while (n && len) { 953 ctx->Xi.c[n] ^= *(aad++); 954 --len; 955 n = (n + 1) % 16; 956 } 957 if (n == 0) 958 GCM_MUL(ctx); 959 else { 960 ctx->ares = n; 961 return 0; 962 } 963 } 964 #ifdef GHASH 965 if ((i = (len & (size_t)-16))) { 966 GHASH(ctx, aad, i); 967 aad += i; 968 len -= i; 969 } 970 #else 971 while (len >= 16) { 972 for (i = 0; i < 16; ++i) 973 ctx->Xi.c[i] ^= aad[i]; 974 GCM_MUL(ctx); 975 aad += 16; 976 len -= 16; 977 } 978 #endif 979 if (len) { 980 n = (unsigned int)len; 981 for (i = 0; i < len; ++i) 982 ctx->Xi.c[i] ^= aad[i]; 983 } 984 985 ctx->ares = n; 986 return 0; 987 } 988 989 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, 990 const unsigned char *in, unsigned char *out, 991 size_t len) 992 { 993 const union { 994 long one; 995 char little; 996 } is_endian = { 1 }; 997 unsigned int n, ctr, mres; 998 size_t i; 999 u64 mlen = ctx->len.u[1]; 1000 block128_f block = ctx->block; 1001 void *key = ctx->key; 1002 #ifdef GCM_FUNCREF_4BIT 1003 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 1004 # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1005 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], 1006 const u8 *inp, size_t len) = ctx->ghash; 1007 # endif 1008 #endif 1009 1010 mlen += len; 1011 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 1012 return -1; 1013 ctx->len.u[1] = mlen; 1014 1015 mres = ctx->mres; 1016 1017 if (ctx->ares) { 1018 /* First call to encrypt finalizes GHASH(AAD) */ 1019 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1020 if (len == 0) { 1021 GCM_MUL(ctx); 1022 ctx->ares = 0; 1023 return 0; 1024 } 1025 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); 1026 ctx->Xi.u[0] = 0; 1027 ctx->Xi.u[1] = 0; 1028 mres = sizeof(ctx->Xi); 1029 #else 1030 GCM_MUL(ctx); 1031 #endif 1032 ctx->ares = 0; 1033 } 1034 1035 if (is_endian.little) 1036 #ifdef BSWAP4 1037 ctr = BSWAP4(ctx->Yi.d[3]); 1038 #else 1039 ctr = GETU32(ctx->Yi.c + 12); 1040 #endif 1041 else 1042 ctr = ctx->Yi.d[3]; 1043 1044 n = mres % 16; 1045 #if !defined(OPENSSL_SMALL_FOOTPRINT) 1046 if (16 % sizeof(size_t) == 0) { /* always true actually */ 1047 do { 1048 if (n) { 1049 # if defined(GHASH) 1050 while (n && len) { 1051 ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n]; 1052 --len; 1053 n = (n + 1) % 16; 1054 } 1055 if (n == 0) { 1056 GHASH(ctx, ctx->Xn, mres); 1057 mres = 0; 1058 } else { 1059 ctx->mres = mres; 1060 return 0; 1061 } 1062 # else 1063 while (n && len) { 1064 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n]; 1065 --len; 1066 n = (n + 1) % 16; 1067 } 1068 if (n == 0) { 1069 GCM_MUL(ctx); 1070 mres = 0; 1071 } else { 1072 ctx->mres = n; 1073 return 0; 1074 } 1075 # endif 1076 } 1077 # if defined(STRICT_ALIGNMENT) 1078 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0) 1079 break; 1080 # endif 1081 # if defined(GHASH) 1082 if (len >= 16 && mres) { 1083 GHASH(ctx, ctx->Xn, mres); 1084 mres = 0; 1085 } 1086 # if defined(GHASH_CHUNK) 1087 while (len >= GHASH_CHUNK) { 1088 size_t j = GHASH_CHUNK; 1089 1090 while (j) { 1091 size_t_aX *out_t = (size_t_aX *)out; 1092 const size_t_aX *in_t = (const size_t_aX *)in; 1093 1094 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1095 ++ctr; 1096 if (is_endian.little) 1097 # ifdef BSWAP4 1098 ctx->Yi.d[3] = BSWAP4(ctr); 1099 # else 1100 PUTU32(ctx->Yi.c + 12, ctr); 1101 # endif 1102 else 1103 ctx->Yi.d[3] = ctr; 1104 for (i = 0; i < 16 / sizeof(size_t); ++i) 1105 out_t[i] = in_t[i] ^ ctx->EKi.t[i]; 1106 out += 16; 1107 in += 16; 1108 j -= 16; 1109 } 1110 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK); 1111 len -= GHASH_CHUNK; 1112 } 1113 # endif 1114 if ((i = (len & (size_t)-16))) { 1115 size_t j = i; 1116 1117 while (len >= 16) { 1118 size_t_aX *out_t = (size_t_aX *)out; 1119 const size_t_aX *in_t = (const size_t_aX *)in; 1120 1121 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1122 ++ctr; 1123 if (is_endian.little) 1124 # ifdef BSWAP4 1125 ctx->Yi.d[3] = BSWAP4(ctr); 1126 # else 1127 PUTU32(ctx->Yi.c + 12, ctr); 1128 # endif 1129 else 1130 ctx->Yi.d[3] = ctr; 1131 for (i = 0; i < 16 / sizeof(size_t); ++i) 1132 out_t[i] = in_t[i] ^ ctx->EKi.t[i]; 1133 out += 16; 1134 in += 16; 1135 len -= 16; 1136 } 1137 GHASH(ctx, out - j, j); 1138 } 1139 # else 1140 while (len >= 16) { 1141 size_t *out_t = (size_t *)out; 1142 const size_t *in_t = (const size_t *)in; 1143 1144 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1145 ++ctr; 1146 if (is_endian.little) 1147 # ifdef BSWAP4 1148 ctx->Yi.d[3] = BSWAP4(ctr); 1149 # else 1150 PUTU32(ctx->Yi.c + 12, ctr); 1151 # endif 1152 else 1153 ctx->Yi.d[3] = ctr; 1154 for (i = 0; i < 16 / sizeof(size_t); ++i) 1155 ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i]; 1156 GCM_MUL(ctx); 1157 out += 16; 1158 in += 16; 1159 len -= 16; 1160 } 1161 # endif 1162 if (len) { 1163 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1164 ++ctr; 1165 if (is_endian.little) 1166 # ifdef BSWAP4 1167 ctx->Yi.d[3] = BSWAP4(ctr); 1168 # else 1169 PUTU32(ctx->Yi.c + 12, ctr); 1170 # endif 1171 else 1172 ctx->Yi.d[3] = ctr; 1173 # if defined(GHASH) 1174 while (len--) { 1175 ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n]; 1176 ++n; 1177 } 1178 # else 1179 while (len--) { 1180 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n]; 1181 ++n; 1182 } 1183 mres = n; 1184 # endif 1185 } 1186 1187 ctx->mres = mres; 1188 return 0; 1189 } while (0); 1190 } 1191 #endif 1192 for (i = 0; i < len; ++i) { 1193 if (n == 0) { 1194 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1195 ++ctr; 1196 if (is_endian.little) 1197 #ifdef BSWAP4 1198 ctx->Yi.d[3] = BSWAP4(ctr); 1199 #else 1200 PUTU32(ctx->Yi.c + 12, ctr); 1201 #endif 1202 else 1203 ctx->Yi.d[3] = ctr; 1204 } 1205 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1206 ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n]; 1207 n = (n + 1) % 16; 1208 if (mres == sizeof(ctx->Xn)) { 1209 GHASH(ctx,ctx->Xn,sizeof(ctx->Xn)); 1210 mres = 0; 1211 } 1212 #else 1213 ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n]; 1214 mres = n = (n + 1) % 16; 1215 if (n == 0) 1216 GCM_MUL(ctx); 1217 #endif 1218 } 1219 1220 ctx->mres = mres; 1221 return 0; 1222 } 1223 1224 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, 1225 const unsigned char *in, unsigned char *out, 1226 size_t len) 1227 { 1228 const union { 1229 long one; 1230 char little; 1231 } is_endian = { 1 }; 1232 unsigned int n, ctr, mres; 1233 size_t i; 1234 u64 mlen = ctx->len.u[1]; 1235 block128_f block = ctx->block; 1236 void *key = ctx->key; 1237 #ifdef GCM_FUNCREF_4BIT 1238 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 1239 # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1240 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], 1241 const u8 *inp, size_t len) = ctx->ghash; 1242 # endif 1243 #endif 1244 1245 mlen += len; 1246 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 1247 return -1; 1248 ctx->len.u[1] = mlen; 1249 1250 mres = ctx->mres; 1251 1252 if (ctx->ares) { 1253 /* First call to decrypt finalizes GHASH(AAD) */ 1254 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1255 if (len == 0) { 1256 GCM_MUL(ctx); 1257 ctx->ares = 0; 1258 return 0; 1259 } 1260 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); 1261 ctx->Xi.u[0] = 0; 1262 ctx->Xi.u[1] = 0; 1263 mres = sizeof(ctx->Xi); 1264 #else 1265 GCM_MUL(ctx); 1266 #endif 1267 ctx->ares = 0; 1268 } 1269 1270 if (is_endian.little) 1271 #ifdef BSWAP4 1272 ctr = BSWAP4(ctx->Yi.d[3]); 1273 #else 1274 ctr = GETU32(ctx->Yi.c + 12); 1275 #endif 1276 else 1277 ctr = ctx->Yi.d[3]; 1278 1279 n = mres % 16; 1280 #if !defined(OPENSSL_SMALL_FOOTPRINT) 1281 if (16 % sizeof(size_t) == 0) { /* always true actually */ 1282 do { 1283 if (n) { 1284 # if defined(GHASH) 1285 while (n && len) { 1286 *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n]; 1287 --len; 1288 n = (n + 1) % 16; 1289 } 1290 if (n == 0) { 1291 GHASH(ctx, ctx->Xn, mres); 1292 mres = 0; 1293 } else { 1294 ctx->mres = mres; 1295 return 0; 1296 } 1297 # else 1298 while (n && len) { 1299 u8 c = *(in++); 1300 *(out++) = c ^ ctx->EKi.c[n]; 1301 ctx->Xi.c[n] ^= c; 1302 --len; 1303 n = (n + 1) % 16; 1304 } 1305 if (n == 0) { 1306 GCM_MUL(ctx); 1307 mres = 0; 1308 } else { 1309 ctx->mres = n; 1310 return 0; 1311 } 1312 # endif 1313 } 1314 # if defined(STRICT_ALIGNMENT) 1315 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0) 1316 break; 1317 # endif 1318 # if defined(GHASH) 1319 if (len >= 16 && mres) { 1320 GHASH(ctx, ctx->Xn, mres); 1321 mres = 0; 1322 } 1323 # if defined(GHASH_CHUNK) 1324 while (len >= GHASH_CHUNK) { 1325 size_t j = GHASH_CHUNK; 1326 1327 GHASH(ctx, in, GHASH_CHUNK); 1328 while (j) { 1329 size_t_aX *out_t = (size_t_aX *)out; 1330 const size_t_aX *in_t = (const size_t_aX *)in; 1331 1332 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1333 ++ctr; 1334 if (is_endian.little) 1335 # ifdef BSWAP4 1336 ctx->Yi.d[3] = BSWAP4(ctr); 1337 # else 1338 PUTU32(ctx->Yi.c + 12, ctr); 1339 # endif 1340 else 1341 ctx->Yi.d[3] = ctr; 1342 for (i = 0; i < 16 / sizeof(size_t); ++i) 1343 out_t[i] = in_t[i] ^ ctx->EKi.t[i]; 1344 out += 16; 1345 in += 16; 1346 j -= 16; 1347 } 1348 len -= GHASH_CHUNK; 1349 } 1350 # endif 1351 if ((i = (len & (size_t)-16))) { 1352 GHASH(ctx, in, i); 1353 while (len >= 16) { 1354 size_t_aX *out_t = (size_t_aX *)out; 1355 const size_t_aX *in_t = (const size_t_aX *)in; 1356 1357 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1358 ++ctr; 1359 if (is_endian.little) 1360 # ifdef BSWAP4 1361 ctx->Yi.d[3] = BSWAP4(ctr); 1362 # else 1363 PUTU32(ctx->Yi.c + 12, ctr); 1364 # endif 1365 else 1366 ctx->Yi.d[3] = ctr; 1367 for (i = 0; i < 16 / sizeof(size_t); ++i) 1368 out_t[i] = in_t[i] ^ ctx->EKi.t[i]; 1369 out += 16; 1370 in += 16; 1371 len -= 16; 1372 } 1373 } 1374 # else 1375 while (len >= 16) { 1376 size_t *out_t = (size_t *)out; 1377 const size_t *in_t = (const size_t *)in; 1378 1379 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1380 ++ctr; 1381 if (is_endian.little) 1382 # ifdef BSWAP4 1383 ctx->Yi.d[3] = BSWAP4(ctr); 1384 # else 1385 PUTU32(ctx->Yi.c + 12, ctr); 1386 # endif 1387 else 1388 ctx->Yi.d[3] = ctr; 1389 for (i = 0; i < 16 / sizeof(size_t); ++i) { 1390 size_t c = in_t[i]; 1391 out_t[i] = c ^ ctx->EKi.t[i]; 1392 ctx->Xi.t[i] ^= c; 1393 } 1394 GCM_MUL(ctx); 1395 out += 16; 1396 in += 16; 1397 len -= 16; 1398 } 1399 # endif 1400 if (len) { 1401 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1402 ++ctr; 1403 if (is_endian.little) 1404 # ifdef BSWAP4 1405 ctx->Yi.d[3] = BSWAP4(ctr); 1406 # else 1407 PUTU32(ctx->Yi.c + 12, ctr); 1408 # endif 1409 else 1410 ctx->Yi.d[3] = ctr; 1411 # if defined(GHASH) 1412 while (len--) { 1413 out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n]; 1414 ++n; 1415 } 1416 # else 1417 while (len--) { 1418 u8 c = in[n]; 1419 ctx->Xi.c[n] ^= c; 1420 out[n] = c ^ ctx->EKi.c[n]; 1421 ++n; 1422 } 1423 mres = n; 1424 # endif 1425 } 1426 1427 ctx->mres = mres; 1428 return 0; 1429 } while (0); 1430 } 1431 #endif 1432 for (i = 0; i < len; ++i) { 1433 u8 c; 1434 if (n == 0) { 1435 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1436 ++ctr; 1437 if (is_endian.little) 1438 #ifdef BSWAP4 1439 ctx->Yi.d[3] = BSWAP4(ctr); 1440 #else 1441 PUTU32(ctx->Yi.c + 12, ctr); 1442 #endif 1443 else 1444 ctx->Yi.d[3] = ctr; 1445 } 1446 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1447 out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n]; 1448 n = (n + 1) % 16; 1449 if (mres == sizeof(ctx->Xn)) { 1450 GHASH(ctx,ctx->Xn,sizeof(ctx->Xn)); 1451 mres = 0; 1452 } 1453 #else 1454 c = in[i]; 1455 out[i] = c ^ ctx->EKi.c[n]; 1456 ctx->Xi.c[n] ^= c; 1457 mres = n = (n + 1) % 16; 1458 if (n == 0) 1459 GCM_MUL(ctx); 1460 #endif 1461 } 1462 1463 ctx->mres = mres; 1464 return 0; 1465 } 1466 1467 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, 1468 const unsigned char *in, unsigned char *out, 1469 size_t len, ctr128_f stream) 1470 { 1471 #if defined(OPENSSL_SMALL_FOOTPRINT) 1472 return CRYPTO_gcm128_encrypt(ctx, in, out, len); 1473 #else 1474 const union { 1475 long one; 1476 char little; 1477 } is_endian = { 1 }; 1478 unsigned int n, ctr, mres; 1479 size_t i; 1480 u64 mlen = ctx->len.u[1]; 1481 void *key = ctx->key; 1482 # ifdef GCM_FUNCREF_4BIT 1483 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 1484 # ifdef GHASH 1485 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], 1486 const u8 *inp, size_t len) = ctx->ghash; 1487 # endif 1488 # endif 1489 1490 mlen += len; 1491 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 1492 return -1; 1493 ctx->len.u[1] = mlen; 1494 1495 mres = ctx->mres; 1496 1497 if (ctx->ares) { 1498 /* First call to encrypt finalizes GHASH(AAD) */ 1499 #if defined(GHASH) 1500 if (len == 0) { 1501 GCM_MUL(ctx); 1502 ctx->ares = 0; 1503 return 0; 1504 } 1505 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); 1506 ctx->Xi.u[0] = 0; 1507 ctx->Xi.u[1] = 0; 1508 mres = sizeof(ctx->Xi); 1509 #else 1510 GCM_MUL(ctx); 1511 #endif 1512 ctx->ares = 0; 1513 } 1514 1515 if (is_endian.little) 1516 # ifdef BSWAP4 1517 ctr = BSWAP4(ctx->Yi.d[3]); 1518 # else 1519 ctr = GETU32(ctx->Yi.c + 12); 1520 # endif 1521 else 1522 ctr = ctx->Yi.d[3]; 1523 1524 n = mres % 16; 1525 if (n) { 1526 # if defined(GHASH) 1527 while (n && len) { 1528 ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n]; 1529 --len; 1530 n = (n + 1) % 16; 1531 } 1532 if (n == 0) { 1533 GHASH(ctx, ctx->Xn, mres); 1534 mres = 0; 1535 } else { 1536 ctx->mres = mres; 1537 return 0; 1538 } 1539 # else 1540 while (n && len) { 1541 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n]; 1542 --len; 1543 n = (n + 1) % 16; 1544 } 1545 if (n == 0) { 1546 GCM_MUL(ctx); 1547 mres = 0; 1548 } else { 1549 ctx->mres = n; 1550 return 0; 1551 } 1552 # endif 1553 } 1554 # if defined(GHASH) 1555 if (len >= 16 && mres) { 1556 GHASH(ctx, ctx->Xn, mres); 1557 mres = 0; 1558 } 1559 # if defined(GHASH_CHUNK) 1560 while (len >= GHASH_CHUNK) { 1561 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c); 1562 ctr += GHASH_CHUNK / 16; 1563 if (is_endian.little) 1564 # ifdef BSWAP4 1565 ctx->Yi.d[3] = BSWAP4(ctr); 1566 # else 1567 PUTU32(ctx->Yi.c + 12, ctr); 1568 # endif 1569 else 1570 ctx->Yi.d[3] = ctr; 1571 GHASH(ctx, out, GHASH_CHUNK); 1572 out += GHASH_CHUNK; 1573 in += GHASH_CHUNK; 1574 len -= GHASH_CHUNK; 1575 } 1576 # endif 1577 # endif 1578 if ((i = (len & (size_t)-16))) { 1579 size_t j = i / 16; 1580 1581 (*stream) (in, out, j, key, ctx->Yi.c); 1582 ctr += (unsigned int)j; 1583 if (is_endian.little) 1584 # ifdef BSWAP4 1585 ctx->Yi.d[3] = BSWAP4(ctr); 1586 # else 1587 PUTU32(ctx->Yi.c + 12, ctr); 1588 # endif 1589 else 1590 ctx->Yi.d[3] = ctr; 1591 in += i; 1592 len -= i; 1593 # if defined(GHASH) 1594 GHASH(ctx, out, i); 1595 out += i; 1596 # else 1597 while (j--) { 1598 for (i = 0; i < 16; ++i) 1599 ctx->Xi.c[i] ^= out[i]; 1600 GCM_MUL(ctx); 1601 out += 16; 1602 } 1603 # endif 1604 } 1605 if (len) { 1606 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key); 1607 ++ctr; 1608 if (is_endian.little) 1609 # ifdef BSWAP4 1610 ctx->Yi.d[3] = BSWAP4(ctr); 1611 # else 1612 PUTU32(ctx->Yi.c + 12, ctr); 1613 # endif 1614 else 1615 ctx->Yi.d[3] = ctr; 1616 while (len--) { 1617 # if defined(GHASH) 1618 ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n]; 1619 # else 1620 ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n]; 1621 # endif 1622 ++n; 1623 } 1624 } 1625 1626 ctx->mres = mres; 1627 return 0; 1628 #endif 1629 } 1630 1631 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, 1632 const unsigned char *in, unsigned char *out, 1633 size_t len, ctr128_f stream) 1634 { 1635 #if defined(OPENSSL_SMALL_FOOTPRINT) 1636 return CRYPTO_gcm128_decrypt(ctx, in, out, len); 1637 #else 1638 const union { 1639 long one; 1640 char little; 1641 } is_endian = { 1 }; 1642 unsigned int n, ctr, mres; 1643 size_t i; 1644 u64 mlen = ctx->len.u[1]; 1645 void *key = ctx->key; 1646 # ifdef GCM_FUNCREF_4BIT 1647 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 1648 # ifdef GHASH 1649 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], 1650 const u8 *inp, size_t len) = ctx->ghash; 1651 # endif 1652 # endif 1653 1654 mlen += len; 1655 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 1656 return -1; 1657 ctx->len.u[1] = mlen; 1658 1659 mres = ctx->mres; 1660 1661 if (ctx->ares) { 1662 /* First call to decrypt finalizes GHASH(AAD) */ 1663 # if defined(GHASH) 1664 if (len == 0) { 1665 GCM_MUL(ctx); 1666 ctx->ares = 0; 1667 return 0; 1668 } 1669 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); 1670 ctx->Xi.u[0] = 0; 1671 ctx->Xi.u[1] = 0; 1672 mres = sizeof(ctx->Xi); 1673 # else 1674 GCM_MUL(ctx); 1675 # endif 1676 ctx->ares = 0; 1677 } 1678 1679 if (is_endian.little) 1680 # ifdef BSWAP4 1681 ctr = BSWAP4(ctx->Yi.d[3]); 1682 # else 1683 ctr = GETU32(ctx->Yi.c + 12); 1684 # endif 1685 else 1686 ctr = ctx->Yi.d[3]; 1687 1688 n = mres % 16; 1689 if (n) { 1690 # if defined(GHASH) 1691 while (n && len) { 1692 *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n]; 1693 --len; 1694 n = (n + 1) % 16; 1695 } 1696 if (n == 0) { 1697 GHASH(ctx, ctx->Xn, mres); 1698 mres = 0; 1699 } else { 1700 ctx->mres = mres; 1701 return 0; 1702 } 1703 # else 1704 while (n && len) { 1705 u8 c = *(in++); 1706 *(out++) = c ^ ctx->EKi.c[n]; 1707 ctx->Xi.c[n] ^= c; 1708 --len; 1709 n = (n + 1) % 16; 1710 } 1711 if (n == 0) { 1712 GCM_MUL(ctx); 1713 mres = 0; 1714 } else { 1715 ctx->mres = n; 1716 return 0; 1717 } 1718 # endif 1719 } 1720 # if defined(GHASH) 1721 if (len >= 16 && mres) { 1722 GHASH(ctx, ctx->Xn, mres); 1723 mres = 0; 1724 } 1725 # if defined(GHASH_CHUNK) 1726 while (len >= GHASH_CHUNK) { 1727 GHASH(ctx, in, GHASH_CHUNK); 1728 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c); 1729 ctr += GHASH_CHUNK / 16; 1730 if (is_endian.little) 1731 # ifdef BSWAP4 1732 ctx->Yi.d[3] = BSWAP4(ctr); 1733 # else 1734 PUTU32(ctx->Yi.c + 12, ctr); 1735 # endif 1736 else 1737 ctx->Yi.d[3] = ctr; 1738 out += GHASH_CHUNK; 1739 in += GHASH_CHUNK; 1740 len -= GHASH_CHUNK; 1741 } 1742 # endif 1743 # endif 1744 if ((i = (len & (size_t)-16))) { 1745 size_t j = i / 16; 1746 1747 # if defined(GHASH) 1748 GHASH(ctx, in, i); 1749 # else 1750 while (j--) { 1751 size_t k; 1752 for (k = 0; k < 16; ++k) 1753 ctx->Xi.c[k] ^= in[k]; 1754 GCM_MUL(ctx); 1755 in += 16; 1756 } 1757 j = i / 16; 1758 in -= i; 1759 # endif 1760 (*stream) (in, out, j, key, ctx->Yi.c); 1761 ctr += (unsigned int)j; 1762 if (is_endian.little) 1763 # ifdef BSWAP4 1764 ctx->Yi.d[3] = BSWAP4(ctr); 1765 # else 1766 PUTU32(ctx->Yi.c + 12, ctr); 1767 # endif 1768 else 1769 ctx->Yi.d[3] = ctr; 1770 out += i; 1771 in += i; 1772 len -= i; 1773 } 1774 if (len) { 1775 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key); 1776 ++ctr; 1777 if (is_endian.little) 1778 # ifdef BSWAP4 1779 ctx->Yi.d[3] = BSWAP4(ctr); 1780 # else 1781 PUTU32(ctx->Yi.c + 12, ctr); 1782 # endif 1783 else 1784 ctx->Yi.d[3] = ctr; 1785 while (len--) { 1786 # if defined(GHASH) 1787 out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n]; 1788 # else 1789 u8 c = in[n]; 1790 ctx->Xi.c[mres++] ^= c; 1791 out[n] = c ^ ctx->EKi.c[n]; 1792 # endif 1793 ++n; 1794 } 1795 } 1796 1797 ctx->mres = mres; 1798 return 0; 1799 #endif 1800 } 1801 1802 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag, 1803 size_t len) 1804 { 1805 const union { 1806 long one; 1807 char little; 1808 } is_endian = { 1 }; 1809 u64 alen = ctx->len.u[0] << 3; 1810 u64 clen = ctx->len.u[1] << 3; 1811 #ifdef GCM_FUNCREF_4BIT 1812 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 1813 # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1814 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], 1815 const u8 *inp, size_t len) = ctx->ghash; 1816 # endif 1817 #endif 1818 1819 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1820 u128 bitlen; 1821 unsigned int mres = ctx->mres; 1822 1823 if (mres) { 1824 unsigned blocks = (mres + 15) & -16; 1825 1826 memset(ctx->Xn + mres, 0, blocks - mres); 1827 mres = blocks; 1828 if (mres == sizeof(ctx->Xn)) { 1829 GHASH(ctx, ctx->Xn, mres); 1830 mres = 0; 1831 } 1832 } else if (ctx->ares) { 1833 GCM_MUL(ctx); 1834 } 1835 #else 1836 if (ctx->mres || ctx->ares) 1837 GCM_MUL(ctx); 1838 #endif 1839 1840 if (is_endian.little) { 1841 #ifdef BSWAP8 1842 alen = BSWAP8(alen); 1843 clen = BSWAP8(clen); 1844 #else 1845 u8 *p = ctx->len.c; 1846 1847 ctx->len.u[0] = alen; 1848 ctx->len.u[1] = clen; 1849 1850 alen = (u64)GETU32(p) << 32 | GETU32(p + 4); 1851 clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12); 1852 #endif 1853 } 1854 1855 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1856 bitlen.hi = alen; 1857 bitlen.lo = clen; 1858 memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen)); 1859 mres += sizeof(bitlen); 1860 GHASH(ctx, ctx->Xn, mres); 1861 #else 1862 ctx->Xi.u[0] ^= alen; 1863 ctx->Xi.u[1] ^= clen; 1864 GCM_MUL(ctx); 1865 #endif 1866 1867 ctx->Xi.u[0] ^= ctx->EK0.u[0]; 1868 ctx->Xi.u[1] ^= ctx->EK0.u[1]; 1869 1870 if (tag && len <= sizeof(ctx->Xi)) 1871 return CRYPTO_memcmp(ctx->Xi.c, tag, len); 1872 else 1873 return -1; 1874 } 1875 1876 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) 1877 { 1878 CRYPTO_gcm128_finish(ctx, NULL, 0); 1879 memcpy(tag, ctx->Xi.c, 1880 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c)); 1881 } 1882 1883 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block) 1884 { 1885 GCM128_CONTEXT *ret; 1886 1887 if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL) 1888 CRYPTO_gcm128_init(ret, key, block); 1889 1890 return ret; 1891 } 1892 1893 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) 1894 { 1895 OPENSSL_clear_free(ctx, sizeof(*ctx)); 1896 } 1897