xref: /netbsd-src/crypto/external/bsd/openssl/dist/crypto/modes/gcm128.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*
2  * Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the OpenSSL license (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 #include <openssl/crypto.h>
11 #include "modes_lcl.h"
12 #include <string.h>
13 
14 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
15 /* redefine, because alignment is ensured */
16 # undef  GETU32
17 # define GETU32(p)       BSWAP4(*(const u32 *)(p))
18 # undef  PUTU32
19 # define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
20 #endif
21 
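/*
 * PACK() places a 16-bit reduction constant in the top 16 bits of a size_t,
 * so a single XOR into the high word of Z applies it on both 32- and 64-bit
 * builds.  REDUCE1BIT(V) shifts the 128-bit element V right by one bit and,
 * when a bit drops off the low end, folds it back by XOR-ing in
 * 0xe100000000000000, the bit-reflected encoding of the GCM reduction
 * polynomial x^128 + x^7 + x^2 + x + 1; this is the "multiply by x" step
 * used when building the tables below.
 */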
22 #define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
23 #define REDUCE1BIT(V)   do { \
24         if (sizeof(size_t)==8) { \
25                 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
26                 V.lo  = (V.hi<<63)|(V.lo>>1); \
27                 V.hi  = (V.hi>>1 )^T; \
28         } \
29         else { \
30                 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
31                 V.lo  = (V.hi<<63)|(V.lo>>1); \
32                 V.hi  = (V.hi>>1 )^((u64)T<<32); \
33         } \
34 } while(0)
35 
36 /*-
37  * Even though the permitted values for TABLE_BITS are 8, 4 and 1, it
38  * should never be set to 8; 8 is effectively reserved for testing
39  * purposes. TABLE_BITS>1 selects the lookup-table-driven implementations
40  * referred to as "Shoup's" in the GCM specification; in other words,
41  * OpenSSL does not cover the whole spectrum of possible table-driven
42  * implementations. Why? In the non-"Shoup's" case the memory access
43  * pattern is segmented in such a manner that cache-timing information
44  * can reveal a fair portion of the intermediate hash value. Given that
45  * the ciphertext is always available to an attacker, this makes it
46  * possible to attempt to deduce the secret parameter H and, if
47  * successful, to tamper with messages [which is trivial in CTR mode].
48  * In the "Shoup's" case it is not as easy, but there is no reason to
49  * believe it is resistant to cache-timing attacks either. The catch
50  * with the "8-bit" implementation is that it consumes 16 (sixteen)
51  * times more memory, 4KB per individual key + 1KB shared. On the pro
52  * side, it should be about twice as fast as the "4-bit" version;
53  * gcc-generated x86[_64] code was observed to run ~75% faster, closer
54  * to 100% with commercial compilers... Yet the "4-bit" procedure is
55  * preferred, as it is believed to offer a better security-performance
56  * balance and adequate all-round performance, "all-round" meaning:
57  *
58  * - shorter setup time effectively improves overall timing for
59  *   handling of short messages;
60  * - larger table allocation can become unbearable because of VM
61  *   subsystem penalties (for example, on Windows freeing a large
62  *   enough block results in VM working-set trimming, meaning that a
63  *   subsequent malloc would immediately incur working-set expansion);
64  * - a larger table has a larger cache footprint, which can affect the
65  *   performance of other code paths (not necessarily even from the
66  *   same thread in a Hyper-Threading world);
67  *
68  * A value of 1 is not appropriate for performance reasons.
69  */
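/*
 * For reference, the hash these tables accelerate is GHASH: with hash key
 * H = E_K(0^128), each 128-bit input block X_i updates the accumulator as
 *
 *      Y_0 = 0,   Y_i = (Y_{i-1} ^ X_i) * H   in GF(2^128),
 *
 * and the tag is derived from the final Y over AAD || ciphertext || lengths.
 * The "Shoup" tables simply cache n*H for every possible table index n
 * (every nibble for TABLE_BITS==4, every byte for TABLE_BITS==8), so the
 * multiplication by H degenerates into table lookups, shifts and XORs.
 */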
70 #if     TABLE_BITS==8
71 
72 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
73 {
74     int i, j;
75     u128 V;
76 
77     Htable[0].hi = 0;
78     Htable[0].lo = 0;
79     V.hi = H[0];
80     V.lo = H[1];
81 
82     for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
83         REDUCE1BIT(V);
84         Htable[i] = V;
85     }
86 
87     for (i = 2; i < 256; i <<= 1) {
88         u128 *Hi = Htable + i, H0 = *Hi;
89         for (j = 1; j < i; ++j) {
90             Hi[j].hi = H0.hi ^ Htable[j].hi;
91             Hi[j].lo = H0.lo ^ Htable[j].lo;
92         }
93     }
94 }
95 
96 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
97 {
98     u128 Z = { 0, 0 };
99     const u8 *xi = (const u8 *)Xi + 15;
100     size_t rem, n = *xi;
101     const union {
102         long one;
103         char little;
104     } is_endian = { 1 };
105     static const size_t rem_8bit[256] = {
106         PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
107         PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
108         PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
109         PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
110         PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
111         PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
112         PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
113         PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
114         PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
115         PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
116         PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
117         PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
118         PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
119         PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
120         PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
121         PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
122         PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
123         PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
124         PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
125         PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
126         PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
127         PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
128         PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
129         PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
130         PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
131         PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
132         PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
133         PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
134         PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
135         PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
136         PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
137         PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
138         PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
139         PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
140         PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
141         PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
142         PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
143         PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
144         PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
145         PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
146         PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
147         PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
148         PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
149         PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
150         PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
151         PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
152         PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
153         PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
154         PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
155         PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
156         PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
157         PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
158         PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
159         PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
160         PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
161         PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
162         PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
163         PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
164         PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
165         PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
166         PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
167         PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
168         PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
169         PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
170     };
171 
172     while (1) {
173         Z.hi ^= Htable[n].hi;
174         Z.lo ^= Htable[n].lo;
175 
176         if ((u8 *)Xi == xi)
177             break;
178 
179         n = *(--xi);
180 
181         rem = (size_t)Z.lo & 0xff;
182         Z.lo = (Z.hi << 56) | (Z.lo >> 8);
183         Z.hi = (Z.hi >> 8);
184         if (sizeof(size_t) == 8)
185             Z.hi ^= rem_8bit[rem];
186         else
187             Z.hi ^= (u64)rem_8bit[rem] << 32;
188     }
189 
190     if (is_endian.little) {
191 # ifdef BSWAP8
192         Xi[0] = BSWAP8(Z.hi);
193         Xi[1] = BSWAP8(Z.lo);
194 # else
195         u8 *p = (u8 *)Xi;
196         u32 v;
197         v = (u32)(Z.hi >> 32);
198         PUTU32(p, v);
199         v = (u32)(Z.hi);
200         PUTU32(p + 4, v);
201         v = (u32)(Z.lo >> 32);
202         PUTU32(p + 8, v);
203         v = (u32)(Z.lo);
204         PUTU32(p + 12, v);
205 # endif
206     } else {
207         Xi[0] = Z.hi;
208         Xi[1] = Z.lo;
209     }
210 }
211 
212 # define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
213 
214 #elif   TABLE_BITS==4
215 
216 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
217 {
218     u128 V;
219 # if defined(OPENSSL_SMALL_FOOTPRINT)
220     int i;
221 # endif
222 
223     Htable[0].hi = 0;
224     Htable[0].lo = 0;
225     V.hi = H[0];
226     V.lo = H[1];
227 
228 # if defined(OPENSSL_SMALL_FOOTPRINT)
229     for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
230         REDUCE1BIT(V);
231         Htable[i] = V;
232     }
233 
234     for (i = 2; i < 16; i <<= 1) {
235         u128 *Hi = Htable + i;
236         int j;
237         for (V = *Hi, j = 1; j < i; ++j) {
238             Hi[j].hi = V.hi ^ Htable[j].hi;
239             Hi[j].lo = V.lo ^ Htable[j].lo;
240         }
241     }
242 # else
243     Htable[8] = V;
244     REDUCE1BIT(V);
245     Htable[4] = V;
246     REDUCE1BIT(V);
247     Htable[2] = V;
248     REDUCE1BIT(V);
249     Htable[1] = V;
250     Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
251     V = Htable[4];
252     Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
253     Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
254     Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
255     V = Htable[8];
256     Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
257     Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
258     Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
259     Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
260     Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
261     Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
262     Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
263 # endif
264 # if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
265     /*
266      * ARM assembler expects specific dword order in Htable.
267      */
268     {
269         int j;
270         const union {
271             long one;
272             char little;
273         } is_endian = { 1 };
274 
275         if (is_endian.little)
276             for (j = 0; j < 16; ++j) {
277                 V = Htable[j];
278                 Htable[j].hi = V.lo;
279                 Htable[j].lo = V.hi;
280             } else
281             for (j = 0; j < 16; ++j) {
282                 V = Htable[j];
283                 Htable[j].hi = V.lo << 32 | V.lo >> 32;
284                 Htable[j].lo = V.hi << 32 | V.hi >> 32;
285             }
286     }
287 # endif
288 }
289 
290 # ifndef GHASH_ASM
291 static const size_t rem_4bit[16] = {
292     PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
293     PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
294     PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
295     PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
296 };
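/*
 * rem_4bit[r] is the pre-computed reduction for the four bits r that are
 * shifted out at the low end of Z during one 4-bit step; PACK() has already
 * positioned it so that a single XOR into Z.hi (shifted on 32-bit builds)
 * applies the reduction.
 */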
297 
298 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
299 {
300     u128 Z;
301     int cnt = 15;
302     size_t rem, nlo, nhi;
303     const union {
304         long one;
305         char little;
306     } is_endian = { 1 };
307 
308     nlo = ((const u8 *)Xi)[15];
309     nhi = nlo >> 4;
310     nlo &= 0xf;
311 
312     Z.hi = Htable[nlo].hi;
313     Z.lo = Htable[nlo].lo;
314 
315     while (1) {
316         rem = (size_t)Z.lo & 0xf;
317         Z.lo = (Z.hi << 60) | (Z.lo >> 4);
318         Z.hi = (Z.hi >> 4);
319         if (sizeof(size_t) == 8)
320             Z.hi ^= rem_4bit[rem];
321         else
322             Z.hi ^= (u64)rem_4bit[rem] << 32;
323 
324         Z.hi ^= Htable[nhi].hi;
325         Z.lo ^= Htable[nhi].lo;
326 
327         if (--cnt < 0)
328             break;
329 
330         nlo = ((const u8 *)Xi)[cnt];
331         nhi = nlo >> 4;
332         nlo &= 0xf;
333 
334         rem = (size_t)Z.lo & 0xf;
335         Z.lo = (Z.hi << 60) | (Z.lo >> 4);
336         Z.hi = (Z.hi >> 4);
337         if (sizeof(size_t) == 8)
338             Z.hi ^= rem_4bit[rem];
339         else
340             Z.hi ^= (u64)rem_4bit[rem] << 32;
341 
342         Z.hi ^= Htable[nlo].hi;
343         Z.lo ^= Htable[nlo].lo;
344     }
345 
346     if (is_endian.little) {
347 #  ifdef BSWAP8
348         Xi[0] = BSWAP8(Z.hi);
349         Xi[1] = BSWAP8(Z.lo);
350 #  else
351         u8 *p = (u8 *)Xi;
352         u32 v;
353         v = (u32)(Z.hi >> 32);
354         PUTU32(p, v);
355         v = (u32)(Z.hi);
356         PUTU32(p + 4, v);
357         v = (u32)(Z.lo >> 32);
358         PUTU32(p + 8, v);
359         v = (u32)(Z.lo);
360         PUTU32(p + 12, v);
361 #  endif
362     } else {
363         Xi[0] = Z.hi;
364         Xi[1] = Z.lo;
365     }
366 }
367 
368 #  if !defined(OPENSSL_SMALL_FOOTPRINT)
369 /*
370  * Streamed variant of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt
371  * for details... Compiler-generated code doesn't seem to give any
372  * performance improvement, at least not on x86[_64]. It's here mostly
373  * as a reference and a placeholder for possible future non-trivial
374  * optimization[s]...
375  */
376 static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
377                            const u8 *inp, size_t len)
378 {
379     u128 Z;
380     int cnt;
381     size_t rem, nlo, nhi;
382     const union {
383         long one;
384         char little;
385     } is_endian = { 1 };
386 
387 #   if 1
388     do {
389         cnt = 15;
390         nlo = ((const u8 *)Xi)[15];
391         nlo ^= inp[15];
392         nhi = nlo >> 4;
393         nlo &= 0xf;
394 
395         Z.hi = Htable[nlo].hi;
396         Z.lo = Htable[nlo].lo;
397 
398         while (1) {
399             rem = (size_t)Z.lo & 0xf;
400             Z.lo = (Z.hi << 60) | (Z.lo >> 4);
401             Z.hi = (Z.hi >> 4);
402             if (sizeof(size_t) == 8)
403                 Z.hi ^= rem_4bit[rem];
404             else
405                 Z.hi ^= (u64)rem_4bit[rem] << 32;
406 
407             Z.hi ^= Htable[nhi].hi;
408             Z.lo ^= Htable[nhi].lo;
409 
410             if (--cnt < 0)
411                 break;
412 
413             nlo = ((const u8 *)Xi)[cnt];
414             nlo ^= inp[cnt];
415             nhi = nlo >> 4;
416             nlo &= 0xf;
417 
418             rem = (size_t)Z.lo & 0xf;
419             Z.lo = (Z.hi << 60) | (Z.lo >> 4);
420             Z.hi = (Z.hi >> 4);
421             if (sizeof(size_t) == 8)
422                 Z.hi ^= rem_4bit[rem];
423             else
424                 Z.hi ^= (u64)rem_4bit[rem] << 32;
425 
426             Z.hi ^= Htable[nlo].hi;
427             Z.lo ^= Htable[nlo].lo;
428         }
429 #   else
430     /*
431      * An extra 256+16 bytes per key plus 512 bytes of shared tables
432      * [should] give ~50% improvement... One could have PACK()-ed
433      * rem_8bit even here, but the priority is to minimize the
434      * cache footprint...
435      */
436     u128 Hshr4[16];             /* Htable shifted right by 4 bits */
437     u8 Hshl4[16];               /* Htable shifted left by 4 bits */
438     static const unsigned short rem_8bit[256] = {
439         0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
440         0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
441         0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
442         0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
443         0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
444         0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
445         0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
446         0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
447         0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
448         0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
449         0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
450         0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
451         0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
452         0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
453         0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
454         0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
455         0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
456         0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
457         0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
458         0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
459         0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
460         0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
461         0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
462         0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
463         0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
464         0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
465         0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
466         0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
467         0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
468         0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
469         0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
470         0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
471     };
472     /*
473      * This pre-processing phase slows the procedure down by roughly as
474      * much time as it makes each loop iteration faster. In other words,
475      * single-block performance is about the same as the straightforward
476      * "4-bit" implementation, and from there on it only gets faster...
477      */
478     for (cnt = 0; cnt < 16; ++cnt) {
479         Z.hi = Htable[cnt].hi;
480         Z.lo = Htable[cnt].lo;
481         Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
482         Hshr4[cnt].hi = (Z.hi >> 4);
483         Hshl4[cnt] = (u8)(Z.lo << 4);
484     }
485 
486     do {
487         for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
488             nlo = ((const u8 *)Xi)[cnt];
489             nlo ^= inp[cnt];
490             nhi = nlo >> 4;
491             nlo &= 0xf;
492 
493             Z.hi ^= Htable[nlo].hi;
494             Z.lo ^= Htable[nlo].lo;
495 
496             rem = (size_t)Z.lo & 0xff;
497 
498             Z.lo = (Z.hi << 56) | (Z.lo >> 8);
499             Z.hi = (Z.hi >> 8);
500 
501             Z.hi ^= Hshr4[nhi].hi;
502             Z.lo ^= Hshr4[nhi].lo;
503             Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
504         }
505 
506         nlo = ((const u8 *)Xi)[0];
507         nlo ^= inp[0];
508         nhi = nlo >> 4;
509         nlo &= 0xf;
510 
511         Z.hi ^= Htable[nlo].hi;
512         Z.lo ^= Htable[nlo].lo;
513 
514         rem = (size_t)Z.lo & 0xf;
515 
516         Z.lo = (Z.hi << 60) | (Z.lo >> 4);
517         Z.hi = (Z.hi >> 4);
518 
519         Z.hi ^= Htable[nhi].hi;
520         Z.lo ^= Htable[nhi].lo;
521         Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
522 #   endif
523 
524         if (is_endian.little) {
525 #   ifdef BSWAP8
526             Xi[0] = BSWAP8(Z.hi);
527             Xi[1] = BSWAP8(Z.lo);
528 #   else
529             u8 *p = (u8 *)Xi;
530             u32 v;
531             v = (u32)(Z.hi >> 32);
532             PUTU32(p, v);
533             v = (u32)(Z.hi);
534             PUTU32(p + 4, v);
535             v = (u32)(Z.lo >> 32);
536             PUTU32(p + 8, v);
537             v = (u32)(Z.lo);
538             PUTU32(p + 12, v);
539 #   endif
540         } else {
541             Xi[0] = Z.hi;
542             Xi[1] = Z.lo;
543         }
544     } while (inp += 16, len -= 16);
545 }
546 #  endif
547 # else
548 void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
549 void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
550                     size_t len);
551 # endif
552 
553 # define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
554 # if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
555 #  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
556 /*
557  * GHASH_CHUNK is a "stride parameter" meant to mitigate cache-thrashing
558  * effects. In other words, the idea is to hash data while it is still in
559  * the L1 cache after the encryption pass...
560  */
561 #  define GHASH_CHUNK       (3*1024)
562 # endif
563 
564 #else                           /* TABLE_BITS */
565 
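/*
 * Table-free fallback: plain bit-serial multiplication.  For every bit of
 * Xi (taken from the most significant end of each machine word) the running
 * product Z is conditionally XORed with V, and V is advanced one bit with
 * REDUCE1BIT.  Minimal memory, but much slower than the table-driven
 * variants, hence the note above about TABLE_BITS==1.
 */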
566 static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
567 {
568     u128 V, Z = { 0, 0 };
569     long X;
570     int i, j;
571     const long *xi = (const long *)Xi;
572     const union {
573         long one;
574         char little;
575     } is_endian = { 1 };
576 
577     V.hi = H[0];                /* H is in host byte order, no byte swapping */
578     V.lo = H[1];
579 
580     for (j = 0; j < 16 / sizeof(long); ++j) {
581         if (is_endian.little) {
582             if (sizeof(long) == 8) {
583 # ifdef BSWAP8
584                 X = (long)(BSWAP8(xi[j]));
585 # else
586                 const u8 *p = (const u8 *)(xi + j);
587                 X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
588 # endif
589             } else {
590                 const u8 *p = (const u8 *)(xi + j);
591                 X = (long)GETU32(p);
592             }
593         } else
594             X = xi[j];
595 
596         for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
597             u64 M = (u64)(X >> (8 * sizeof(long) - 1));
598             Z.hi ^= V.hi & M;
599             Z.lo ^= V.lo & M;
600 
601             REDUCE1BIT(V);
602         }
603     }
604 
605     if (is_endian.little) {
606 # ifdef BSWAP8
607         Xi[0] = BSWAP8(Z.hi);
608         Xi[1] = BSWAP8(Z.lo);
609 # else
610         u8 *p = (u8 *)Xi;
611         u32 v;
612         v = (u32)(Z.hi >> 32);
613         PUTU32(p, v);
614         v = (u32)(Z.hi);
615         PUTU32(p + 4, v);
616         v = (u32)(Z.lo >> 32);
617         PUTU32(p + 8, v);
618         v = (u32)(Z.lo);
619         PUTU32(p + 12, v);
620 # endif
621     } else {
622         Xi[0] = Z.hi;
623         Xi[1] = Z.lo;
624     }
625 }
626 
627 # define GCM_MUL(ctx,Xi)   gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
628 
629 #endif
630 
631 #if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
632 # if    !defined(I386_ONLY) && \
633         (defined(__i386)        || defined(__i386__)    || \
634          defined(__x86_64)      || defined(__x86_64__)  || \
635          defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
636 #  define GHASH_ASM_X86_OR_64
637 #  define GCM_FUNCREF_4BIT
638 extern unsigned int OPENSSL_ia32cap_P[];
639 
640 void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
641 void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
642 void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
643                      size_t len);
644 
645 #  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
646 #   define gcm_init_avx   gcm_init_clmul
647 #   define gcm_gmult_avx  gcm_gmult_clmul
648 #   define gcm_ghash_avx  gcm_ghash_clmul
649 #  else
650 void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
651 void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
652 void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
653                    size_t len);
654 #  endif
655 
656 #  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
657 #   define GHASH_ASM_X86
658 void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
659 void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
660                         size_t len);
661 
662 void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
663 void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
664                         size_t len);
665 #  endif
666 # elif (defined(__arm__) || defined(__arm) || defined(__aarch64__)) && defined(GHASH_ASM)
667 #  include "arm_arch.h"
668 #  if __ARM_MAX_ARCH__>=7
669 #   define GHASH_ASM_ARM
670 #   define GCM_FUNCREF_4BIT
671 #   if defined(__arm__) || defined(__arm)
672 #    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
673 #   endif
674 void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
675 void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
676 void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
677                     size_t len);
678 #   if __ARM_MAX_ARCH__>=8
679 #    define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
680 void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
681 void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
682 void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
683                   size_t len);
684 #   endif
685 #  endif
686 # elif defined(__sparc__) || defined(__sparc)
687 #  include "sparc_arch.h"
688 #  if defined(__arch64__)
689 #   define GHASH_ASM_SPARC
690 #   define GCM_FUNCREF_4BIT
691 extern unsigned int OPENSSL_sparcv9cap_P[];
692 void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
693 void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
694 void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
695                     size_t len);
696 #  endif
697 # elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
698 #  include "ppc_arch.h"
699 #  define GHASH_ASM_PPC
700 #  define GCM_FUNCREF_4BIT
701 void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
702 void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
703 void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
704                   size_t len);
705 # endif
706 #endif
707 
708 #ifdef GCM_FUNCREF_4BIT
709 # undef  GCM_MUL
710 # define GCM_MUL(ctx,Xi)        (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
711 # ifdef GHASH
712 #  undef  GHASH
713 #  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
714 # endif
715 #endif
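/*
 * When an assembler implementation is available, GCM_MUL/GHASH dispatch
 * through the per-context function pointers ctx->gmult/ctx->ghash; each
 * public routine below copies them into the locals gcm_gmult_p/gcm_ghash_p
 * that the redefined macros above expect.
 */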
716 
717 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
718 {
719     const union {
720         long one;
721         char little;
722     } is_endian = { 1 };
723 
724     memset(ctx, 0, sizeof(*ctx));
725     ctx->block = block;
726     ctx->key = key;
727 
728     (*block) (ctx->H.c, ctx->H.c, key);
729 
730     if (is_endian.little) {
731         /* H is stored in host byte order */
732 #ifdef BSWAP8
733         ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
734         ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
735 #else
736         u8 *p = ctx->H.c;
737         u64 hi, lo;
738         hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
739         lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
740         ctx->H.u[0] = hi;
741         ctx->H.u[1] = lo;
742 #endif
743     }
744 #if     TABLE_BITS==8
745     gcm_init_8bit(ctx->Htable, ctx->H.u);
746 #elif   TABLE_BITS==4
747 # if    defined(GHASH)
748 #  define CTX__GHASH(f) (ctx->ghash = (f))
749 # else
750 #  define CTX__GHASH(f) (ctx->ghash = NULL)
751 # endif
752 # if    defined(GHASH_ASM_X86_OR_64)
753 #  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
754     if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
755         if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
756             gcm_init_avx(ctx->Htable, ctx->H.u);
757             ctx->gmult = gcm_gmult_avx;
758             CTX__GHASH(gcm_ghash_avx);
759         } else {
760             gcm_init_clmul(ctx->Htable, ctx->H.u);
761             ctx->gmult = gcm_gmult_clmul;
762             CTX__GHASH(gcm_ghash_clmul);
763         }
764         return;
765     }
766 #  endif
767     gcm_init_4bit(ctx->Htable, ctx->H.u);
768 #  if   defined(GHASH_ASM_X86)  /* x86 only */
769 #   if  defined(OPENSSL_IA32_SSE2)
770     if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
771 #   else
772     if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
773 #   endif
774         ctx->gmult = gcm_gmult_4bit_mmx;
775         CTX__GHASH(gcm_ghash_4bit_mmx);
776     } else {
777         ctx->gmult = gcm_gmult_4bit_x86;
778         CTX__GHASH(gcm_ghash_4bit_x86);
779     }
780 #  else
781     ctx->gmult = gcm_gmult_4bit;
782     CTX__GHASH(gcm_ghash_4bit);
783 #  endif
784 # elif  defined(GHASH_ASM_ARM)
785 #  ifdef PMULL_CAPABLE
786     if (PMULL_CAPABLE) {
787         gcm_init_v8(ctx->Htable, ctx->H.u);
788         ctx->gmult = gcm_gmult_v8;
789         CTX__GHASH(gcm_ghash_v8);
790     } else
791 #  endif
792 #  ifdef NEON_CAPABLE
793     if (NEON_CAPABLE) {
794         gcm_init_neon(ctx->Htable, ctx->H.u);
795         ctx->gmult = gcm_gmult_neon;
796         CTX__GHASH(gcm_ghash_neon);
797     } else
798 #  endif
799     {
800         gcm_init_4bit(ctx->Htable, ctx->H.u);
801         ctx->gmult = gcm_gmult_4bit;
802         CTX__GHASH(gcm_ghash_4bit);
803     }
804 # elif  defined(GHASH_ASM_SPARC)
805     if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
806         gcm_init_vis3(ctx->Htable, ctx->H.u);
807         ctx->gmult = gcm_gmult_vis3;
808         CTX__GHASH(gcm_ghash_vis3);
809     } else {
810         gcm_init_4bit(ctx->Htable, ctx->H.u);
811         ctx->gmult = gcm_gmult_4bit;
812         CTX__GHASH(gcm_ghash_4bit);
813     }
814 # elif  defined(GHASH_ASM_PPC)
815     if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
816         gcm_init_p8(ctx->Htable, ctx->H.u);
817         ctx->gmult = gcm_gmult_p8;
818         CTX__GHASH(gcm_ghash_p8);
819     } else {
820         gcm_init_4bit(ctx->Htable, ctx->H.u);
821         ctx->gmult = gcm_gmult_4bit;
822         CTX__GHASH(gcm_ghash_4bit);
823     }
824 # else
825     gcm_init_4bit(ctx->Htable, ctx->H.u);
826 # endif
827 # undef CTX__GHASH
828 #endif
829 }
830 
831 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
832                          size_t len)
833 {
834     const union {
835         long one;
836         char little;
837     } is_endian = { 1 };
838     unsigned int ctr;
839 #ifdef GCM_FUNCREF_4BIT
840     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
841 #endif
842 
843     ctx->Yi.u[0] = 0;
844     ctx->Yi.u[1] = 0;
845     ctx->Xi.u[0] = 0;
846     ctx->Xi.u[1] = 0;
847     ctx->len.u[0] = 0;          /* AAD length */
848     ctx->len.u[1] = 0;          /* message length */
849     ctx->ares = 0;
850     ctx->mres = 0;
851 
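    /*
     * Per the GCM specification: a 96-bit IV is used directly as
     * J0 = IV || 0^31 || 1, while any other IV length is padded and run
     * through GHASH together with its bit length to derive J0 (the extra
     * GCM_MUL calls below).
     */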
852     if (len == 12) {
853         memcpy(ctx->Yi.c, iv, 12);
854         ctx->Yi.c[15] = 1;
855         ctr = 1;
856     } else {
857         size_t i;
858         u64 len0 = len;
859 
860         while (len >= 16) {
861             for (i = 0; i < 16; ++i)
862                 ctx->Yi.c[i] ^= iv[i];
863             GCM_MUL(ctx, Yi);
864             iv += 16;
865             len -= 16;
866         }
867         if (len) {
868             for (i = 0; i < len; ++i)
869                 ctx->Yi.c[i] ^= iv[i];
870             GCM_MUL(ctx, Yi);
871         }
872         len0 <<= 3;
873         if (is_endian.little) {
874 #ifdef BSWAP8
875             ctx->Yi.u[1] ^= BSWAP8(len0);
876 #else
877             ctx->Yi.c[8] ^= (u8)(len0 >> 56);
878             ctx->Yi.c[9] ^= (u8)(len0 >> 48);
879             ctx->Yi.c[10] ^= (u8)(len0 >> 40);
880             ctx->Yi.c[11] ^= (u8)(len0 >> 32);
881             ctx->Yi.c[12] ^= (u8)(len0 >> 24);
882             ctx->Yi.c[13] ^= (u8)(len0 >> 16);
883             ctx->Yi.c[14] ^= (u8)(len0 >> 8);
884             ctx->Yi.c[15] ^= (u8)(len0);
885 #endif
886         } else
887             ctx->Yi.u[1] ^= len0;
888 
889         GCM_MUL(ctx, Yi);
890 
891         if (is_endian.little)
892 #ifdef BSWAP4
893             ctr = BSWAP4(ctx->Yi.d[3]);
894 #else
895             ctr = GETU32(ctx->Yi.c + 12);
896 #endif
897         else
898             ctr = ctx->Yi.d[3];
899     }
900 
901     (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
902     ++ctr;
903     if (is_endian.little)
904 #ifdef BSWAP4
905         ctx->Yi.d[3] = BSWAP4(ctr);
906 #else
907         PUTU32(ctx->Yi.c + 12, ctr);
908 #endif
909     else
910         ctx->Yi.d[3] = ctr;
911 }
912 
913 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
914                       size_t len)
915 {
916     size_t i;
917     unsigned int n;
918     u64 alen = ctx->len.u[0];
919 #ifdef GCM_FUNCREF_4BIT
920     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
921 # ifdef GHASH
922     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
923                          const u8 *inp, size_t len) = ctx->ghash;
924 # endif
925 #endif
926 
927     if (ctx->len.u[1])
928         return -2;
929 
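    /*
     * NIST SP 800-38D caps the AAD at 2^64 - 1 bits, i.e. just under
     * 2^61 bytes; the check below enforces (approximately) that limit.
     */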
930     alen += len;
931     if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
932         return -1;
933     ctx->len.u[0] = alen;
934 
935     n = ctx->ares;
936     if (n) {
937         while (n && len) {
938             ctx->Xi.c[n] ^= *(aad++);
939             --len;
940             n = (n + 1) % 16;
941         }
942         if (n == 0)
943             GCM_MUL(ctx, Xi);
944         else {
945             ctx->ares = n;
946             return 0;
947         }
948     }
949 #ifdef GHASH
950     if ((i = (len & (size_t)-16))) {
951         GHASH(ctx, aad, i);
952         aad += i;
953         len -= i;
954     }
955 #else
956     while (len >= 16) {
957         for (i = 0; i < 16; ++i)
958             ctx->Xi.c[i] ^= aad[i];
959         GCM_MUL(ctx, Xi);
960         aad += 16;
961         len -= 16;
962     }
963 #endif
964     if (len) {
965         n = (unsigned int)len;
966         for (i = 0; i < len; ++i)
967             ctx->Xi.c[i] ^= aad[i];
968     }
969 
970     ctx->ares = n;
971     return 0;
972 }
973 
974 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
975                           const unsigned char *in, unsigned char *out,
976                           size_t len)
977 {
978     const union {
979         long one;
980         char little;
981     } is_endian = { 1 };
982     unsigned int n, ctr;
983     size_t i;
984     u64 mlen = ctx->len.u[1];
985     block128_f block = ctx->block;
986     void *key = ctx->key;
987 #ifdef GCM_FUNCREF_4BIT
988     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
989 # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
990     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
991                          const u8 *inp, size_t len) = ctx->ghash;
992 # endif
993 #endif
994 
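    /*
     * GCM limits the plaintext to 2^39 - 256 bits, i.e. 2^36 - 32 bytes,
     * which is what the check below enforces.
     */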
995     mlen += len;
996     if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
997         return -1;
998     ctx->len.u[1] = mlen;
999 
1000     if (ctx->ares) {
1001         /* First call to encrypt finalizes GHASH(AAD) */
1002         GCM_MUL(ctx, Xi);
1003         ctx->ares = 0;
1004     }
1005 
1006     if (is_endian.little)
1007 #ifdef BSWAP4
1008         ctr = BSWAP4(ctx->Yi.d[3]);
1009 #else
1010         ctr = GETU32(ctx->Yi.c + 12);
1011 #endif
1012     else
1013         ctr = ctx->Yi.d[3];
1014 
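    /*
     * mres counts how many bytes of the current 16-byte block have already
     * been processed, so callers may supply data in arbitrary-sized pieces.
     */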
1015     n = ctx->mres;
1016 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1017     if (16 % sizeof(size_t) == 0) { /* always true actually */
1018         do {
1019             if (n) {
1020                 while (n && len) {
1021                     ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1022                     --len;
1023                     n = (n + 1) % 16;
1024                 }
1025                 if (n == 0)
1026                     GCM_MUL(ctx, Xi);
1027                 else {
1028                     ctx->mres = n;
1029                     return 0;
1030                 }
1031             }
1032 # if defined(STRICT_ALIGNMENT)
1033             if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1034                 break;
1035 # endif
1036 # if defined(GHASH)
1037 #  if defined(GHASH_CHUNK)
1038             while (len >= GHASH_CHUNK) {
1039                 size_t j = GHASH_CHUNK;
1040 
1041                 while (j) {
1042                     size_t *out_t = (size_t *)out;
1043                     const size_t *in_t = (const size_t *)in;
1044 
1045                     (*block) (ctx->Yi.c, ctx->EKi.c, key);
1046                     ++ctr;
1047                     if (is_endian.little)
1048 #   ifdef BSWAP4
1049                         ctx->Yi.d[3] = BSWAP4(ctr);
1050 #   else
1051                         PUTU32(ctx->Yi.c + 12, ctr);
1052 #   endif
1053                     else
1054                         ctx->Yi.d[3] = ctr;
1055                     for (i = 0; i < 16 / sizeof(size_t); ++i)
1056                         out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1057                     out += 16;
1058                     in += 16;
1059                     j -= 16;
1060                 }
1061                 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
1062                 len -= GHASH_CHUNK;
1063             }
1064 #  endif
1065             if ((i = (len & (size_t)-16))) {
1066                 size_t j = i;
1067 
1068                 while (len >= 16) {
1069                     size_t *out_t = (size_t *)out;
1070                     const size_t *in_t = (const size_t *)in;
1071 
1072                     (*block) (ctx->Yi.c, ctx->EKi.c, key);
1073                     ++ctr;
1074                     if (is_endian.little)
1075 #  ifdef BSWAP4
1076                         ctx->Yi.d[3] = BSWAP4(ctr);
1077 #  else
1078                         PUTU32(ctx->Yi.c + 12, ctr);
1079 #  endif
1080                     else
1081                         ctx->Yi.d[3] = ctr;
1082                     for (i = 0; i < 16 / sizeof(size_t); ++i)
1083                         out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1084                     out += 16;
1085                     in += 16;
1086                     len -= 16;
1087                 }
1088                 GHASH(ctx, out - j, j);
1089             }
1090 # else
1091             while (len >= 16) {
1092                 size_t *out_t = (size_t *)out;
1093                 const size_t *in_t = (const size_t *)in;
1094 
1095                 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1096                 ++ctr;
1097                 if (is_endian.little)
1098 #  ifdef BSWAP4
1099                     ctx->Yi.d[3] = BSWAP4(ctr);
1100 #  else
1101                     PUTU32(ctx->Yi.c + 12, ctr);
1102 #  endif
1103                 else
1104                     ctx->Yi.d[3] = ctr;
1105                 for (i = 0; i < 16 / sizeof(size_t); ++i)
1106                     ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1107                 GCM_MUL(ctx, Xi);
1108                 out += 16;
1109                 in += 16;
1110                 len -= 16;
1111             }
1112 # endif
1113             if (len) {
1114                 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1115                 ++ctr;
1116                 if (is_endian.little)
1117 # ifdef BSWAP4
1118                     ctx->Yi.d[3] = BSWAP4(ctr);
1119 # else
1120                     PUTU32(ctx->Yi.c + 12, ctr);
1121 # endif
1122                 else
1123                     ctx->Yi.d[3] = ctr;
1124                 while (len--) {
1125                     ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1126                     ++n;
1127                 }
1128             }
1129 
1130             ctx->mres = n;
1131             return 0;
1132         } while (0);
1133     }
1134 #endif
1135     for (i = 0; i < len; ++i) {
1136         if (n == 0) {
1137             (*block) (ctx->Yi.c, ctx->EKi.c, key);
1138             ++ctr;
1139             if (is_endian.little)
1140 #ifdef BSWAP4
1141                 ctx->Yi.d[3] = BSWAP4(ctr);
1142 #else
1143                 PUTU32(ctx->Yi.c + 12, ctr);
1144 #endif
1145             else
1146                 ctx->Yi.d[3] = ctr;
1147         }
1148         ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
1149         n = (n + 1) % 16;
1150         if (n == 0)
1151             GCM_MUL(ctx, Xi);
1152     }
1153 
1154     ctx->mres = n;
1155     return 0;
1156 }
1157 
1158 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1159                           const unsigned char *in, unsigned char *out,
1160                           size_t len)
1161 {
1162     const union {
1163         long one;
1164         char little;
1165     } is_endian = { 1 };
1166     unsigned int n, ctr;
1167     size_t i;
1168     u64 mlen = ctx->len.u[1];
1169     block128_f block = ctx->block;
1170     void *key = ctx->key;
1171 #ifdef GCM_FUNCREF_4BIT
1172     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1173 # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1174     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1175                          const u8 *inp, size_t len) = ctx->ghash;
1176 # endif
1177 #endif
1178 
1179     mlen += len;
1180     if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1181         return -1;
1182     ctx->len.u[1] = mlen;
1183 
1184     if (ctx->ares) {
1185         /* First call to decrypt finalizes GHASH(AAD) */
1186         GCM_MUL(ctx, Xi);
1187         ctx->ares = 0;
1188     }
1189 
1190     if (is_endian.little)
1191 #ifdef BSWAP4
1192         ctr = BSWAP4(ctx->Yi.d[3]);
1193 #else
1194         ctr = GETU32(ctx->Yi.c + 12);
1195 #endif
1196     else
1197         ctr = ctx->Yi.d[3];
1198 
1199     n = ctx->mres;
1200 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1201     if (16 % sizeof(size_t) == 0) { /* always true actually */
1202         do {
1203             if (n) {
1204                 while (n && len) {
1205                     u8 c = *(in++);
1206                     *(out++) = c ^ ctx->EKi.c[n];
1207                     ctx->Xi.c[n] ^= c;
1208                     --len;
1209                     n = (n + 1) % 16;
1210                 }
1211                 if (n == 0)
1212                     GCM_MUL(ctx, Xi);
1213                 else {
1214                     ctx->mres = n;
1215                     return 0;
1216                 }
1217             }
1218 # if defined(STRICT_ALIGNMENT)
1219             if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1220                 break;
1221 # endif
1222 # if defined(GHASH)
1223 #  if defined(GHASH_CHUNK)
1224             while (len >= GHASH_CHUNK) {
1225                 size_t j = GHASH_CHUNK;
1226 
1227                 GHASH(ctx, in, GHASH_CHUNK);
1228                 while (j) {
1229                     size_t *out_t = (size_t *)out;
1230                     const size_t *in_t = (const size_t *)in;
1231 
1232                     (*block) (ctx->Yi.c, ctx->EKi.c, key);
1233                     ++ctr;
1234                     if (is_endian.little)
1235 #   ifdef BSWAP4
1236                         ctx->Yi.d[3] = BSWAP4(ctr);
1237 #   else
1238                         PUTU32(ctx->Yi.c + 12, ctr);
1239 #   endif
1240                     else
1241                         ctx->Yi.d[3] = ctr;
1242                     for (i = 0; i < 16 / sizeof(size_t); ++i)
1243                         out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1244                     out += 16;
1245                     in += 16;
1246                     j -= 16;
1247                 }
1248                 len -= GHASH_CHUNK;
1249             }
1250 #  endif
1251             if ((i = (len & (size_t)-16))) {
1252                 GHASH(ctx, in, i);
1253                 while (len >= 16) {
1254                     size_t *out_t = (size_t *)out;
1255                     const size_t *in_t = (const size_t *)in;
1256 
1257                     (*block) (ctx->Yi.c, ctx->EKi.c, key);
1258                     ++ctr;
1259                     if (is_endian.little)
1260 #  ifdef BSWAP4
1261                         ctx->Yi.d[3] = BSWAP4(ctr);
1262 #  else
1263                         PUTU32(ctx->Yi.c + 12, ctr);
1264 #  endif
1265                     else
1266                         ctx->Yi.d[3] = ctr;
1267                     for (i = 0; i < 16 / sizeof(size_t); ++i)
1268                         out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1269                     out += 16;
1270                     in += 16;
1271                     len -= 16;
1272                 }
1273             }
1274 # else
1275             while (len >= 16) {
1276                 size_t *out_t = (size_t *)out;
1277                 const size_t *in_t = (const size_t *)in;
1278 
1279                 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1280                 ++ctr;
1281                 if (is_endian.little)
1282 #  ifdef BSWAP4
1283                     ctx->Yi.d[3] = BSWAP4(ctr);
1284 #  else
1285                     PUTU32(ctx->Yi.c + 12, ctr);
1286 #  endif
1287                 else
1288                     ctx->Yi.d[3] = ctr;
1289                 for (i = 0; i < 16 / sizeof(size_t); ++i) {
1290                     size_t c = in[i];
1291                     out[i] = c ^ ctx->EKi.t[i];
1292                     ctx->Xi.t[i] ^= c;
1293                 }
1294                 GCM_MUL(ctx, Xi);
1295                 out += 16;
1296                 in += 16;
1297                 len -= 16;
1298             }
1299 # endif
1300             if (len) {
1301                 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1302                 ++ctr;
1303                 if (is_endian.little)
1304 # ifdef BSWAP4
1305                     ctx->Yi.d[3] = BSWAP4(ctr);
1306 # else
1307                     PUTU32(ctx->Yi.c + 12, ctr);
1308 # endif
1309                 else
1310                     ctx->Yi.d[3] = ctr;
1311                 while (len--) {
1312                     u8 c = in[n];
1313                     ctx->Xi.c[n] ^= c;
1314                     out[n] = c ^ ctx->EKi.c[n];
1315                     ++n;
1316                 }
1317             }
1318 
1319             ctx->mres = n;
1320             return 0;
1321         } while (0);
1322     }
1323 #endif
1324     for (i = 0; i < len; ++i) {
1325         u8 c;
1326         if (n == 0) {
1327             (*block) (ctx->Yi.c, ctx->EKi.c, key);
1328             ++ctr;
1329             if (is_endian.little)
1330 #ifdef BSWAP4
1331                 ctx->Yi.d[3] = BSWAP4(ctr);
1332 #else
1333                 PUTU32(ctx->Yi.c + 12, ctr);
1334 #endif
1335             else
1336                 ctx->Yi.d[3] = ctr;
1337         }
1338         c = in[i];
1339         out[i] = c ^ ctx->EKi.c[n];
1340         ctx->Xi.c[n] ^= c;
1341         n = (n + 1) % 16;
1342         if (n == 0)
1343             GCM_MUL(ctx, Xi);
1344     }
1345 
1346     ctx->mres = n;
1347     return 0;
1348 }
1349 
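/*
 * The *_ctr32 variants hand the bulk counter-mode work to a ctr128_f
 * 'stream' routine (typically a vectorized or hardware-assisted CTR
 * implementation that processes many blocks per call); only the low 32 bits
 * of the counter are incremented, as GCM requires.
 */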
1350 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1351                                 const unsigned char *in, unsigned char *out,
1352                                 size_t len, ctr128_f stream)
1353 {
1354 #if defined(OPENSSL_SMALL_FOOTPRINT)
1355     return CRYPTO_gcm128_encrypt(ctx, in, out, len);
1356 #else
1357     const union {
1358         long one;
1359         char little;
1360     } is_endian = { 1 };
1361     unsigned int n, ctr;
1362     size_t i;
1363     u64 mlen = ctx->len.u[1];
1364     void *key = ctx->key;
1365 # ifdef GCM_FUNCREF_4BIT
1366     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1367 #  ifdef GHASH
1368     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1369                          const u8 *inp, size_t len) = ctx->ghash;
1370 #  endif
1371 # endif
1372 
1373     mlen += len;
1374     if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1375         return -1;
1376     ctx->len.u[1] = mlen;
1377 
1378     if (ctx->ares) {
1379         /* First call to encrypt finalizes GHASH(AAD) */
1380         GCM_MUL(ctx, Xi);
1381         ctx->ares = 0;
1382     }
1383 
1384     if (is_endian.little)
1385 # ifdef BSWAP4
1386         ctr = BSWAP4(ctx->Yi.d[3]);
1387 # else
1388         ctr = GETU32(ctx->Yi.c + 12);
1389 # endif
1390     else
1391         ctr = ctx->Yi.d[3];
1392 
1393     n = ctx->mres;
1394     if (n) {
1395         while (n && len) {
1396             ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1397             --len;
1398             n = (n + 1) % 16;
1399         }
1400         if (n == 0)
1401             GCM_MUL(ctx, Xi);
1402         else {
1403             ctx->mres = n;
1404             return 0;
1405         }
1406     }
1407 # if defined(GHASH) && defined(GHASH_CHUNK)
1408     while (len >= GHASH_CHUNK) {
1409         (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1410         ctr += GHASH_CHUNK / 16;
1411         if (is_endian.little)
1412 #  ifdef BSWAP4
1413             ctx->Yi.d[3] = BSWAP4(ctr);
1414 #  else
1415             PUTU32(ctx->Yi.c + 12, ctr);
1416 #  endif
1417         else
1418             ctx->Yi.d[3] = ctr;
1419         GHASH(ctx, out, GHASH_CHUNK);
1420         out += GHASH_CHUNK;
1421         in += GHASH_CHUNK;
1422         len -= GHASH_CHUNK;
1423     }
1424 # endif
1425     if ((i = (len & (size_t)-16))) {
1426         size_t j = i / 16;
1427 
1428         (*stream) (in, out, j, key, ctx->Yi.c);
1429         ctr += (unsigned int)j;
1430         if (is_endian.little)
1431 # ifdef BSWAP4
1432             ctx->Yi.d[3] = BSWAP4(ctr);
1433 # else
1434             PUTU32(ctx->Yi.c + 12, ctr);
1435 # endif
1436         else
1437             ctx->Yi.d[3] = ctr;
1438         in += i;
1439         len -= i;
1440 # if defined(GHASH)
1441         GHASH(ctx, out, i);
1442         out += i;
1443 # else
1444         while (j--) {
1445             for (i = 0; i < 16; ++i)
1446                 ctx->Xi.c[i] ^= out[i];
1447             GCM_MUL(ctx, Xi);
1448             out += 16;
1449         }
1450 # endif
1451     }
1452     if (len) {
1453         (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1454         ++ctr;
1455         if (is_endian.little)
1456 # ifdef BSWAP4
1457             ctx->Yi.d[3] = BSWAP4(ctr);
1458 # else
1459             PUTU32(ctx->Yi.c + 12, ctr);
1460 # endif
1461         else
1462             ctx->Yi.d[3] = ctr;
1463         while (len--) {
1464             ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1465             ++n;
1466         }
1467     }
1468 
1469     ctx->mres = n;
1470     return 0;
1471 #endif
1472 }
1473 
1474 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1475                                 const unsigned char *in, unsigned char *out,
1476                                 size_t len, ctr128_f stream)
1477 {
1478 #if defined(OPENSSL_SMALL_FOOTPRINT)
1479     return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1480 #else
1481     const union {
1482         long one;
1483         char little;
1484     } is_endian = { 1 };
1485     unsigned int n, ctr;
1486     size_t i;
1487     u64 mlen = ctx->len.u[1];
1488     void *key = ctx->key;
1489 # ifdef GCM_FUNCREF_4BIT
1490     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1491 #  ifdef GHASH
1492     void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1493                          const u8 *inp, size_t len) = ctx->ghash;
1494 #  endif
1495 # endif
1496 
1497     mlen += len;
1498     if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1499         return -1;
1500     ctx->len.u[1] = mlen;
1501 
1502     if (ctx->ares) {
1503         /* First call to decrypt finalizes GHASH(AAD) */
1504         GCM_MUL(ctx, Xi);
1505         ctx->ares = 0;
1506     }
1507 
1508     if (is_endian.little)
1509 # ifdef BSWAP4
1510         ctr = BSWAP4(ctx->Yi.d[3]);
1511 # else
1512         ctr = GETU32(ctx->Yi.c + 12);
1513 # endif
1514     else
1515         ctr = ctx->Yi.d[3];
1516 
1517     n = ctx->mres;
1518     if (n) {
1519         while (n && len) {
1520             u8 c = *(in++);
1521             *(out++) = c ^ ctx->EKi.c[n];
1522             ctx->Xi.c[n] ^= c;
1523             --len;
1524             n = (n + 1) % 16;
1525         }
1526         if (n == 0)
1527             GCM_MUL(ctx, Xi);
1528         else {
1529             ctx->mres = n;
1530             return 0;
1531         }
1532     }
1533 # if defined(GHASH) && defined(GHASH_CHUNK)
1534     while (len >= GHASH_CHUNK) {
1535         GHASH(ctx, in, GHASH_CHUNK);
1536         (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1537         ctr += GHASH_CHUNK / 16;
1538         if (is_endian.little)
1539 #  ifdef BSWAP4
1540             ctx->Yi.d[3] = BSWAP4(ctr);
1541 #  else
1542             PUTU32(ctx->Yi.c + 12, ctr);
1543 #  endif
1544         else
1545             ctx->Yi.d[3] = ctr;
1546         out += GHASH_CHUNK;
1547         in += GHASH_CHUNK;
1548         len -= GHASH_CHUNK;
1549     }
1550 # endif
1551     if ((i = (len & (size_t)-16))) {
1552         size_t j = i / 16;
1553 
1554 # if defined(GHASH)
1555         GHASH(ctx, in, i);
1556 # else
1557         while (j--) {
1558             size_t k;
1559             for (k = 0; k < 16; ++k)
1560                 ctx->Xi.c[k] ^= in[k];
1561             GCM_MUL(ctx, Xi);
1562             in += 16;
1563         }
1564         j = i / 16;
1565         in -= i;
1566 # endif
1567         (*stream) (in, out, j, key, ctx->Yi.c);
1568         ctr += (unsigned int)j;
1569         if (is_endian.little)
1570 # ifdef BSWAP4
1571             ctx->Yi.d[3] = BSWAP4(ctr);
1572 # else
1573             PUTU32(ctx->Yi.c + 12, ctr);
1574 # endif
1575         else
1576             ctx->Yi.d[3] = ctr;
1577         out += i;
1578         in += i;
1579         len -= i;
1580     }
1581     if (len) {
1582         (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1583         ++ctr;
1584         if (is_endian.little)
1585 # ifdef BSWAP4
1586             ctx->Yi.d[3] = BSWAP4(ctr);
1587 # else
1588             PUTU32(ctx->Yi.c + 12, ctr);
1589 # endif
1590         else
1591             ctx->Yi.d[3] = ctr;
1592         while (len--) {
1593             u8 c = in[n];
1594             ctx->Xi.c[n] ^= c;
1595             out[n] = c ^ ctx->EKi.c[n];
1596             ++n;
1597         }
1598     }
1599 
1600     ctx->mres = n;
1601     return 0;
1602 #endif
1603 }
1604 
1605 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1606                          size_t len)
1607 {
1608     const union {
1609         long one;
1610         char little;
1611     } is_endian = { 1 };
1612     u64 alen = ctx->len.u[0] << 3;
1613     u64 clen = ctx->len.u[1] << 3;
1614 #ifdef GCM_FUNCREF_4BIT
1615     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1616 #endif
1617 
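    /*
     * Flush any pending partial block, fold in the bit lengths of the AAD
     * and ciphertext, and XOR with EK0 = E_K(J0) to obtain the tag; the
     * comparison uses CRYPTO_memcmp to stay constant-time.
     */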
1618     if (ctx->mres || ctx->ares)
1619         GCM_MUL(ctx, Xi);
1620 
1621     if (is_endian.little) {
1622 #ifdef BSWAP8
1623         alen = BSWAP8(alen);
1624         clen = BSWAP8(clen);
1625 #else
1626         u8 *p = ctx->len.c;
1627 
1628         ctx->len.u[0] = alen;
1629         ctx->len.u[1] = clen;
1630 
1631         alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1632         clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
1633 #endif
1634     }
1635 
1636     ctx->Xi.u[0] ^= alen;
1637     ctx->Xi.u[1] ^= clen;
1638     GCM_MUL(ctx, Xi);
1639 
1640     ctx->Xi.u[0] ^= ctx->EK0.u[0];
1641     ctx->Xi.u[1] ^= ctx->EK0.u[1];
1642 
1643     if (tag && len <= sizeof(ctx->Xi))
1644         return CRYPTO_memcmp(ctx->Xi.c, tag, len);
1645     else
1646         return -1;
1647 }
1648 
1649 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1650 {
1651     CRYPTO_gcm128_finish(ctx, NULL, 0);
1652     memcpy(tag, ctx->Xi.c,
1653            len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1654 }
1655 
1656 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1657 {
1658     GCM128_CONTEXT *ret;
1659 
1660     if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1661         CRYPTO_gcm128_init(ret, key, block);
1662 
1663     return ret;
1664 }
1665 
1666 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1667 {
1668     OPENSSL_clear_free(ctx, sizeof(*ctx));
1669 }
1670 
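/*-
 * Illustrative one-shot usage of this API (a sketch only; error handling
 * omitted, and the AES_KEY named aes_ks is assumed to be an already
 * expanded encryption key):
 *
 *      GCM128_CONTEXT *gcm = CRYPTO_gcm128_new(&aes_ks, (block128_f)AES_encrypt);
 *      CRYPTO_gcm128_setiv(gcm, iv, iv_len);
 *      CRYPTO_gcm128_aad(gcm, aad, aad_len);
 *      CRYPTO_gcm128_encrypt(gcm, plaintext, ciphertext, pt_len);
 *      CRYPTO_gcm128_tag(gcm, tag, 16);
 *      CRYPTO_gcm128_release(gcm);
 */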
1671 #if defined(SELFTEST)
1672 # include <stdio.h>
1673 # include <openssl/aes.h>
1674 
1675 /* Test Case 1 */
1676 static const u8 K1[16], *P1 = NULL, *A1 = NULL, IV1[12], *C1 = NULL;
1677 static const u8 T1[] = {
1678     0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e, 0x30, 0x61,
1679     0x36, 0x7f, 0x1d, 0x57, 0xa4, 0xe7, 0x45, 0x5a
1680 };
1681 
1682 /* Test Case 2 */
1683 # define K2 K1
1684 # define A2 A1
1685 # define IV2 IV1
1686 static const u8 P2[16];
1687 static const u8 C2[] = {
1688     0x03, 0x88, 0xda, 0xce, 0x60, 0xb6, 0xa3, 0x92,
1689     0xf3, 0x28, 0xc2, 0xb9, 0x71, 0xb2, 0xfe, 0x78
1690 };
1691 
1692 static const u8 T2[] = {
1693     0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec, 0x13, 0xbd,
1694     0xf5, 0x3a, 0x67, 0xb2, 0x12, 0x57, 0xbd, 0xdf
1695 };
1696 
1697 /* Test Case 3 */
1698 # define A3 A2
1699 static const u8 K3[] = {
1700     0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1701     0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
1702 };
1703 
1704 static const u8 P3[] = {
1705     0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1706     0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1707     0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1708     0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1709     0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1710     0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1711     0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1712     0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1713 };
1714 
1715 static const u8 IV3[] = {
1716     0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1717     0xde, 0xca, 0xf8, 0x88
1718 };
1719 
1720 static const u8 C3[] = {
1721     0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1722     0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1723     0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1724     0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1725     0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1726     0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1727     0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1728     0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85
1729 };
1730 
1731 static const u8 T3[] = {
1732     0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd, 0x64, 0xa6,
1733     0x2c, 0xf3, 0x5a, 0xbd, 0x2b, 0xa6, 0xfa, 0xb4
1734 };
1735 
1736 /* Test Case 4 */
1737 # define K4 K3
1738 # define IV4 IV3
1739 static const u8 P4[] = {
1740     0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1741     0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1742     0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1743     0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1744     0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1745     0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1746     0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1747     0xba, 0x63, 0x7b, 0x39
1748 };
1749 
1750 static const u8 A4[] = {
1751     0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1752     0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1753     0xab, 0xad, 0xda, 0xd2
1754 };
1755 
1756 static const u8 C4[] = {
1757     0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1758     0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1759     0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1760     0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1761     0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1762     0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1763     0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1764     0x3d, 0x58, 0xe0, 0x91
1765 };
1766 
1767 static const u8 T4[] = {
1768     0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21, 0xa5, 0xdb,
1769     0x94, 0xfa, 0xe9, 0x5a, 0xe7, 0x12, 0x1a, 0x47
1770 };
1771 
1772 /* Test Case 5 */
1773 # define K5 K4
1774 # define P5 P4
1775 # define A5 A4
1776 static const u8 IV5[] = {
1777     0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad
1778 };
1779 
1780 static const u8 C5[] = {
1781     0x61, 0x35, 0x3b, 0x4c, 0x28, 0x06, 0x93, 0x4a,
1782     0x77, 0x7f, 0xf5, 0x1f, 0xa2, 0x2a, 0x47, 0x55,
1783     0x69, 0x9b, 0x2a, 0x71, 0x4f, 0xcd, 0xc6, 0xf8,
1784     0x37, 0x66, 0xe5, 0xf9, 0x7b, 0x6c, 0x74, 0x23,
1785     0x73, 0x80, 0x69, 0x00, 0xe4, 0x9f, 0x24, 0xb2,
1786     0x2b, 0x09, 0x75, 0x44, 0xd4, 0x89, 0x6b, 0x42,
1787     0x49, 0x89, 0xb5, 0xe1, 0xeb, 0xac, 0x0f, 0x07,
1788     0xc2, 0x3f, 0x45, 0x98
1789 };
1790 
1791 static const u8 T5[] = {
1792     0x36, 0x12, 0xd2, 0xe7, 0x9e, 0x3b, 0x07, 0x85,
1793     0x56, 0x1b, 0xe1, 0x4a, 0xac, 0xa2, 0xfc, 0xcb
1794 };
1795 
1796 /* Test Case 6 */
1797 # define K6 K5
1798 # define P6 P5
1799 # define A6 A5
1800 static const u8 IV6[] = {
1801     0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
1802     0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
1803     0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
1804     0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
1805     0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
1806     0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
1807     0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
1808     0xa6, 0x37, 0xb3, 0x9b
1809 };
1810 
1811 static const u8 C6[] = {
1812     0x8c, 0xe2, 0x49, 0x98, 0x62, 0x56, 0x15, 0xb6,
1813     0x03, 0xa0, 0x33, 0xac, 0xa1, 0x3f, 0xb8, 0x94,
1814     0xbe, 0x91, 0x12, 0xa5, 0xc3, 0xa2, 0x11, 0xa8,
1815     0xba, 0x26, 0x2a, 0x3c, 0xca, 0x7e, 0x2c, 0xa7,
1816     0x01, 0xe4, 0xa9, 0xa4, 0xfb, 0xa4, 0x3c, 0x90,
1817     0xcc, 0xdc, 0xb2, 0x81, 0xd4, 0x8c, 0x7c, 0x6f,
1818     0xd6, 0x28, 0x75, 0xd2, 0xac, 0xa4, 0x17, 0x03,
1819     0x4c, 0x34, 0xae, 0xe5
1820 };
1821 
1822 static const u8 T6[] = {
1823     0x61, 0x9c, 0xc5, 0xae, 0xff, 0xfe, 0x0b, 0xfa,
1824     0x46, 0x2a, 0xf4, 0x3c, 0x16, 0x99, 0xd0, 0x50
1825 };
1826 
1827 /* Test Case 7 */
1828 static const u8 K7[24], *P7 = NULL, *A7 = NULL, IV7[12], *C7 = NULL;
1829 static const u8 T7[] = {
1830     0xcd, 0x33, 0xb2, 0x8a, 0xc7, 0x73, 0xf7, 0x4b,
1831     0xa0, 0x0e, 0xd1, 0xf3, 0x12, 0x57, 0x24, 0x35
1832 };
1833 
1834 /* Test Case 8 */
1835 # define K8 K7
1836 # define IV8 IV7
1837 # define A8 A7
1838 static const u8 P8[16];
1839 static const u8 C8[] = {
1840     0x98, 0xe7, 0x24, 0x7c, 0x07, 0xf0, 0xfe, 0x41,
1841     0x1c, 0x26, 0x7e, 0x43, 0x84, 0xb0, 0xf6, 0x00
1842 };
1843 
1844 static const u8 T8[] = {
1845     0x2f, 0xf5, 0x8d, 0x80, 0x03, 0x39, 0x27, 0xab,
1846     0x8e, 0xf4, 0xd4, 0x58, 0x75, 0x14, 0xf0, 0xfb
1847 };
1848 
1849 /* Test Case 9 */
1850 # define A9 A8
1851 static const u8 K9[] = {
1852     0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1853     0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
1854     0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c
1855 };
1856 
1857 static const u8 P9[] = {
1858     0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1859     0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1860     0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1861     0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1862     0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1863     0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1864     0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1865     0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1866 };
1867 
1868 static const u8 IV9[] = {
1869     0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1870     0xde, 0xca, 0xf8, 0x88
1871 };
1872 
1873 static const u8 C9[] = {
1874     0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1875     0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1876     0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1877     0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1878     0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1879     0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1880     0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1881     0xcc, 0xda, 0x27, 0x10, 0xac, 0xad, 0xe2, 0x56
1882 };
1883 
1884 static const u8 T9[] = {
1885     0x99, 0x24, 0xa7, 0xc8, 0x58, 0x73, 0x36, 0xbf,
1886     0xb1, 0x18, 0x02, 0x4d, 0xb8, 0x67, 0x4a, 0x14
1887 };
1888 
1889 /* Test Case 10 */
1890 # define K10 K9
1891 # define IV10 IV9
1892 static const u8 P10[] = {
1893     0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1894     0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1895     0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1896     0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1897     0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1898     0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1899     0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1900     0xba, 0x63, 0x7b, 0x39
1901 };
1902 
1903 static const u8 A10[] = {
1904     0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1905     0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1906     0xab, 0xad, 0xda, 0xd2
1907 };
1908 
1909 static const u8 C10[] = {
1910     0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1911     0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1912     0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1913     0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1914     0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1915     0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1916     0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1917     0xcc, 0xda, 0x27, 0x10
1918 };
1919 
1920 static const u8 T10[] = {
1921     0x25, 0x19, 0x49, 0x8e, 0x80, 0xf1, 0x47, 0x8f,
1922     0x37, 0xba, 0x55, 0xbd, 0x6d, 0x27, 0x61, 0x8c
1923 };
1924 
1925 /* Test Case 11 */
1926 # define K11 K10
1927 # define P11 P10
1928 # define A11 A10
1929 static const u8 IV11[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
1930 
1931 static const u8 C11[] = {
1932     0x0f, 0x10, 0xf5, 0x99, 0xae, 0x14, 0xa1, 0x54,
1933     0xed, 0x24, 0xb3, 0x6e, 0x25, 0x32, 0x4d, 0xb8,
1934     0xc5, 0x66, 0x63, 0x2e, 0xf2, 0xbb, 0xb3, 0x4f,
1935     0x83, 0x47, 0x28, 0x0f, 0xc4, 0x50, 0x70, 0x57,
1936     0xfd, 0xdc, 0x29, 0xdf, 0x9a, 0x47, 0x1f, 0x75,
1937     0xc6, 0x65, 0x41, 0xd4, 0xd4, 0xda, 0xd1, 0xc9,
1938     0xe9, 0x3a, 0x19, 0xa5, 0x8e, 0x8b, 0x47, 0x3f,
1939     0xa0, 0xf0, 0x62, 0xf7
1940 };
1941 
1942 static const u8 T11[] = {
1943     0x65, 0xdc, 0xc5, 0x7f, 0xcf, 0x62, 0x3a, 0x24,
1944     0x09, 0x4f, 0xcc, 0xa4, 0x0d, 0x35, 0x33, 0xf8
1945 };
1946 
1947 /* Test Case 12 */
1948 # define K12 K11
1949 # define P12 P11
1950 # define A12 A11
1951 static const u8 IV12[] = {
1952     0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
1953     0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
1954     0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
1955     0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
1956     0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
1957     0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
1958     0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
1959     0xa6, 0x37, 0xb3, 0x9b
1960 };
1961 
1962 static const u8 C12[] = {
1963     0xd2, 0x7e, 0x88, 0x68, 0x1c, 0xe3, 0x24, 0x3c,
1964     0x48, 0x30, 0x16, 0x5a, 0x8f, 0xdc, 0xf9, 0xff,
1965     0x1d, 0xe9, 0xa1, 0xd8, 0xe6, 0xb4, 0x47, 0xef,
1966     0x6e, 0xf7, 0xb7, 0x98, 0x28, 0x66, 0x6e, 0x45,
1967     0x81, 0xe7, 0x90, 0x12, 0xaf, 0x34, 0xdd, 0xd9,
1968     0xe2, 0xf0, 0x37, 0x58, 0x9b, 0x29, 0x2d, 0xb3,
1969     0xe6, 0x7c, 0x03, 0x67, 0x45, 0xfa, 0x22, 0xe7,
1970     0xe9, 0xb7, 0x37, 0x3b
1971 };
1972 
1973 static const u8 T12[] = {
1974     0xdc, 0xf5, 0x66, 0xff, 0x29, 0x1c, 0x25, 0xbb,
1975     0xb8, 0x56, 0x8f, 0xc3, 0xd3, 0x76, 0xa6, 0xd9
1976 };
1977 
1978 /* Test Case 13 */
1979 static const u8 K13[32], *P13 = NULL, *A13 = NULL, IV13[12], *C13 = NULL;
1980 static const u8 T13[] = {
1981     0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9,
1982     0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b
1983 };
1984 
1985 /* Test Case 14 */
1986 # define K14 K13
1987 # define A14 A13
1988 static const u8 P14[16], IV14[12];
1989 static const u8 C14[] = {
1990     0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60, 0x6b, 0x6e,
1991     0x07, 0x4e, 0xc5, 0xd3, 0xba, 0xf3, 0x9d, 0x18
1992 };
1993 
1994 static const u8 T14[] = {
1995     0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99, 0x6b, 0xf0,
1996     0x26, 0x5b, 0x98, 0xb5, 0xd4, 0x8a, 0xb9, 0x19
1997 };
1998 
1999 /* Test Case 15 */
2000 # define A15 A14
2001 static const u8 K15[] = {
2002     0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2003     0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
2004     0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2005     0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
2006 };
2007 
2008 static const u8 P15[] = {
2009     0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2010     0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2011     0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2012     0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2013     0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2014     0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2015     0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2016     0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
2017 };
2018 
2019 static const u8 IV15[] = {
2020     0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
2021     0xde, 0xca, 0xf8, 0x88
2022 };
2023 
2024 static const u8 C15[] = {
2025     0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2026     0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2027     0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2028     0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2029     0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2030     0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2031     0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2032     0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2033 };
2034 
2035 static const u8 T15[] = {
2036     0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34, 0x71, 0xbd,
2037     0xec, 0x1a, 0x50, 0x22, 0x70, 0xe3, 0xcc, 0x6c
2038 };
2039 
2040 /* Test Case 16 */
2041 # define K16 K15
2042 # define IV16 IV15
2043 static const u8 P16[] = {
2044     0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2045     0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2046     0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2047     0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2048     0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2049     0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2050     0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2051     0xba, 0x63, 0x7b, 0x39
2052 };
2053 
2054 static const u8 A16[] = {
2055     0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2056     0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2057     0xab, 0xad, 0xda, 0xd2
2058 };
2059 
2060 static const u8 C16[] = {
2061     0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2062     0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2063     0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2064     0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2065     0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2066     0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2067     0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2068     0xbc, 0xc9, 0xf6, 0x62
2069 };
2070 
2071 static const u8 T16[] = {
2072     0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e, 0x17, 0x68,
2073     0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b
2074 };
2075 
2076 /* Test Case 17 */
2077 # define K17 K16
2078 # define P17 P16
2079 # define A17 A16
2080 static const u8 IV17[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
2081 
2082 static const u8 C17[] = {
2083     0xc3, 0x76, 0x2d, 0xf1, 0xca, 0x78, 0x7d, 0x32,
2084     0xae, 0x47, 0xc1, 0x3b, 0xf1, 0x98, 0x44, 0xcb,
2085     0xaf, 0x1a, 0xe1, 0x4d, 0x0b, 0x97, 0x6a, 0xfa,
2086     0xc5, 0x2f, 0xf7, 0xd7, 0x9b, 0xba, 0x9d, 0xe0,
2087     0xfe, 0xb5, 0x82, 0xd3, 0x39, 0x34, 0xa4, 0xf0,
2088     0x95, 0x4c, 0xc2, 0x36, 0x3b, 0xc7, 0x3f, 0x78,
2089     0x62, 0xac, 0x43, 0x0e, 0x64, 0xab, 0xe4, 0x99,
2090     0xf4, 0x7c, 0x9b, 0x1f
2091 };
2092 
2093 static const u8 T17[] = {
2094     0x3a, 0x33, 0x7d, 0xbf, 0x46, 0xa7, 0x92, 0xc4,
2095     0x5e, 0x45, 0x49, 0x13, 0xfe, 0x2e, 0xa8, 0xf2
2096 };
2097 
2098 /* Test Case 18 */
2099 # define K18 K17
2100 # define P18 P17
2101 # define A18 A17
2102 static const u8 IV18[] = {
2103     0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
2104     0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
2105     0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
2106     0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
2107     0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
2108     0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
2109     0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
2110     0xa6, 0x37, 0xb3, 0x9b
2111 };
2112 
2113 static const u8 C18[] = {
2114     0x5a, 0x8d, 0xef, 0x2f, 0x0c, 0x9e, 0x53, 0xf1,
2115     0xf7, 0x5d, 0x78, 0x53, 0x65, 0x9e, 0x2a, 0x20,
2116     0xee, 0xb2, 0xb2, 0x2a, 0xaf, 0xde, 0x64, 0x19,
2117     0xa0, 0x58, 0xab, 0x4f, 0x6f, 0x74, 0x6b, 0xf4,
2118     0x0f, 0xc0, 0xc3, 0xb7, 0x80, 0xf2, 0x44, 0x45,
2119     0x2d, 0xa3, 0xeb, 0xf1, 0xc5, 0xd8, 0x2c, 0xde,
2120     0xa2, 0x41, 0x89, 0x97, 0x20, 0x0e, 0xf8, 0x2e,
2121     0x44, 0xae, 0x7e, 0x3f
2122 };
2123 
2124 static const u8 T18[] = {
2125     0xa4, 0x4a, 0x82, 0x66, 0xee, 0x1c, 0x8e, 0xb0,
2126     0xc8, 0xb5, 0xd4, 0xcf, 0x5a, 0xe9, 0xf1, 0x9a
2127 };
2128 
2129 /* Test Case 19 */
2130 # define K19 K1
2131 # define P19 P1
2132 # define IV19 IV1
2133 # define C19 C1
2134 static const u8 A19[] = {
2135     0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2136     0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2137     0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2138     0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2139     0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2140     0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2141     0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2142     0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55,
2143     0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2144     0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2145     0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2146     0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2147     0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2148     0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2149     0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2150     0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2151 };
2152 
2153 static const u8 T19[] = {
2154     0x5f, 0xea, 0x79, 0x3a, 0x2d, 0x6f, 0x97, 0x4d,
2155     0x37, 0xe6, 0x8e, 0x0c, 0xb8, 0xff, 0x94, 0x92
2156 };
2157 
2158 /* Test Case 20 */
2159 # define K20 K1
2160 # define A20 A1
2161 /* this results in 0xff in counter LSB */
2162 static const u8 IV20[64] = { 0xff, 0xff, 0xff, 0xff };
2163 
2164 static const u8 P20[288];
2165 static const u8 C20[] = {
2166     0x56, 0xb3, 0x37, 0x3c, 0xa9, 0xef, 0x6e, 0x4a,
2167     0x2b, 0x64, 0xfe, 0x1e, 0x9a, 0x17, 0xb6, 0x14,
2168     0x25, 0xf1, 0x0d, 0x47, 0xa7, 0x5a, 0x5f, 0xce,
2169     0x13, 0xef, 0xc6, 0xbc, 0x78, 0x4a, 0xf2, 0x4f,
2170     0x41, 0x41, 0xbd, 0xd4, 0x8c, 0xf7, 0xc7, 0x70,
2171     0x88, 0x7a, 0xfd, 0x57, 0x3c, 0xca, 0x54, 0x18,
2172     0xa9, 0xae, 0xff, 0xcd, 0x7c, 0x5c, 0xed, 0xdf,
2173     0xc6, 0xa7, 0x83, 0x97, 0xb9, 0xa8, 0x5b, 0x49,
2174     0x9d, 0xa5, 0x58, 0x25, 0x72, 0x67, 0xca, 0xab,
2175     0x2a, 0xd0, 0xb2, 0x3c, 0xa4, 0x76, 0xa5, 0x3c,
2176     0xb1, 0x7f, 0xb4, 0x1c, 0x4b, 0x8b, 0x47, 0x5c,
2177     0xb4, 0xf3, 0xf7, 0x16, 0x50, 0x94, 0xc2, 0x29,
2178     0xc9, 0xe8, 0xc4, 0xdc, 0x0a, 0x2a, 0x5f, 0xf1,
2179     0x90, 0x3e, 0x50, 0x15, 0x11, 0x22, 0x13, 0x76,
2180     0xa1, 0xcd, 0xb8, 0x36, 0x4c, 0x50, 0x61, 0xa2,
2181     0x0c, 0xae, 0x74, 0xbc, 0x4a, 0xcd, 0x76, 0xce,
2182     0xb0, 0xab, 0xc9, 0xfd, 0x32, 0x17, 0xef, 0x9f,
2183     0x8c, 0x90, 0xbe, 0x40, 0x2d, 0xdf, 0x6d, 0x86,
2184     0x97, 0xf4, 0xf8, 0x80, 0xdf, 0xf1, 0x5b, 0xfb,
2185     0x7a, 0x6b, 0x28, 0x24, 0x1e, 0xc8, 0xfe, 0x18,
2186     0x3c, 0x2d, 0x59, 0xe3, 0xf9, 0xdf, 0xff, 0x65,
2187     0x3c, 0x71, 0x26, 0xf0, 0xac, 0xb9, 0xe6, 0x42,
2188     0x11, 0xf4, 0x2b, 0xae, 0x12, 0xaf, 0x46, 0x2b,
2189     0x10, 0x70, 0xbe, 0xf1, 0xab, 0x5e, 0x36, 0x06,
2190     0x87, 0x2c, 0xa1, 0x0d, 0xee, 0x15, 0xb3, 0x24,
2191     0x9b, 0x1a, 0x1b, 0x95, 0x8f, 0x23, 0x13, 0x4c,
2192     0x4b, 0xcc, 0xb7, 0xd0, 0x32, 0x00, 0xbc, 0xe4,
2193     0x20, 0xa2, 0xf8, 0xeb, 0x66, 0xdc, 0xf3, 0x64,
2194     0x4d, 0x14, 0x23, 0xc1, 0xb5, 0x69, 0x90, 0x03,
2195     0xc1, 0x3e, 0xce, 0xf4, 0xbf, 0x38, 0xa3, 0xb6,
2196     0x0e, 0xed, 0xc3, 0x40, 0x33, 0xba, 0xc1, 0x90,
2197     0x27, 0x83, 0xdc, 0x6d, 0x89, 0xe2, 0xe7, 0x74,
2198     0x18, 0x8a, 0x43, 0x9c, 0x7e, 0xbc, 0xc0, 0x67,
2199     0x2d, 0xbd, 0xa4, 0xdd, 0xcf, 0xb2, 0x79, 0x46,
2200     0x13, 0xb0, 0xbe, 0x41, 0x31, 0x5e, 0xf7, 0x78,
2201     0x70, 0x8a, 0x70, 0xee, 0x7d, 0x75, 0x16, 0x5c
2202 };
2203 
2204 static const u8 T20[] = {
2205     0x8b, 0x30, 0x7f, 0x6b, 0x33, 0x28, 0x6d, 0x0a,
2206     0xb0, 0x26, 0xa9, 0xed, 0x3f, 0xe1, 0xe8, 0x5f
2207 };
2208 
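/*
 * Runs one test vector through the API: encrypt and check both the tag
 * and the ciphertext, then decrypt and check the tag and the recovered
 * plaintext.  NULL plaintext/ciphertext pointers denote the empty-message
 * cases 1, 7 and 13.
 */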
2209 # define TEST_CASE(n)    do {                                    \
2210         u8 out[sizeof(P##n)];                                   \
2211         AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);          \
2212         CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);  \
2213         CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));          \
2214         memset(out,0,sizeof(out));                              \
2215         if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));    \
2216         if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));     \
2217         if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||               \
2218             (C##n && memcmp(out,C##n,sizeof(out))))             \
2219                 ret++, printf ("encrypt test#%d failed.\n",n);  \
2220         CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));          \
2221         memset(out,0,sizeof(out));                              \
2222         if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));    \
2223         if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));     \
2224         if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||               \
2225             (P##n && memcmp(out,P##n,sizeof(out))))             \
2226                 ret++, printf ("decrypt test#%d failed.\n",n);  \
2227         } while(0)
2228 
2229 int main(void)
2230 {
2231     GCM128_CONTEXT ctx;
2232     AES_KEY key;
2233     int ret = 0;
2234 
2235     TEST_CASE(1);
2236     TEST_CASE(2);
2237     TEST_CASE(3);
2238     TEST_CASE(4);
2239     TEST_CASE(5);
2240     TEST_CASE(6);
2241     TEST_CASE(7);
2242     TEST_CASE(8);
2243     TEST_CASE(9);
2244     TEST_CASE(10);
2245     TEST_CASE(11);
2246     TEST_CASE(12);
2247     TEST_CASE(13);
2248     TEST_CASE(14);
2249     TEST_CASE(15);
2250     TEST_CASE(16);
2251     TEST_CASE(17);
2252     TEST_CASE(18);
2253     TEST_CASE(19);
2254     TEST_CASE(20);
2255 
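    /*
     * Optional micro-benchmark: time one pass of GCM and one pass of raw
     * CTR over the same 1KB buffer and report cycles per byte; the
     * difference approximates the cost of GHASH.  When GHASH is defined,
     * the GHASH routine is also timed on its own.
     */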
2256 # ifdef OPENSSL_CPUID_OBJ
2257     {
2258         size_t start, gcm_t, ctr_t, OPENSSL_rdtsc();
2259         union {
2260             u64 u;
2261             u8 c[1024];
2262         } buf;
2263         int i;
2264 
2265         AES_set_encrypt_key(K1, sizeof(K1) * 8, &key);
2266         CRYPTO_gcm128_init(&ctx, &key, (block128_f) AES_encrypt);
2267         CRYPTO_gcm128_setiv(&ctx, IV1, sizeof(IV1));
2268 
2269         CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2270         start = OPENSSL_rdtsc();
2271         CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2272         gcm_t = OPENSSL_rdtsc() - start;
2273 
2274         CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2275                               &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2276                               (block128_f) AES_encrypt);
2277         start = OPENSSL_rdtsc();
2278         CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2279                               &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2280                               (block128_f) AES_encrypt);
2281         ctr_t = OPENSSL_rdtsc() - start;
2282 
2283         printf("%.2f-%.2f=%.2f\n",
2284                gcm_t / (double)sizeof(buf),
2285                ctr_t / (double)sizeof(buf),
2286                (gcm_t - ctr_t) / (double)sizeof(buf));
2287 #  ifdef GHASH
2288         {
2289             void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
2290                                  const u8 *inp, size_t len) = ctx.ghash;
2291 
2292             GHASH((&ctx), buf.c, sizeof(buf));
2293             start = OPENSSL_rdtsc();
2294             for (i = 0; i < 100; ++i)
2295                 GHASH((&ctx), buf.c, sizeof(buf));
2296             gcm_t = OPENSSL_rdtsc() - start;
2297             printf("%.2f\n", gcm_t / (double)sizeof(buf) / (double)i);
2298         }
2299 #  endif
2300     }
2301 # endif
2302 
2303     return ret;
2304 }
2305 #endif
2306