xref: /openbsd-src/lib/libcrypto/sha/sha512.c (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3  * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
4  * according to the OpenSSL license [found in ../../LICENSE].
5  * ====================================================================
6  */
7 #include <openssl/opensslconf.h>
8 #ifdef OPENSSL_FIPS
9 #include <openssl/fips.h>
10 #endif
11 
12 #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
13 /*
14  * IMPLEMENTATION NOTES.
15  *
16  * As you might have noticed 32-bit hash algorithms:
17  *
18  * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
19  * - optimized versions implement two transform functions: one operating
20  *   on [aligned] data in host byte order and one - on data in input
21  *   stream byte order;
22  * - share common byte-order neutral collector and padding function
23  *   implementations, ../md32_common.h;
24  *
25  * Neither of the above applies to this SHA-512 implementation. Reasons
26  * [in reverse order] are:
27  *
28  * - it's the only 64-bit hash algorithm at the time of this writing,
29  *   there is no need for common collector/padding implementation [yet];
30  * - by supporting only one transform function [which operates on
31  *   *aligned* data in input stream byte order, big-endian in this case]
32  *   we minimize burden of maintenance in two ways: a) collector/padding
33  *   function is simpler; b) only one transform function to stare at;
34  * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
35  *   apply a number of optimizations to mitigate potential performance
36  *   penalties caused by previous design decision;
37  *
38  * Caveat lector.
39  *
40  * Implementation relies on the fact that "long long" is 64-bit on
41  * both 32- and 64-bit platforms. If some compiler vendor comes up
42  * with 128-bit long long, adjustment to sha.h would be required.
43  * As this implementation relies on 64-bit integer type, it's totally
44  * inappropriate for platforms which don't support it, most notably
45  * 16-bit platforms.
46  *					<appro@fy.chalmers.se>
47  */
48 #include <stdlib.h>
49 #include <string.h>
50 
51 #include <openssl/crypto.h>
52 #include <openssl/sha.h>
53 #include <openssl/opensslv.h>
54 
55 #include "cryptlib.h"
56 
/*
 * Version banner for this module: the literal "SHA-512" concatenated with
 * the library-wide OPENSSL_VERSION_PTEXT string.
 */
57 const char SHA512_version[]="SHA-512" OPENSSL_VERSION_PTEXT;
58 
/*
 * SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA is defined for x86, x86_64 and
 * s390/s390x compilers, and whenever an assembler block function
 * (SHA512_ASM) is in use.  When it is defined, SHA512_Update() hands the
 * caller's buffer straight to sha512_block_data_order() without first
 * checking its alignment; otherwise misaligned input is copied into the
 * context buffer one block at a time.
 */
59 #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
60     defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
61     defined(__s390__) || defined(__s390x__) || \
62     defined(SHA512_ASM)
63 #define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
64 #endif
65 
66 int SHA384_Init (SHA512_CTX *c)
67 	{
68 #ifdef OPENSSL_FIPS
69 	FIPS_selftest_check();
70 #endif
71 	c->h[0]=U64(0xcbbb9d5dc1059ed8);
72 	c->h[1]=U64(0x629a292a367cd507);
73 	c->h[2]=U64(0x9159015a3070dd17);
74 	c->h[3]=U64(0x152fecd8f70e5939);
75 	c->h[4]=U64(0x67332667ffc00b31);
76 	c->h[5]=U64(0x8eb44a8768581511);
77 	c->h[6]=U64(0xdb0c2e0d64f98fa7);
78 	c->h[7]=U64(0x47b5481dbefa4fa4);
79         c->Nl=0;        c->Nh=0;
80         c->num=0;       c->md_len=SHA384_DIGEST_LENGTH;
81         return 1;
82 	}
83 
84 int SHA512_Init (SHA512_CTX *c)
85 	{
86 #ifdef OPENSSL_FIPS
87 	FIPS_selftest_check();
88 #endif
89 	c->h[0]=U64(0x6a09e667f3bcc908);
90 	c->h[1]=U64(0xbb67ae8584caa73b);
91 	c->h[2]=U64(0x3c6ef372fe94f82b);
92 	c->h[3]=U64(0xa54ff53a5f1d36f1);
93 	c->h[4]=U64(0x510e527fade682d1);
94 	c->h[5]=U64(0x9b05688c2b3e6c1f);
95 	c->h[6]=U64(0x1f83d9abfb41bd6b);
96 	c->h[7]=U64(0x5be0cd19137e2179);
97         c->Nl=0;        c->Nh=0;
98         c->num=0;       c->md_len=SHA512_DIGEST_LENGTH;
99         return 1;
100 	}
101 
/*
 * Forward declaration of the compression function: processes `num`
 * consecutive blocks starting at `in` and folds them into ctx->h.
 * When SHA512_ASM is not defined the C implementation later in this file
 * is used and the symbol is file-local (static); with SHA512_ASM the
 * function has external linkage and is expected to be supplied elsewhere.
 */
102 #ifndef SHA512_ASM
103 static
104 #endif
105 void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
106 
107 int SHA512_Final (unsigned char *md, SHA512_CTX *c)
108 	{
109 	unsigned char *p=(unsigned char *)c->u.p;
110 	size_t n=c->num;
111 
112 	p[n]=0x80;	/* There always is a room for one */
113 	n++;
114 	if (n > (sizeof(c->u)-16))
115 		memset (p+n,0,sizeof(c->u)-n), n=0,
116 		sha512_block_data_order (c,p,1);
117 
118 	memset (p+n,0,sizeof(c->u)-16-n);
119 #ifdef	B_ENDIAN
120 	c->u.d[SHA_LBLOCK-2] = c->Nh;
121 	c->u.d[SHA_LBLOCK-1] = c->Nl;
122 #else
123 	p[sizeof(c->u)-1]  = (unsigned char)(c->Nl);
124 	p[sizeof(c->u)-2]  = (unsigned char)(c->Nl>>8);
125 	p[sizeof(c->u)-3]  = (unsigned char)(c->Nl>>16);
126 	p[sizeof(c->u)-4]  = (unsigned char)(c->Nl>>24);
127 	p[sizeof(c->u)-5]  = (unsigned char)(c->Nl>>32);
128 	p[sizeof(c->u)-6]  = (unsigned char)(c->Nl>>40);
129 	p[sizeof(c->u)-7]  = (unsigned char)(c->Nl>>48);
130 	p[sizeof(c->u)-8]  = (unsigned char)(c->Nl>>56);
131 	p[sizeof(c->u)-9]  = (unsigned char)(c->Nh);
132 	p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
133 	p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
134 	p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
135 	p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
136 	p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
137 	p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
138 	p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
139 #endif
140 
141 	sha512_block_data_order (c,p,1);
142 
143 	if (md==0) return 0;
144 
145 	switch (c->md_len)
146 		{
147 		/* Let compiler decide if it's appropriate to unroll... */
148 		case SHA384_DIGEST_LENGTH:
149 			for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
150 				{
151 				SHA_LONG64 t = c->h[n];
152 
153 				*(md++)	= (unsigned char)(t>>56);
154 				*(md++)	= (unsigned char)(t>>48);
155 				*(md++)	= (unsigned char)(t>>40);
156 				*(md++)	= (unsigned char)(t>>32);
157 				*(md++)	= (unsigned char)(t>>24);
158 				*(md++)	= (unsigned char)(t>>16);
159 				*(md++)	= (unsigned char)(t>>8);
160 				*(md++)	= (unsigned char)(t);
161 				}
162 			break;
163 		case SHA512_DIGEST_LENGTH:
164 			for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
165 				{
166 				SHA_LONG64 t = c->h[n];
167 
168 				*(md++)	= (unsigned char)(t>>56);
169 				*(md++)	= (unsigned char)(t>>48);
170 				*(md++)	= (unsigned char)(t>>40);
171 				*(md++)	= (unsigned char)(t>>32);
172 				*(md++)	= (unsigned char)(t>>24);
173 				*(md++)	= (unsigned char)(t>>16);
174 				*(md++)	= (unsigned char)(t>>8);
175 				*(md++)	= (unsigned char)(t);
176 				}
177 			break;
178 		/* ... as well as make sure md_len is not abused. */
179 		default:	return 0;
180 		}
181 
182 	return 1;
183 	}
184 
185 int SHA384_Final (unsigned char *md,SHA512_CTX *c)
186 {   return SHA512_Final (md,c);   }
187 
188 int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
189 	{
190 	SHA_LONG64	l;
191 	unsigned char  *p=c->u.p;
192 	const unsigned char *data=(const unsigned char *)_data;
193 
194 	if (len==0) return  1;
195 
196 	l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
197 	if (l < c->Nl)		c->Nh++;
198 	if (sizeof(len)>=8)	c->Nh+=(((SHA_LONG64)len)>>61);
199 	c->Nl=l;
200 
201 	if (c->num != 0)
202 		{
203 		size_t n = sizeof(c->u) - c->num;
204 
205 		if (len < n)
206 			{
207 			memcpy (p+c->num,data,len), c->num += len;
208 			return 1;
209 			}
210 		else	{
211 			memcpy (p+c->num,data,n), c->num = 0;
212 			len-=n, data+=n;
213 			sha512_block_data_order (c,p,1);
214 			}
215 		}
216 
217 	if (len >= sizeof(c->u))
218 		{
219 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
220 		if ((size_t)data%sizeof(c->u.d[0]) != 0)
221 			while (len >= sizeof(c->u))
222 				memcpy (p,data,sizeof(c->u)),
223 				sha512_block_data_order (c,p,1),
224 				len  -= sizeof(c->u),
225 				data += sizeof(c->u);
226 		else
227 #endif
228 			sha512_block_data_order (c,data,len/sizeof(c->u)),
229 			data += len,
230 			len  %= sizeof(c->u),
231 			data -= len;
232 		}
233 
234 	if (len != 0)	memcpy (p,data,len), c->num = (int)len;
235 
236 	return 1;
237 	}
238 
239 int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
240 {   return SHA512_Update (c,data,len);   }
241 
242 void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
243 {   sha512_block_data_order (c,data,1);  }
244 
245 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
246 	{
247 	SHA512_CTX c;
248 	static unsigned char m[SHA384_DIGEST_LENGTH];
249 
250 	if (md == NULL) md=m;
251 	SHA384_Init(&c);
252 	SHA512_Update(&c,d,n);
253 	SHA512_Final(md,&c);
254 	OPENSSL_cleanse(&c,sizeof(c));
255 	return(md);
256 	}
257 
258 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
259 	{
260 	SHA512_CTX c;
261 	static unsigned char m[SHA512_DIGEST_LENGTH];
262 
263 	if (md == NULL) md=m;
264 	SHA512_Init(&c);
265 	SHA512_Update(&c,d,n);
266 	SHA512_Final(md,&c);
267 	OPENSSL_cleanse(&c,sizeof(c));
268 	return(md);
269 	}
270 
271 #ifndef SHA512_ASM
/*
 * Table of the 80 64-bit round constants used by the C block functions
 * in this file: entry i is added into T1 during round i.
 */
272 static const SHA_LONG64 K512[80] = {
273         U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
274         U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
275         U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
276         U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
277         U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
278         U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
279         U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
280         U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
281         U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
282         U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
283         U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
284         U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
285         U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
286         U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
287         U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
288         U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
289         U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
290         U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
291         U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
292         U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
293         U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
294         U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
295         U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
296         U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
297         U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
298         U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
299         U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
300         U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
301         U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
302         U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
303         U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
304         U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
305         U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
306         U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
307         U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
308         U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
309         U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
310         U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
311         U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
312         U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
313 
/*
 * Compiler- and CPU-specific fast paths for the two primitives used by the
 * C block function:
 *
 *   ROTR(a,n)  - rotate the 64-bit value a right by the constant n;
 *   PULL64(x)  - load the big-endian 64-bit word stored at lvalue x on a
 *                little-endian host.
 *
 * Whatever is left undefined here is supplied by the portable fallbacks
 * that follow this section.  Defining PEDANTIC disables all of it.
 *
 * The result variable of the asm-based ROTR is declared SHA_LONG64 rather
 * than unsigned long: the rotate instructions operate on 64-bit registers,
 * and unsigned long is only 32 bits wide on LLP64 targets (e.g. 64-bit
 * Windows built with GCC).
 */
#ifndef PEDANTIC
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#  if defined(__x86_64) || defined(__x86_64__)
/* x86_64: single rorq with an immediate count ("J" = constant 0..63). */
#   define ROTR(a,n)	({ SHA_LONG64 ret;		\
				asm ("rorq %1,%0"	\
				: "=r"(ret)		\
				: "J"(n),"0"(a)		\
				: "cc"); ret;		})
#   if !defined(B_ENDIAN)
/* x86_64: load 64 bits in host order, then byte-swap with bswapq. */
#    define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));	\
				asm ("bswapq	%0"		\
				: "=r"(ret)			\
				: "0"(ret)); ret;		})
#   endif
#  elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#   if defined(I386_ONLY)
/*
 * i386 without bswap (I386_ONLY): swap the bytes of each 32-bit half with
 * xchgb/roll, then combine with the first-loaded half as the high word.
 */
#    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
			 unsigned int hi=p[0],lo=p[1];		\
				asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
				    "roll $16,%%eax; roll $16,%%edx; "\
				    "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
				: "=a"(lo),"=d"(hi)		\
				: "0"(lo),"1"(hi) : "cc");	\
				((SHA_LONG64)hi)<<32|lo;	})
#   else
/* i386 with bswap: byte-swap each 32-bit half and combine. */
#    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
			 unsigned int hi=p[0],lo=p[1];			\
				asm ("bswapl %0; bswapl %1;"	\
				: "=r"(lo),"=r"(hi)		\
				: "0"(lo),"1"(hi));		\
				((SHA_LONG64)hi)<<32|lo;	})
#   endif
#  elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
/* 64-bit PowerPC: rotrdi with an immediate count. */
#   define ROTR(a,n)	({ SHA_LONG64 ret;		\
				asm ("rotrdi %0,%1,%2"	\
				: "=r"(ret)		\
				: "r"(a),"K"(n)); ret;	})
#  endif
# elif defined(_MSC_VER)
#  if defined(_WIN64)	/* applies to both IA-64 and AMD64 */
#   define ROTR(a,n)	_rotr64((a),n)
#  endif
#  if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
/*
 * 32-bit MSVC: __fastcall passes x in ecx; the byte-swapped halves are
 * left in edx:eax, which is where the 64-bit return value is expected,
 * hence no explicit return statement.
 */
#   if defined(I386_ONLY)
    static SHA_LONG64 __fastcall __pull64be(const void *x)
    {	_asm	mov	edx, [ecx + 0]
	_asm	mov	eax, [ecx + 4]
	_asm	xchg	dh,dl
	_asm	xchg	ah,al
	_asm	rol	edx,16
	_asm	rol	eax,16
	_asm	xchg	dh,dl
	_asm	xchg	ah,al
    }
#   else
    static SHA_LONG64 __fastcall __pull64be(const void *x)
    {	_asm	mov	edx, [ecx + 0]
	_asm	mov	eax, [ecx + 4]
	_asm	bswap	edx
	_asm	bswap	eax
    }
#   endif
#   define PULL64(x) __pull64be(&(x))
#   if _MSC_VER<=1200
#    pragma inline_depth(0)
#   endif
#  endif
# endif
#endif
383 
#ifndef PULL64
/*
 * Portable fallback used when no platform-specific PULL64 was selected.
 *
 * B(x,j) reads byte j of the object at lvalue x and shifts it to the
 * position it occupies in a big-endian 64-bit word; PULL64(x) ORs the
 * eight bytes together, so the result is independent of host byte order
 * and of the alignment of x.  Both macro parameters are fully
 * parenthesized so that expression arguments expand as intended.
 */
#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#endif
388 
#ifndef ROTR
/*
 * Portable fallback: rotate the 64-bit unsigned value x right by s bits,
 * for 0 < s < 64.  The shift count is parenthesized so that an expression
 * argument such as ROTR(v,k+4) rotates by k+4 instead of expanding to
 * "v>>k+4 | v<<(64-k+4)".  Note that x is evaluated twice.
 */
#define ROTR(x,s)	(((x)>>(s)) | ((x)<<(64-(s))))
#endif
392 
/*
 * Bit-mixing functions used by the C block functions below.
 * Sigma0/Sigma1 are applied to the working variables a and e in every
 * round; sigma0/sigma1 are applied to earlier schedule words when
 * extending the message schedule (note their last term is a plain shift,
 * not a rotate).  All of them evaluate x more than once.
 */
393 #define Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
394 #define Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
395 #define sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
396 #define sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
397 
/*
 * Ch:  for each bit position, select the bit of y where x is 1 and the
 *      bit of z where x is 0.
 * Maj: for each bit position, the majority value of the three inputs.
 */
398 #define Ch(x,y,z)	(((x) & (y)) ^ ((~(x)) & (z)))
399 #define Maj(x,y,z)	(((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
400 
/*
 * Optional dispatch to an external SSE2 block routine (available only
 * when OPENSSL_IA32_SSE2 is set and neither OPENSSL_NO_ASM nor I386_ONLY
 * is defined).
 *
 * GO_FOR_SSE2 tests bit 26 of OPENSSL_ia32cap_P (presumably the SSE2
 * capability flag - confirm against the capability-vector definition).
 * If the bit is clear the macro does nothing.  If it is set, it calls
 * sha512_block_sse2() on ctx->h and then executes `return`, i.e. it
 * returns from the *enclosing* function, so it may only be used inside a
 * function returning void and before any state has been modified.
 */
401 #if defined(OPENSSL_IA32_SSE2) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY)
402 #define	GO_FOR_SSE2(ctx,in,num)		do {		\
403 	void	sha512_block_sse2(void *,const void *,size_t);	\
404 	if (!(OPENSSL_ia32cap_P & (1<<26))) break;	\
405 	sha512_block_sse2(ctx->h,in,num); return;	\
406 					} while (0)
407 #endif
408 
409 #ifdef OPENSSL_SMALL_FOOTPRINT
410 
411 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
412 	{
413 	const SHA_LONG64 *W=in;
414 	SHA_LONG64	a,b,c,d,e,f,g,h,s0,s1,T1,T2;
415 	SHA_LONG64	X[16];
416 	int i;
417 
418 #ifdef GO_FOR_SSE2
419 	GO_FOR_SSE2(ctx,in,num);
420 #endif
421 
422 			while (num--) {
423 
424 	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
425 	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];
426 
427 	for (i=0;i<16;i++)
428 		{
429 #ifdef B_ENDIAN
430 		T1 = X[i] = W[i];
431 #else
432 		T1 = X[i] = PULL64(W[i]);
433 #endif
434 		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
435 		T2 = Sigma0(a) + Maj(a,b,c);
436 		h = g;	g = f;	f = e;	e = d + T1;
437 		d = c;	c = b;	b = a;	a = T1 + T2;
438 		}
439 
440 	for (;i<80;i++)
441 		{
442 		s0 = X[(i+1)&0x0f];	s0 = sigma0(s0);
443 		s1 = X[(i+14)&0x0f];	s1 = sigma1(s1);
444 
445 		T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
446 		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
447 		T2 = Sigma0(a) + Maj(a,b,c);
448 		h = g;	g = f;	f = e;	e = d + T1;
449 		d = c;	c = b;	b = a;	a = T1 + T2;
450 		}
451 
452 	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
453 	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;
454 
455 			W+=SHA_LBLOCK;
456 			}
457 	}
458 
459 #else
460 
/*
 * One compression round for the unrolled block function.
 *
 * On entry T1 must already hold the schedule word for round i.  The macro
 * adds h, Sigma1(e), Ch(e,f,g) and K512[i] to T1, then overwrites the
 * variable passed as `h` with Sigma0(a)+Maj(a,b,c)+T1 and adds T1 to the
 * variable passed as `d`.  No values are moved between variables: the
 * caller instead rotates the argument list by one position on each
 * successive invocation.  T1 and K512 are taken from the enclosing scope.
 */
461 #define	ROUND_00_15(i,a,b,c,d,e,f,g,h)		do {	\
462 	T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];	\
463 	h = Sigma0(a) + Maj(a,b,c);			\
464 	d += T1;	h += T1;		} while (0)
465 
/*
 * Round for i >= 16: first extends the message schedule in the 16-word
 * ring buffer X (updating X[i mod 16] from X[i+1], X[i+9] and X[i+14],
 * indices mod 16), leaves the new word in T1, then performs the round via
 * ROUND_00_15.  s0, s1 and T1 are taken from the enclosing scope.
 */
466 #define	ROUND_16_80(i,a,b,c,d,e,f,g,h,X)	do {	\
467 	s0 = X[(i+1)&0x0f];	s0 = sigma0(s0);	\
468 	s1 = X[(i+14)&0x0f];	s1 = sigma1(s1);	\
469 	T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f];	\
470 	ROUND_00_15(i,a,b,c,d,e,f,g,h);		} while (0)
471 
/*
 * Unrolled compression function (used when OPENSSL_SMALL_FOOTPRINT is not
 * defined).
 *
 * Processes `num` consecutive blocks of SHA_LBLOCK 64-bit words starting
 * at `in` and accumulates the result into ctx->h.  Instead of shifting
 * the eight working variables after every round, each ROUND_* invocation
 * is given the variable names rotated by one position; after eight rounds
 * the names are back where they started, which is why the rounds are
 * written out (and the loop below advances) in groups of eight.
 */
472 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
473 	{
474 	const SHA_LONG64 *W=in;
475 	SHA_LONG64	a,b,c,d,e,f,g,h,s0,s1,T1;
476 	SHA_LONG64	X[16];
477 	int i;
478 
	/* May hand the whole call to the SSE2 routine and return from here. */
479 #ifdef GO_FOR_SSE2
480 	GO_FOR_SSE2(ctx,in,num);
481 #endif
482 
483 			while (num--) {
484 
	/* Start each block from the current chaining value. */
485 	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
486 	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];
487 
	/*
	 * Rounds 0..15: the schedule word is the input word itself, read
	 * directly on big-endian hosts and through PULL64 otherwise.
	 */
488 #ifdef B_ENDIAN
489 	T1 = X[0] = W[0];	ROUND_00_15(0,a,b,c,d,e,f,g,h);
490 	T1 = X[1] = W[1];	ROUND_00_15(1,h,a,b,c,d,e,f,g);
491 	T1 = X[2] = W[2];	ROUND_00_15(2,g,h,a,b,c,d,e,f);
492 	T1 = X[3] = W[3];	ROUND_00_15(3,f,g,h,a,b,c,d,e);
493 	T1 = X[4] = W[4];	ROUND_00_15(4,e,f,g,h,a,b,c,d);
494 	T1 = X[5] = W[5];	ROUND_00_15(5,d,e,f,g,h,a,b,c);
495 	T1 = X[6] = W[6];	ROUND_00_15(6,c,d,e,f,g,h,a,b);
496 	T1 = X[7] = W[7];	ROUND_00_15(7,b,c,d,e,f,g,h,a);
497 	T1 = X[8] = W[8];	ROUND_00_15(8,a,b,c,d,e,f,g,h);
498 	T1 = X[9] = W[9];	ROUND_00_15(9,h,a,b,c,d,e,f,g);
499 	T1 = X[10] = W[10];	ROUND_00_15(10,g,h,a,b,c,d,e,f);
500 	T1 = X[11] = W[11];	ROUND_00_15(11,f,g,h,a,b,c,d,e);
501 	T1 = X[12] = W[12];	ROUND_00_15(12,e,f,g,h,a,b,c,d);
502 	T1 = X[13] = W[13];	ROUND_00_15(13,d,e,f,g,h,a,b,c);
503 	T1 = X[14] = W[14];	ROUND_00_15(14,c,d,e,f,g,h,a,b);
504 	T1 = X[15] = W[15];	ROUND_00_15(15,b,c,d,e,f,g,h,a);
505 #else
506 	T1 = X[0]  = PULL64(W[0]);	ROUND_00_15(0,a,b,c,d,e,f,g,h);
507 	T1 = X[1]  = PULL64(W[1]);	ROUND_00_15(1,h,a,b,c,d,e,f,g);
508 	T1 = X[2]  = PULL64(W[2]);	ROUND_00_15(2,g,h,a,b,c,d,e,f);
509 	T1 = X[3]  = PULL64(W[3]);	ROUND_00_15(3,f,g,h,a,b,c,d,e);
510 	T1 = X[4]  = PULL64(W[4]);	ROUND_00_15(4,e,f,g,h,a,b,c,d);
511 	T1 = X[5]  = PULL64(W[5]);	ROUND_00_15(5,d,e,f,g,h,a,b,c);
512 	T1 = X[6]  = PULL64(W[6]);	ROUND_00_15(6,c,d,e,f,g,h,a,b);
513 	T1 = X[7]  = PULL64(W[7]);	ROUND_00_15(7,b,c,d,e,f,g,h,a);
514 	T1 = X[8]  = PULL64(W[8]);	ROUND_00_15(8,a,b,c,d,e,f,g,h);
515 	T1 = X[9]  = PULL64(W[9]);	ROUND_00_15(9,h,a,b,c,d,e,f,g);
516 	T1 = X[10] = PULL64(W[10]);	ROUND_00_15(10,g,h,a,b,c,d,e,f);
517 	T1 = X[11] = PULL64(W[11]);	ROUND_00_15(11,f,g,h,a,b,c,d,e);
518 	T1 = X[12] = PULL64(W[12]);	ROUND_00_15(12,e,f,g,h,a,b,c,d);
519 	T1 = X[13] = PULL64(W[13]);	ROUND_00_15(13,d,e,f,g,h,a,b,c);
520 	T1 = X[14] = PULL64(W[14]);	ROUND_00_15(14,c,d,e,f,g,h,a,b);
521 	T1 = X[15] = PULL64(W[15]);	ROUND_00_15(15,b,c,d,e,f,g,h,a);
522 #endif
523 
	/*
	 * Rounds 16..79, eight per iteration so the rotated variable names
	 * line up again at the top of the loop.
	 */
524 	for (i=16;i<80;i+=8)
525 		{
526 		ROUND_16_80(i+0,a,b,c,d,e,f,g,h,X);
527 		ROUND_16_80(i+1,h,a,b,c,d,e,f,g,X);
528 		ROUND_16_80(i+2,g,h,a,b,c,d,e,f,X);
529 		ROUND_16_80(i+3,f,g,h,a,b,c,d,e,X);
530 		ROUND_16_80(i+4,e,f,g,h,a,b,c,d,X);
531 		ROUND_16_80(i+5,d,e,f,g,h,a,b,c,X);
532 		ROUND_16_80(i+6,c,d,e,f,g,h,a,b,X);
533 		ROUND_16_80(i+7,b,c,d,e,f,g,h,a,X);
534 		}
535 
	/* Fold the working variables back into the chaining value. */
536 	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
537 	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;
538 
	/* Advance to the next input block. */
539 			W+=SHA_LBLOCK;
540 			}
541 	}
542 
543 #endif
544 
545 #endif /* SHA512_ASM */
546 
547 #endif /* OPENSSL_NO_SHA512 */
548