xref: /netbsd-src/external/gpl3/gdb/dist/libiberty/sha1.c (revision 5173eb0a33e5d83890ba976253e703be4c92557c)
198b9484cSchristos /* sha1.c - Functions to compute SHA1 message digest of files or
298b9484cSchristos    memory blocks according to the NIST specification FIPS-180-1.
398b9484cSchristos 
4*5173eb0aSchristos    Copyright (C) 2000-2024 Free Software Foundation, Inc.
598b9484cSchristos 
698b9484cSchristos    This program is free software; you can redistribute it and/or modify it
798b9484cSchristos    under the terms of the GNU General Public License as published by the
898b9484cSchristos    Free Software Foundation; either version 2, or (at your option) any
998b9484cSchristos    later version.
1098b9484cSchristos 
1198b9484cSchristos    This program is distributed in the hope that it will be useful,
1298b9484cSchristos    but WITHOUT ANY WARRANTY; without even the implied warranty of
1398b9484cSchristos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1498b9484cSchristos    GNU General Public License for more details.
1598b9484cSchristos 
1698b9484cSchristos    You should have received a copy of the GNU General Public License
1798b9484cSchristos    along with this program; if not, write to the Free Software Foundation,
1898b9484cSchristos    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
1998b9484cSchristos 
2098b9484cSchristos /* Written by Scott G. Miller
2198b9484cSchristos    Credits:
2298b9484cSchristos       Robert Klep <robert@ilse.nl>  -- Expansion function fix
2398b9484cSchristos */
2498b9484cSchristos 
2598b9484cSchristos #include <config.h>
2698b9484cSchristos 
2798b9484cSchristos #include "sha1.h"
2898b9484cSchristos 
2998b9484cSchristos #include <stddef.h>
3098b9484cSchristos #include <string.h>
3198b9484cSchristos 
32*5173eb0aSchristos #ifdef HAVE_X86_SHA1_HW_SUPPORT
33*5173eb0aSchristos # include <x86intrin.h>
34*5173eb0aSchristos # include <cpuid.h>
35*5173eb0aSchristos #endif
36*5173eb0aSchristos 
3798b9484cSchristos #if USE_UNLOCKED_IO
3898b9484cSchristos # include "unlocked-io.h"
3998b9484cSchristos #endif
4098b9484cSchristos 
/* SWAP (N) converts a 32-bit word between host byte order and big-endian
   byte order: it reverses the four bytes on hosts where WORDS_BIGENDIAN
   is not defined and is the identity otherwise.  N is expanded several
   times, so it must be free of side effects, and it must have a 32-bit
   unsigned type for the outer shifts to drop/zero-fill the right bits.  */
#ifndef WORDS_BIGENDIAN
# define SWAP(n) \
    (((n) >> 24) | (((n) >> 8) & 0xff00) | (((n) & 0xff00) << 8) | ((n) << 24))
#else
# define SWAP(n) (n)
#endif
4798b9484cSchristos 
4898b9484cSchristos #define BLOCKSIZE 4096
4998b9484cSchristos #if BLOCKSIZE % 64 != 0
5098b9484cSchristos # error "invalid BLOCKSIZE"
5198b9484cSchristos #endif
5298b9484cSchristos 
/* Padding source: one 0x80 marker byte followed by 63 zero bytes.
   sha1_finish_ctx copies a prefix of this array behind the buffered
   message tail to pad it up to the trailing 64-bit length field.  */
static const unsigned char fillbuf[64] = { 0x80 };
5698b9484cSchristos 
5798b9484cSchristos 
5898b9484cSchristos /* Take a pointer to a 160 bit block of data (five 32 bit ints) and
5998b9484cSchristos    initialize it to the start constants of the SHA1 algorithm.  This
6098b9484cSchristos    must be called before using hash in the call to sha1_hash.  */
6198b9484cSchristos void
6298b9484cSchristos sha1_init_ctx (struct sha1_ctx *ctx)
6398b9484cSchristos {
6498b9484cSchristos   ctx->A = 0x67452301;
6598b9484cSchristos   ctx->B = 0xefcdab89;
6698b9484cSchristos   ctx->C = 0x98badcfe;
6798b9484cSchristos   ctx->D = 0x10325476;
6898b9484cSchristos   ctx->E = 0xc3d2e1f0;
6998b9484cSchristos 
7098b9484cSchristos   ctx->total[0] = ctx->total[1] = 0;
7198b9484cSchristos   ctx->buflen = 0;
7298b9484cSchristos }
7398b9484cSchristos 
7498b9484cSchristos /* Put result from CTX in first 20 bytes following RESBUF.  The result
7598b9484cSchristos    must be in little endian byte order.
7698b9484cSchristos 
7798b9484cSchristos    IMPORTANT: On some systems it is required that RESBUF is correctly
7898b9484cSchristos    aligned for a 32-bit value.  */
7998b9484cSchristos void *
8098b9484cSchristos sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf)
8198b9484cSchristos {
8298b9484cSchristos   ((sha1_uint32 *) resbuf)[0] = SWAP (ctx->A);
8398b9484cSchristos   ((sha1_uint32 *) resbuf)[1] = SWAP (ctx->B);
8498b9484cSchristos   ((sha1_uint32 *) resbuf)[2] = SWAP (ctx->C);
8598b9484cSchristos   ((sha1_uint32 *) resbuf)[3] = SWAP (ctx->D);
8698b9484cSchristos   ((sha1_uint32 *) resbuf)[4] = SWAP (ctx->E);
8798b9484cSchristos 
8898b9484cSchristos   return resbuf;
8998b9484cSchristos }
9098b9484cSchristos 
9198b9484cSchristos /* Process the remaining bytes in the internal buffer and the usual
9298b9484cSchristos    prolog according to the standard and write the result to RESBUF.
9398b9484cSchristos 
9498b9484cSchristos    IMPORTANT: On some systems it is required that RESBUF is correctly
9598b9484cSchristos    aligned for a 32-bit value.  */
9698b9484cSchristos void *
9798b9484cSchristos sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf)
9898b9484cSchristos {
9998b9484cSchristos   /* Take yet unprocessed bytes into account.  */
10098b9484cSchristos   sha1_uint32 bytes = ctx->buflen;
10198b9484cSchristos   size_t size = (bytes < 56) ? 64 / 4 : 64 * 2 / 4;
10298b9484cSchristos 
10398b9484cSchristos   /* Now count remaining bytes.  */
10498b9484cSchristos   ctx->total[0] += bytes;
10598b9484cSchristos   if (ctx->total[0] < bytes)
10698b9484cSchristos     ++ctx->total[1];
10798b9484cSchristos 
10898b9484cSchristos   /* Put the 64-bit file length in *bits* at the end of the buffer.  */
10998b9484cSchristos   ctx->buffer[size - 2] = SWAP ((ctx->total[1] << 3) | (ctx->total[0] >> 29));
11098b9484cSchristos   ctx->buffer[size - 1] = SWAP (ctx->total[0] << 3);
11198b9484cSchristos 
11298b9484cSchristos   memcpy (&((char *) ctx->buffer)[bytes], fillbuf, (size - 2) * 4 - bytes);
11398b9484cSchristos 
11498b9484cSchristos   /* Process last bytes.  */
11598b9484cSchristos   sha1_process_block (ctx->buffer, size * 4, ctx);
11698b9484cSchristos 
11798b9484cSchristos   return sha1_read_ctx (ctx, resbuf);
11898b9484cSchristos }
11998b9484cSchristos 
12098b9484cSchristos /* Compute SHA1 message digest for bytes read from STREAM.  The
12198b9484cSchristos    resulting message digest number will be written into the 16 bytes
12298b9484cSchristos    beginning at RESBLOCK.  */
12398b9484cSchristos int
12498b9484cSchristos sha1_stream (FILE *stream, void *resblock)
12598b9484cSchristos {
12698b9484cSchristos   struct sha1_ctx ctx;
12798b9484cSchristos   char buffer[BLOCKSIZE + 72];
12898b9484cSchristos   size_t sum;
12998b9484cSchristos 
13098b9484cSchristos   /* Initialize the computation context.  */
13198b9484cSchristos   sha1_init_ctx (&ctx);
13298b9484cSchristos 
13398b9484cSchristos   /* Iterate over full file contents.  */
13498b9484cSchristos   while (1)
13598b9484cSchristos     {
13698b9484cSchristos       /* We read the file in blocks of BLOCKSIZE bytes.  One call of the
13798b9484cSchristos 	 computation function processes the whole buffer so that with the
13898b9484cSchristos 	 next round of the loop another block can be read.  */
13998b9484cSchristos       size_t n;
14098b9484cSchristos       sum = 0;
14198b9484cSchristos 
14298b9484cSchristos       /* Read block.  Take care for partial reads.  */
14398b9484cSchristos       while (1)
14498b9484cSchristos 	{
14598b9484cSchristos 	  n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream);
14698b9484cSchristos 
14798b9484cSchristos 	  sum += n;
14898b9484cSchristos 
14998b9484cSchristos 	  if (sum == BLOCKSIZE)
15098b9484cSchristos 	    break;
15198b9484cSchristos 
15298b9484cSchristos 	  if (n == 0)
15398b9484cSchristos 	    {
15498b9484cSchristos 	      /* Check for the error flag IFF N == 0, so that we don't
15598b9484cSchristos 		 exit the loop after a partial read due to e.g., EAGAIN
15698b9484cSchristos 		 or EWOULDBLOCK.  */
15798b9484cSchristos 	      if (ferror (stream))
15898b9484cSchristos 		return 1;
15998b9484cSchristos 	      goto process_partial_block;
16098b9484cSchristos 	    }
16198b9484cSchristos 
16298b9484cSchristos 	  /* We've read at least one byte, so ignore errors.  But always
16398b9484cSchristos 	     check for EOF, since feof may be true even though N > 0.
16498b9484cSchristos 	     Otherwise, we could end up calling fread after EOF.  */
16598b9484cSchristos 	  if (feof (stream))
16698b9484cSchristos 	    goto process_partial_block;
16798b9484cSchristos 	}
16898b9484cSchristos 
16998b9484cSchristos       /* Process buffer with BLOCKSIZE bytes.  Note that
17098b9484cSchristos 			BLOCKSIZE % 64 == 0
17198b9484cSchristos        */
17298b9484cSchristos       sha1_process_block (buffer, BLOCKSIZE, &ctx);
17398b9484cSchristos     }
17498b9484cSchristos 
17598b9484cSchristos  process_partial_block:;
17698b9484cSchristos 
17798b9484cSchristos   /* Process any remaining bytes.  */
17898b9484cSchristos   if (sum > 0)
17998b9484cSchristos     sha1_process_bytes (buffer, sum, &ctx);
18098b9484cSchristos 
18198b9484cSchristos   /* Construct result in desired memory.  */
18298b9484cSchristos   sha1_finish_ctx (&ctx, resblock);
18398b9484cSchristos   return 0;
18498b9484cSchristos }
18598b9484cSchristos 
18698b9484cSchristos /* Compute SHA1 message digest for LEN bytes beginning at BUFFER.  The
18798b9484cSchristos    result is always in little endian byte order, so that a byte-wise
18898b9484cSchristos    output yields to the wanted ASCII representation of the message
18998b9484cSchristos    digest.  */
19098b9484cSchristos void *
19198b9484cSchristos sha1_buffer (const char *buffer, size_t len, void *resblock)
19298b9484cSchristos {
19398b9484cSchristos   struct sha1_ctx ctx;
19498b9484cSchristos 
19598b9484cSchristos   /* Initialize the computation context.  */
19698b9484cSchristos   sha1_init_ctx (&ctx);
19798b9484cSchristos 
19898b9484cSchristos   /* Process whole buffer but last len % 64 bytes.  */
19998b9484cSchristos   sha1_process_bytes (buffer, len, &ctx);
20098b9484cSchristos 
20198b9484cSchristos   /* Put result in desired memory area.  */
20298b9484cSchristos   return sha1_finish_ctx (&ctx, resblock);
20398b9484cSchristos }
20498b9484cSchristos 
20598b9484cSchristos void
20698b9484cSchristos sha1_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx)
20798b9484cSchristos {
20898b9484cSchristos   /* When we already have some bits in our internal buffer concatenate
20998b9484cSchristos      both inputs first.  */
21098b9484cSchristos   if (ctx->buflen != 0)
21198b9484cSchristos     {
21298b9484cSchristos       size_t left_over = ctx->buflen;
21398b9484cSchristos       size_t add = 128 - left_over > len ? len : 128 - left_over;
21498b9484cSchristos 
21598b9484cSchristos       memcpy (&((char *) ctx->buffer)[left_over], buffer, add);
21698b9484cSchristos       ctx->buflen += add;
21798b9484cSchristos 
21898b9484cSchristos       if (ctx->buflen > 64)
21998b9484cSchristos 	{
22098b9484cSchristos 	  sha1_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
22198b9484cSchristos 
22298b9484cSchristos 	  ctx->buflen &= 63;
22398b9484cSchristos 	  /* The regions in the following copy operation cannot overlap.  */
22498b9484cSchristos 	  memcpy (ctx->buffer,
22598b9484cSchristos 		  &((char *) ctx->buffer)[(left_over + add) & ~63],
22698b9484cSchristos 		  ctx->buflen);
22798b9484cSchristos 	}
22898b9484cSchristos 
22998b9484cSchristos       buffer = (const char *) buffer + add;
23098b9484cSchristos       len -= add;
23198b9484cSchristos     }
23298b9484cSchristos 
23398b9484cSchristos   /* Process available complete blocks.  */
23498b9484cSchristos   if (len >= 64)
23598b9484cSchristos     {
23698b9484cSchristos #if !_STRING_ARCH_unaligned
23798b9484cSchristos # define alignof(type) offsetof (struct { char c; type x; }, x)
23898b9484cSchristos # define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
23998b9484cSchristos       if (UNALIGNED_P (buffer))
24098b9484cSchristos 	while (len > 64)
24198b9484cSchristos 	  {
24298b9484cSchristos 	    sha1_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
24398b9484cSchristos 	    buffer = (const char *) buffer + 64;
24498b9484cSchristos 	    len -= 64;
24598b9484cSchristos 	  }
24698b9484cSchristos       else
24798b9484cSchristos #endif
24898b9484cSchristos 	{
24998b9484cSchristos 	  sha1_process_block (buffer, len & ~63, ctx);
25098b9484cSchristos 	  buffer = (const char *) buffer + (len & ~63);
25198b9484cSchristos 	  len &= 63;
25298b9484cSchristos 	}
25398b9484cSchristos     }
25498b9484cSchristos 
25598b9484cSchristos   /* Move remaining bytes in internal buffer.  */
25698b9484cSchristos   if (len > 0)
25798b9484cSchristos     {
25898b9484cSchristos       size_t left_over = ctx->buflen;
25998b9484cSchristos 
26098b9484cSchristos       memcpy (&((char *) ctx->buffer)[left_over], buffer, len);
26198b9484cSchristos       left_over += len;
26298b9484cSchristos       if (left_over >= 64)
26398b9484cSchristos 	{
26498b9484cSchristos 	  sha1_process_block (ctx->buffer, 64, ctx);
26598b9484cSchristos 	  left_over -= 64;
2664b169a6bSchristos 	  memmove (ctx->buffer, &ctx->buffer[16], left_over);
26798b9484cSchristos 	}
26898b9484cSchristos       ctx->buflen = left_over;
26998b9484cSchristos     }
27098b9484cSchristos }
27198b9484cSchristos 
27298b9484cSchristos /* --- Code below is the primary difference between md5.c and sha1.c --- */
27398b9484cSchristos 
/* SHA1 round constants; sha1_process_block uses one per group of 20
   rounds.  */
#define K1 0x5a827999
#define K2 0x6ed9eba1
#define K3 0x8f1bbcdc
#define K4 0xca62c1d6

/* Round functions, applied bitwise to three words:
     F1  takes each bit from C where B is set and from D elsewhere,
     F2  is the exclusive-or (parity) of the three inputs,
     F3  is the bitwise majority of the three inputs,
     F4  is the same as F2.
   Every argument is parenthesized in the expansion, so arbitrary
   expressions may be passed without operator-precedence surprises.  */
#define F1(B,C,D) ((D) ^ ((B) & ((C) ^ (D))))
#define F2(B,C,D) ((B) ^ (C) ^ (D))
#define F3(B,C,D) (((B) & (C)) | ((D) & ((B) | (C))))
#define F4(B,C,D) ((B) ^ (C) ^ (D))
28598b9484cSchristos 
28698b9484cSchristos /* Process LEN bytes of BUFFER, accumulating context into CTX.
28798b9484cSchristos    It is assumed that LEN % 64 == 0.
28898b9484cSchristos    Most of this code comes from GnuPG's cipher/sha1.c.  */
28998b9484cSchristos 
29098b9484cSchristos void
29198b9484cSchristos sha1_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx)
29298b9484cSchristos {
29398b9484cSchristos   const sha1_uint32 *words = (const sha1_uint32*) buffer;
29498b9484cSchristos   size_t nwords = len / sizeof (sha1_uint32);
29598b9484cSchristos   const sha1_uint32 *endp = words + nwords;
29698b9484cSchristos   sha1_uint32 x[16];
29798b9484cSchristos   sha1_uint32 a = ctx->A;
29898b9484cSchristos   sha1_uint32 b = ctx->B;
29998b9484cSchristos   sha1_uint32 c = ctx->C;
30098b9484cSchristos   sha1_uint32 d = ctx->D;
30198b9484cSchristos   sha1_uint32 e = ctx->E;
30298b9484cSchristos 
30398b9484cSchristos   /* First increment the byte count.  RFC 1321 specifies the possible
30498b9484cSchristos      length of the file up to 2^64 bits.  Here we only compute the
30598b9484cSchristos      number of bytes.  Do a double word increment.  */
30698b9484cSchristos   ctx->total[0] += len;
307a2e2270fSchristos   ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < len);
30898b9484cSchristos 
30998b9484cSchristos #define rol(x, n) (((x) << (n)) | ((sha1_uint32) (x) >> (32 - (n))))
31098b9484cSchristos 
31198b9484cSchristos #define M(I) ( tm =   x[I&0x0f] ^ x[(I-14)&0x0f] \
31298b9484cSchristos 		    ^ x[(I-8)&0x0f] ^ x[(I-3)&0x0f] \
31398b9484cSchristos 	       , (x[I&0x0f] = rol(tm, 1)) )
31498b9484cSchristos 
31598b9484cSchristos #define R(A,B,C,D,E,F,K,M)  do { E += rol( A, 5 )     \
31698b9484cSchristos 				      + F( B, C, D )  \
31798b9484cSchristos 				      + K	      \
31898b9484cSchristos 				      + M;	      \
31998b9484cSchristos 				 B = rol( B, 30 );    \
32098b9484cSchristos 			       } while(0)
32198b9484cSchristos 
32298b9484cSchristos   while (words < endp)
32398b9484cSchristos     {
32498b9484cSchristos       sha1_uint32 tm;
32598b9484cSchristos       int t;
32698b9484cSchristos       for (t = 0; t < 16; t++)
32798b9484cSchristos 	{
32898b9484cSchristos 	  x[t] = SWAP (*words);
32998b9484cSchristos 	  words++;
33098b9484cSchristos 	}
33198b9484cSchristos 
33298b9484cSchristos       R( a, b, c, d, e, F1, K1, x[ 0] );
33398b9484cSchristos       R( e, a, b, c, d, F1, K1, x[ 1] );
33498b9484cSchristos       R( d, e, a, b, c, F1, K1, x[ 2] );
33598b9484cSchristos       R( c, d, e, a, b, F1, K1, x[ 3] );
33698b9484cSchristos       R( b, c, d, e, a, F1, K1, x[ 4] );
33798b9484cSchristos       R( a, b, c, d, e, F1, K1, x[ 5] );
33898b9484cSchristos       R( e, a, b, c, d, F1, K1, x[ 6] );
33998b9484cSchristos       R( d, e, a, b, c, F1, K1, x[ 7] );
34098b9484cSchristos       R( c, d, e, a, b, F1, K1, x[ 8] );
34198b9484cSchristos       R( b, c, d, e, a, F1, K1, x[ 9] );
34298b9484cSchristos       R( a, b, c, d, e, F1, K1, x[10] );
34398b9484cSchristos       R( e, a, b, c, d, F1, K1, x[11] );
34498b9484cSchristos       R( d, e, a, b, c, F1, K1, x[12] );
34598b9484cSchristos       R( c, d, e, a, b, F1, K1, x[13] );
34698b9484cSchristos       R( b, c, d, e, a, F1, K1, x[14] );
34798b9484cSchristos       R( a, b, c, d, e, F1, K1, x[15] );
34898b9484cSchristos       R( e, a, b, c, d, F1, K1, M(16) );
34998b9484cSchristos       R( d, e, a, b, c, F1, K1, M(17) );
35098b9484cSchristos       R( c, d, e, a, b, F1, K1, M(18) );
35198b9484cSchristos       R( b, c, d, e, a, F1, K1, M(19) );
35298b9484cSchristos       R( a, b, c, d, e, F2, K2, M(20) );
35398b9484cSchristos       R( e, a, b, c, d, F2, K2, M(21) );
35498b9484cSchristos       R( d, e, a, b, c, F2, K2, M(22) );
35598b9484cSchristos       R( c, d, e, a, b, F2, K2, M(23) );
35698b9484cSchristos       R( b, c, d, e, a, F2, K2, M(24) );
35798b9484cSchristos       R( a, b, c, d, e, F2, K2, M(25) );
35898b9484cSchristos       R( e, a, b, c, d, F2, K2, M(26) );
35998b9484cSchristos       R( d, e, a, b, c, F2, K2, M(27) );
36098b9484cSchristos       R( c, d, e, a, b, F2, K2, M(28) );
36198b9484cSchristos       R( b, c, d, e, a, F2, K2, M(29) );
36298b9484cSchristos       R( a, b, c, d, e, F2, K2, M(30) );
36398b9484cSchristos       R( e, a, b, c, d, F2, K2, M(31) );
36498b9484cSchristos       R( d, e, a, b, c, F2, K2, M(32) );
36598b9484cSchristos       R( c, d, e, a, b, F2, K2, M(33) );
36698b9484cSchristos       R( b, c, d, e, a, F2, K2, M(34) );
36798b9484cSchristos       R( a, b, c, d, e, F2, K2, M(35) );
36898b9484cSchristos       R( e, a, b, c, d, F2, K2, M(36) );
36998b9484cSchristos       R( d, e, a, b, c, F2, K2, M(37) );
37098b9484cSchristos       R( c, d, e, a, b, F2, K2, M(38) );
37198b9484cSchristos       R( b, c, d, e, a, F2, K2, M(39) );
37298b9484cSchristos       R( a, b, c, d, e, F3, K3, M(40) );
37398b9484cSchristos       R( e, a, b, c, d, F3, K3, M(41) );
37498b9484cSchristos       R( d, e, a, b, c, F3, K3, M(42) );
37598b9484cSchristos       R( c, d, e, a, b, F3, K3, M(43) );
37698b9484cSchristos       R( b, c, d, e, a, F3, K3, M(44) );
37798b9484cSchristos       R( a, b, c, d, e, F3, K3, M(45) );
37898b9484cSchristos       R( e, a, b, c, d, F3, K3, M(46) );
37998b9484cSchristos       R( d, e, a, b, c, F3, K3, M(47) );
38098b9484cSchristos       R( c, d, e, a, b, F3, K3, M(48) );
38198b9484cSchristos       R( b, c, d, e, a, F3, K3, M(49) );
38298b9484cSchristos       R( a, b, c, d, e, F3, K3, M(50) );
38398b9484cSchristos       R( e, a, b, c, d, F3, K3, M(51) );
38498b9484cSchristos       R( d, e, a, b, c, F3, K3, M(52) );
38598b9484cSchristos       R( c, d, e, a, b, F3, K3, M(53) );
38698b9484cSchristos       R( b, c, d, e, a, F3, K3, M(54) );
38798b9484cSchristos       R( a, b, c, d, e, F3, K3, M(55) );
38898b9484cSchristos       R( e, a, b, c, d, F3, K3, M(56) );
38998b9484cSchristos       R( d, e, a, b, c, F3, K3, M(57) );
39098b9484cSchristos       R( c, d, e, a, b, F3, K3, M(58) );
39198b9484cSchristos       R( b, c, d, e, a, F3, K3, M(59) );
39298b9484cSchristos       R( a, b, c, d, e, F4, K4, M(60) );
39398b9484cSchristos       R( e, a, b, c, d, F4, K4, M(61) );
39498b9484cSchristos       R( d, e, a, b, c, F4, K4, M(62) );
39598b9484cSchristos       R( c, d, e, a, b, F4, K4, M(63) );
39698b9484cSchristos       R( b, c, d, e, a, F4, K4, M(64) );
39798b9484cSchristos       R( a, b, c, d, e, F4, K4, M(65) );
39898b9484cSchristos       R( e, a, b, c, d, F4, K4, M(66) );
39998b9484cSchristos       R( d, e, a, b, c, F4, K4, M(67) );
40098b9484cSchristos       R( c, d, e, a, b, F4, K4, M(68) );
40198b9484cSchristos       R( b, c, d, e, a, F4, K4, M(69) );
40298b9484cSchristos       R( a, b, c, d, e, F4, K4, M(70) );
40398b9484cSchristos       R( e, a, b, c, d, F4, K4, M(71) );
40498b9484cSchristos       R( d, e, a, b, c, F4, K4, M(72) );
40598b9484cSchristos       R( c, d, e, a, b, F4, K4, M(73) );
40698b9484cSchristos       R( b, c, d, e, a, F4, K4, M(74) );
40798b9484cSchristos       R( a, b, c, d, e, F4, K4, M(75) );
40898b9484cSchristos       R( e, a, b, c, d, F4, K4, M(76) );
40998b9484cSchristos       R( d, e, a, b, c, F4, K4, M(77) );
41098b9484cSchristos       R( c, d, e, a, b, F4, K4, M(78) );
41198b9484cSchristos       R( b, c, d, e, a, F4, K4, M(79) );
41298b9484cSchristos 
41398b9484cSchristos       a = ctx->A += a;
41498b9484cSchristos       b = ctx->B += b;
41598b9484cSchristos       c = ctx->C += c;
41698b9484cSchristos       d = ctx->D += d;
41798b9484cSchristos       e = ctx->E += e;
41898b9484cSchristos     }
41998b9484cSchristos }
420*5173eb0aSchristos 
421*5173eb0aSchristos #if defined(HAVE_X86_SHA1_HW_SUPPORT)
422*5173eb0aSchristos /* HW specific version of sha1_process_bytes.  */
423*5173eb0aSchristos 
424*5173eb0aSchristos static void sha1_hw_process_block (const void *, size_t, struct sha1_ctx *);
425*5173eb0aSchristos 
426*5173eb0aSchristos static void
427*5173eb0aSchristos sha1_hw_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx)
428*5173eb0aSchristos {
429*5173eb0aSchristos   /* When we already have some bits in our internal buffer concatenate
430*5173eb0aSchristos      both inputs first.  */
431*5173eb0aSchristos   if (ctx->buflen != 0)
432*5173eb0aSchristos     {
433*5173eb0aSchristos       size_t left_over = ctx->buflen;
434*5173eb0aSchristos       size_t add = 128 - left_over > len ? len : 128 - left_over;
435*5173eb0aSchristos 
436*5173eb0aSchristos       memcpy (&((char *) ctx->buffer)[left_over], buffer, add);
437*5173eb0aSchristos       ctx->buflen += add;
438*5173eb0aSchristos 
439*5173eb0aSchristos       if (ctx->buflen > 64)
440*5173eb0aSchristos 	{
441*5173eb0aSchristos 	  sha1_hw_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
442*5173eb0aSchristos 
443*5173eb0aSchristos 	  ctx->buflen &= 63;
444*5173eb0aSchristos 	  /* The regions in the following copy operation cannot overlap.  */
445*5173eb0aSchristos 	  memcpy (ctx->buffer,
446*5173eb0aSchristos 		  &((char *) ctx->buffer)[(left_over + add) & ~63],
447*5173eb0aSchristos 		  ctx->buflen);
448*5173eb0aSchristos 	}
449*5173eb0aSchristos 
450*5173eb0aSchristos       buffer = (const char *) buffer + add;
451*5173eb0aSchristos       len -= add;
452*5173eb0aSchristos     }
453*5173eb0aSchristos 
454*5173eb0aSchristos   /* Process available complete blocks.  */
455*5173eb0aSchristos   if (len >= 64)
456*5173eb0aSchristos     {
457*5173eb0aSchristos #if !_STRING_ARCH_unaligned
458*5173eb0aSchristos # define alignof(type) offsetof (struct { char c; type x; }, x)
459*5173eb0aSchristos # define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
460*5173eb0aSchristos       if (UNALIGNED_P (buffer))
461*5173eb0aSchristos 	while (len > 64)
462*5173eb0aSchristos 	  {
463*5173eb0aSchristos 	    sha1_hw_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
464*5173eb0aSchristos 	    buffer = (const char *) buffer + 64;
465*5173eb0aSchristos 	    len -= 64;
466*5173eb0aSchristos 	  }
467*5173eb0aSchristos       else
468*5173eb0aSchristos #endif
469*5173eb0aSchristos 	{
470*5173eb0aSchristos 	  sha1_hw_process_block (buffer, len & ~63, ctx);
471*5173eb0aSchristos 	  buffer = (const char *) buffer + (len & ~63);
472*5173eb0aSchristos 	  len &= 63;
473*5173eb0aSchristos 	}
474*5173eb0aSchristos     }
475*5173eb0aSchristos 
476*5173eb0aSchristos   /* Move remaining bytes in internal buffer.  */
477*5173eb0aSchristos   if (len > 0)
478*5173eb0aSchristos     {
479*5173eb0aSchristos       size_t left_over = ctx->buflen;
480*5173eb0aSchristos 
481*5173eb0aSchristos       memcpy (&((char *) ctx->buffer)[left_over], buffer, len);
482*5173eb0aSchristos       left_over += len;
483*5173eb0aSchristos       if (left_over >= 64)
484*5173eb0aSchristos 	{
485*5173eb0aSchristos 	  sha1_hw_process_block (ctx->buffer, 64, ctx);
486*5173eb0aSchristos 	  left_over -= 64;
487*5173eb0aSchristos 	  memmove (ctx->buffer, &ctx->buffer[16], left_over);
488*5173eb0aSchristos 	}
489*5173eb0aSchristos       ctx->buflen = left_over;
490*5173eb0aSchristos     }
491*5173eb0aSchristos }
492*5173eb0aSchristos 
493*5173eb0aSchristos /* Process LEN bytes of BUFFER, accumulating context into CTX.
494*5173eb0aSchristos    Using CPU specific intrinsics.  */
495*5173eb0aSchristos 
496*5173eb0aSchristos #ifdef HAVE_X86_SHA1_HW_SUPPORT
497*5173eb0aSchristos __attribute__((__target__ ("sse4.1,sha")))
498*5173eb0aSchristos #endif
499*5173eb0aSchristos static void
500*5173eb0aSchristos sha1_hw_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx)
501*5173eb0aSchristos {
502*5173eb0aSchristos #ifdef HAVE_X86_SHA1_HW_SUPPORT
503*5173eb0aSchristos   /* Implemented from
504*5173eb0aSchristos      https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html  */
505*5173eb0aSchristos   const __m128i *words = (const __m128i *) buffer;
506*5173eb0aSchristos   const __m128i *endp = (const __m128i *) ((const char *) buffer + len);
507*5173eb0aSchristos   __m128i abcd, abcd_save, e0, e0_save, e1, msg0, msg1, msg2, msg3;
508*5173eb0aSchristos   const __m128i shuf_mask
509*5173eb0aSchristos     = _mm_set_epi64x (0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL);
510*5173eb0aSchristos   char check[((offsetof (struct sha1_ctx, B)
511*5173eb0aSchristos 	     == offsetof (struct sha1_ctx, A) + sizeof (ctx->A))
512*5173eb0aSchristos 		   && (offsetof (struct sha1_ctx, C)
513*5173eb0aSchristos 		       == offsetof (struct sha1_ctx, A) + 2 * sizeof (ctx->A))
514*5173eb0aSchristos 		   && (offsetof (struct sha1_ctx, D)
515*5173eb0aSchristos 		       == offsetof (struct sha1_ctx, A) + 3 * sizeof (ctx->A)))
516*5173eb0aSchristos 		  ? 1 : -1];
517*5173eb0aSchristos 
518*5173eb0aSchristos   /* First increment the byte count.  RFC 1321 specifies the possible
519*5173eb0aSchristos      length of the file up to 2^64 bits.  Here we only compute the
520*5173eb0aSchristos      number of bytes.  Do a double word increment.  */
521*5173eb0aSchristos   ctx->total[0] += len;
522*5173eb0aSchristos   ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < len);
523*5173eb0aSchristos 
524*5173eb0aSchristos   (void) &check[0];
525*5173eb0aSchristos   abcd = _mm_loadu_si128 ((const __m128i *) &ctx->A);
526*5173eb0aSchristos   e0 = _mm_set_epi32 (ctx->E, 0, 0, 0);
527*5173eb0aSchristos   abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */
528*5173eb0aSchristos 
529*5173eb0aSchristos   while (words < endp)
530*5173eb0aSchristos     {
531*5173eb0aSchristos       abcd_save = abcd;
532*5173eb0aSchristos       e0_save = e0;
533*5173eb0aSchristos 
534*5173eb0aSchristos       /* 0..3 */
535*5173eb0aSchristos       msg0 = _mm_loadu_si128 (words);
536*5173eb0aSchristos       msg0 = _mm_shuffle_epi8 (msg0, shuf_mask);
537*5173eb0aSchristos       e0 = _mm_add_epi32 (e0, msg0);
538*5173eb0aSchristos       e1 = abcd;
539*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
540*5173eb0aSchristos 
541*5173eb0aSchristos       /* 4..7 */
542*5173eb0aSchristos       msg1 = _mm_loadu_si128 (words + 1);
543*5173eb0aSchristos       msg1 = _mm_shuffle_epi8 (msg1, shuf_mask);
544*5173eb0aSchristos       e1 = _mm_sha1nexte_epu32 (e1, msg1);
545*5173eb0aSchristos       e0 = abcd;
546*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0);
547*5173eb0aSchristos       msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
548*5173eb0aSchristos 
549*5173eb0aSchristos       /* 8..11 */
550*5173eb0aSchristos       msg2 = _mm_loadu_si128 (words + 2);
551*5173eb0aSchristos       msg2 = _mm_shuffle_epi8 (msg2, shuf_mask);
552*5173eb0aSchristos       e0 = _mm_sha1nexte_epu32 (e0, msg2);
553*5173eb0aSchristos       e1 = abcd;
554*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
555*5173eb0aSchristos       msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
556*5173eb0aSchristos       msg0 = _mm_xor_si128 (msg0, msg2);
557*5173eb0aSchristos 
558*5173eb0aSchristos       /* 12..15 */
559*5173eb0aSchristos       msg3 = _mm_loadu_si128 (words + 3);
560*5173eb0aSchristos       msg3 = _mm_shuffle_epi8 (msg3, shuf_mask);
561*5173eb0aSchristos       e1 = _mm_sha1nexte_epu32 (e1, msg3);
562*5173eb0aSchristos       e0 = abcd;
563*5173eb0aSchristos       msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
564*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0);
565*5173eb0aSchristos       msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
566*5173eb0aSchristos       msg1 = _mm_xor_si128 (msg1, msg3);
567*5173eb0aSchristos 
568*5173eb0aSchristos       /* 16..19 */
569*5173eb0aSchristos       e0 = _mm_sha1nexte_epu32 (e0, msg0);
570*5173eb0aSchristos       e1 = abcd;
571*5173eb0aSchristos       msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
572*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
573*5173eb0aSchristos       msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
574*5173eb0aSchristos       msg2 = _mm_xor_si128 (msg2, msg0);
575*5173eb0aSchristos 
576*5173eb0aSchristos       /* 20..23 */
577*5173eb0aSchristos       e1 = _mm_sha1nexte_epu32 (e1, msg1);
578*5173eb0aSchristos       e0 = abcd;
579*5173eb0aSchristos       msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
580*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
581*5173eb0aSchristos       msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
582*5173eb0aSchristos       msg3 = _mm_xor_si128 (msg3, msg1);
583*5173eb0aSchristos 
584*5173eb0aSchristos       /* 24..27 */
585*5173eb0aSchristos       e0 = _mm_sha1nexte_epu32 (e0, msg2);
586*5173eb0aSchristos       e1 = abcd;
587*5173eb0aSchristos       msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
588*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1);
589*5173eb0aSchristos       msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
590*5173eb0aSchristos       msg0 = _mm_xor_si128 (msg0, msg2);
591*5173eb0aSchristos 
592*5173eb0aSchristos       /* 28..31 */
593*5173eb0aSchristos       e1 = _mm_sha1nexte_epu32 (e1, msg3);
594*5173eb0aSchristos       e0 = abcd;
595*5173eb0aSchristos       msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
596*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
597*5173eb0aSchristos       msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
598*5173eb0aSchristos       msg1 = _mm_xor_si128 (msg1, msg3);
599*5173eb0aSchristos 
600*5173eb0aSchristos       /* 32..35 */
601*5173eb0aSchristos       e0 = _mm_sha1nexte_epu32 (e0, msg0);
602*5173eb0aSchristos       e1 = abcd;
603*5173eb0aSchristos       msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
604*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1);
605*5173eb0aSchristos       msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
606*5173eb0aSchristos       msg2 = _mm_xor_si128 (msg2, msg0);
607*5173eb0aSchristos 
608*5173eb0aSchristos       /* 36..39 */
609*5173eb0aSchristos       e1 = _mm_sha1nexte_epu32 (e1, msg1);
610*5173eb0aSchristos       e0 = abcd;
611*5173eb0aSchristos       msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
612*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
613*5173eb0aSchristos       msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
614*5173eb0aSchristos       msg3 = _mm_xor_si128 (msg3, msg1);
615*5173eb0aSchristos 
616*5173eb0aSchristos       /* 40..43 */
617*5173eb0aSchristos       e0 = _mm_sha1nexte_epu32 (e0, msg2);
618*5173eb0aSchristos       e1 = abcd;
619*5173eb0aSchristos       msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
620*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
621*5173eb0aSchristos       msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
622*5173eb0aSchristos       msg0 = _mm_xor_si128 (msg0, msg2);
623*5173eb0aSchristos 
624*5173eb0aSchristos       /* 44..47 */
625*5173eb0aSchristos       e1 = _mm_sha1nexte_epu32 (e1, msg3);
626*5173eb0aSchristos       e0 = abcd;
627*5173eb0aSchristos       msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
628*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2);
629*5173eb0aSchristos       msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
630*5173eb0aSchristos       msg1 = _mm_xor_si128 (msg1, msg3);
631*5173eb0aSchristos 
632*5173eb0aSchristos       /* 48..51 */
633*5173eb0aSchristos       e0 = _mm_sha1nexte_epu32 (e0, msg0);
634*5173eb0aSchristos       e1 = abcd;
635*5173eb0aSchristos       msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
636*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
637*5173eb0aSchristos       msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
638*5173eb0aSchristos       msg2 = _mm_xor_si128 (msg2, msg0);
639*5173eb0aSchristos 
640*5173eb0aSchristos       /* 52..55 */
641*5173eb0aSchristos       e1 = _mm_sha1nexte_epu32 (e1, msg1);
642*5173eb0aSchristos       e0 = abcd;
643*5173eb0aSchristos       msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
644*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2);
645*5173eb0aSchristos       msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
646*5173eb0aSchristos       msg3 = _mm_xor_si128 (msg3, msg1);
647*5173eb0aSchristos 
648*5173eb0aSchristos       /* 56..59 */
649*5173eb0aSchristos       e0 = _mm_sha1nexte_epu32 (e0, msg2);
650*5173eb0aSchristos       e1 = abcd;
651*5173eb0aSchristos       msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
652*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
653*5173eb0aSchristos       msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
654*5173eb0aSchristos       msg0 = _mm_xor_si128 (msg0, msg2);
655*5173eb0aSchristos 
656*5173eb0aSchristos       /* 60..63 */
657*5173eb0aSchristos       e1 = _mm_sha1nexte_epu32 (e1, msg3);
658*5173eb0aSchristos       e0 = abcd;
659*5173eb0aSchristos       msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
660*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);
661*5173eb0aSchristos       msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
662*5173eb0aSchristos       msg1 = _mm_xor_si128 (msg1, msg3);
663*5173eb0aSchristos 
664*5173eb0aSchristos       /* 64..67 */
665*5173eb0aSchristos       e0 = _mm_sha1nexte_epu32 (e0, msg0);
666*5173eb0aSchristos       e1 = abcd;
667*5173eb0aSchristos       msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
668*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3);
669*5173eb0aSchristos       msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
670*5173eb0aSchristos       msg2 = _mm_xor_si128 (msg2, msg0);
671*5173eb0aSchristos 
672*5173eb0aSchristos       /* 68..71 */
673*5173eb0aSchristos       e1 = _mm_sha1nexte_epu32 (e1, msg1);
674*5173eb0aSchristos       e0 = abcd;
675*5173eb0aSchristos       msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
676*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);
677*5173eb0aSchristos       msg3 = _mm_xor_si128 (msg3, msg1);
678*5173eb0aSchristos 
679*5173eb0aSchristos       /* 72..75 */
680*5173eb0aSchristos       e0 = _mm_sha1nexte_epu32 (e0, msg2);
681*5173eb0aSchristos       e1 = abcd;
682*5173eb0aSchristos       msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
683*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3);
684*5173eb0aSchristos 
685*5173eb0aSchristos       /* 76..79 */
686*5173eb0aSchristos       e1 = _mm_sha1nexte_epu32 (e1, msg3);
687*5173eb0aSchristos       e0 = abcd;
688*5173eb0aSchristos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);
689*5173eb0aSchristos 
690*5173eb0aSchristos       /* Finalize. */
691*5173eb0aSchristos       e0 = _mm_sha1nexte_epu32 (e0, e0_save);
692*5173eb0aSchristos       abcd = _mm_add_epi32 (abcd, abcd_save);
693*5173eb0aSchristos 
694*5173eb0aSchristos       words = words + 4;
695*5173eb0aSchristos     }
696*5173eb0aSchristos 
697*5173eb0aSchristos   abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */
698*5173eb0aSchristos   _mm_storeu_si128 ((__m128i *) &ctx->A, abcd);
699*5173eb0aSchristos   ctx->E = _mm_extract_epi32 (e0, 3);
700*5173eb0aSchristos #endif
701*5173eb0aSchristos }
702*5173eb0aSchristos #endif
703*5173eb0aSchristos 
704*5173eb0aSchristos /* Return sha1_process_bytes or some hardware optimized version thereof
705*5173eb0aSchristos    depending on current CPU.  */
706*5173eb0aSchristos 
707*5173eb0aSchristos sha1_process_bytes_fn
708*5173eb0aSchristos sha1_choose_process_bytes (void)
709*5173eb0aSchristos {
710*5173eb0aSchristos #ifdef HAVE_X86_SHA1_HW_SUPPORT
711*5173eb0aSchristos   unsigned int eax, ebx, ecx, edx;
712*5173eb0aSchristos   if (__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx)
713*5173eb0aSchristos       && (ebx & bit_SHA) != 0
714*5173eb0aSchristos       && __get_cpuid (1, &eax, &ebx, &ecx, &edx)
715*5173eb0aSchristos       && (ecx & bit_SSE4_1) != 0)
716*5173eb0aSchristos     return sha1_hw_process_bytes;
717*5173eb0aSchristos #endif
718*5173eb0aSchristos   return sha1_process_bytes;
719*5173eb0aSchristos }
720