198b9484cSchristos /* sha1.c - Functions to compute SHA1 message digest of files or 298b9484cSchristos memory blocks according to the NIST specification FIPS-180-1. 398b9484cSchristos 4*5173eb0aSchristos Copyright (C) 2000-2024 Free Software Foundation, Inc. 598b9484cSchristos 698b9484cSchristos This program is free software; you can redistribute it and/or modify it 798b9484cSchristos under the terms of the GNU General Public License as published by the 898b9484cSchristos Free Software Foundation; either version 2, or (at your option) any 998b9484cSchristos later version. 1098b9484cSchristos 1198b9484cSchristos This program is distributed in the hope that it will be useful, 1298b9484cSchristos but WITHOUT ANY WARRANTY; without even the implied warranty of 1398b9484cSchristos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1498b9484cSchristos GNU General Public License for more details. 1598b9484cSchristos 1698b9484cSchristos You should have received a copy of the GNU General Public License 1798b9484cSchristos along with this program; if not, write to the Free Software Foundation, 1898b9484cSchristos Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 1998b9484cSchristos 2098b9484cSchristos /* Written by Scott G. Miller 2198b9484cSchristos Credits: 2298b9484cSchristos Robert Klep <robert@ilse.nl> -- Expansion function fix 2398b9484cSchristos */ 2498b9484cSchristos 2598b9484cSchristos #include <config.h> 2698b9484cSchristos 2798b9484cSchristos #include "sha1.h" 2898b9484cSchristos 2998b9484cSchristos #include <stddef.h> 3098b9484cSchristos #include <string.h> 3198b9484cSchristos 32*5173eb0aSchristos #ifdef HAVE_X86_SHA1_HW_SUPPORT 33*5173eb0aSchristos # include <x86intrin.h> 34*5173eb0aSchristos # include <cpuid.h> 35*5173eb0aSchristos #endif 36*5173eb0aSchristos 3798b9484cSchristos #if USE_UNLOCKED_IO 3898b9484cSchristos # include "unlocked-io.h" 3998b9484cSchristos #endif 4098b9484cSchristos 4198b9484cSchristos #ifdef WORDS_BIGENDIAN 4298b9484cSchristos # define SWAP(n) (n) 4398b9484cSchristos #else 4498b9484cSchristos # define SWAP(n) \ 4598b9484cSchristos (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24)) 4698b9484cSchristos #endif 4798b9484cSchristos 4898b9484cSchristos #define BLOCKSIZE 4096 4998b9484cSchristos #if BLOCKSIZE % 64 != 0 5098b9484cSchristos # error "invalid BLOCKSIZE" 5198b9484cSchristos #endif 5298b9484cSchristos 5398b9484cSchristos /* This array contains the bytes used to pad the buffer to the next 5498b9484cSchristos 64-byte boundary. (RFC 1321, 3.1: Step 1) */ 5598b9484cSchristos static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ... */ }; 5698b9484cSchristos 5798b9484cSchristos 5898b9484cSchristos /* Take a pointer to a 160 bit block of data (five 32 bit ints) and 5998b9484cSchristos initialize it to the start constants of the SHA1 algorithm. This 6098b9484cSchristos must be called before using hash in the call to sha1_hash. */ 6198b9484cSchristos void 6298b9484cSchristos sha1_init_ctx (struct sha1_ctx *ctx) 6398b9484cSchristos { 6498b9484cSchristos ctx->A = 0x67452301; 6598b9484cSchristos ctx->B = 0xefcdab89; 6698b9484cSchristos ctx->C = 0x98badcfe; 6798b9484cSchristos ctx->D = 0x10325476; 6898b9484cSchristos ctx->E = 0xc3d2e1f0; 6998b9484cSchristos 7098b9484cSchristos ctx->total[0] = ctx->total[1] = 0; 7198b9484cSchristos ctx->buflen = 0; 7298b9484cSchristos } 7398b9484cSchristos 7498b9484cSchristos /* Put result from CTX in first 20 bytes following RESBUF. The result 7598b9484cSchristos must be in little endian byte order. 7698b9484cSchristos 7798b9484cSchristos IMPORTANT: On some systems it is required that RESBUF is correctly 7898b9484cSchristos aligned for a 32-bit value. */ 7998b9484cSchristos void * 8098b9484cSchristos sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf) 8198b9484cSchristos { 8298b9484cSchristos ((sha1_uint32 *) resbuf)[0] = SWAP (ctx->A); 8398b9484cSchristos ((sha1_uint32 *) resbuf)[1] = SWAP (ctx->B); 8498b9484cSchristos ((sha1_uint32 *) resbuf)[2] = SWAP (ctx->C); 8598b9484cSchristos ((sha1_uint32 *) resbuf)[3] = SWAP (ctx->D); 8698b9484cSchristos ((sha1_uint32 *) resbuf)[4] = SWAP (ctx->E); 8798b9484cSchristos 8898b9484cSchristos return resbuf; 8998b9484cSchristos } 9098b9484cSchristos 9198b9484cSchristos /* Process the remaining bytes in the internal buffer and the usual 9298b9484cSchristos prolog according to the standard and write the result to RESBUF. 9398b9484cSchristos 9498b9484cSchristos IMPORTANT: On some systems it is required that RESBUF is correctly 9598b9484cSchristos aligned for a 32-bit value. */ 9698b9484cSchristos void * 9798b9484cSchristos sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf) 9898b9484cSchristos { 9998b9484cSchristos /* Take yet unprocessed bytes into account. */ 10098b9484cSchristos sha1_uint32 bytes = ctx->buflen; 10198b9484cSchristos size_t size = (bytes < 56) ? 64 / 4 : 64 * 2 / 4; 10298b9484cSchristos 10398b9484cSchristos /* Now count remaining bytes. */ 10498b9484cSchristos ctx->total[0] += bytes; 10598b9484cSchristos if (ctx->total[0] < bytes) 10698b9484cSchristos ++ctx->total[1]; 10798b9484cSchristos 10898b9484cSchristos /* Put the 64-bit file length in *bits* at the end of the buffer. */ 10998b9484cSchristos ctx->buffer[size - 2] = SWAP ((ctx->total[1] << 3) | (ctx->total[0] >> 29)); 11098b9484cSchristos ctx->buffer[size - 1] = SWAP (ctx->total[0] << 3); 11198b9484cSchristos 11298b9484cSchristos memcpy (&((char *) ctx->buffer)[bytes], fillbuf, (size - 2) * 4 - bytes); 11398b9484cSchristos 11498b9484cSchristos /* Process last bytes. */ 11598b9484cSchristos sha1_process_block (ctx->buffer, size * 4, ctx); 11698b9484cSchristos 11798b9484cSchristos return sha1_read_ctx (ctx, resbuf); 11898b9484cSchristos } 11998b9484cSchristos 12098b9484cSchristos /* Compute SHA1 message digest for bytes read from STREAM. The 12198b9484cSchristos resulting message digest number will be written into the 16 bytes 12298b9484cSchristos beginning at RESBLOCK. */ 12398b9484cSchristos int 12498b9484cSchristos sha1_stream (FILE *stream, void *resblock) 12598b9484cSchristos { 12698b9484cSchristos struct sha1_ctx ctx; 12798b9484cSchristos char buffer[BLOCKSIZE + 72]; 12898b9484cSchristos size_t sum; 12998b9484cSchristos 13098b9484cSchristos /* Initialize the computation context. */ 13198b9484cSchristos sha1_init_ctx (&ctx); 13298b9484cSchristos 13398b9484cSchristos /* Iterate over full file contents. */ 13498b9484cSchristos while (1) 13598b9484cSchristos { 13698b9484cSchristos /* We read the file in blocks of BLOCKSIZE bytes. One call of the 13798b9484cSchristos computation function processes the whole buffer so that with the 13898b9484cSchristos next round of the loop another block can be read. */ 13998b9484cSchristos size_t n; 14098b9484cSchristos sum = 0; 14198b9484cSchristos 14298b9484cSchristos /* Read block. Take care for partial reads. */ 14398b9484cSchristos while (1) 14498b9484cSchristos { 14598b9484cSchristos n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream); 14698b9484cSchristos 14798b9484cSchristos sum += n; 14898b9484cSchristos 14998b9484cSchristos if (sum == BLOCKSIZE) 15098b9484cSchristos break; 15198b9484cSchristos 15298b9484cSchristos if (n == 0) 15398b9484cSchristos { 15498b9484cSchristos /* Check for the error flag IFF N == 0, so that we don't 15598b9484cSchristos exit the loop after a partial read due to e.g., EAGAIN 15698b9484cSchristos or EWOULDBLOCK. */ 15798b9484cSchristos if (ferror (stream)) 15898b9484cSchristos return 1; 15998b9484cSchristos goto process_partial_block; 16098b9484cSchristos } 16198b9484cSchristos 16298b9484cSchristos /* We've read at least one byte, so ignore errors. But always 16398b9484cSchristos check for EOF, since feof may be true even though N > 0. 16498b9484cSchristos Otherwise, we could end up calling fread after EOF. */ 16598b9484cSchristos if (feof (stream)) 16698b9484cSchristos goto process_partial_block; 16798b9484cSchristos } 16898b9484cSchristos 16998b9484cSchristos /* Process buffer with BLOCKSIZE bytes. Note that 17098b9484cSchristos BLOCKSIZE % 64 == 0 17198b9484cSchristos */ 17298b9484cSchristos sha1_process_block (buffer, BLOCKSIZE, &ctx); 17398b9484cSchristos } 17498b9484cSchristos 17598b9484cSchristos process_partial_block:; 17698b9484cSchristos 17798b9484cSchristos /* Process any remaining bytes. */ 17898b9484cSchristos if (sum > 0) 17998b9484cSchristos sha1_process_bytes (buffer, sum, &ctx); 18098b9484cSchristos 18198b9484cSchristos /* Construct result in desired memory. */ 18298b9484cSchristos sha1_finish_ctx (&ctx, resblock); 18398b9484cSchristos return 0; 18498b9484cSchristos } 18598b9484cSchristos 18698b9484cSchristos /* Compute SHA1 message digest for LEN bytes beginning at BUFFER. The 18798b9484cSchristos result is always in little endian byte order, so that a byte-wise 18898b9484cSchristos output yields to the wanted ASCII representation of the message 18998b9484cSchristos digest. */ 19098b9484cSchristos void * 19198b9484cSchristos sha1_buffer (const char *buffer, size_t len, void *resblock) 19298b9484cSchristos { 19398b9484cSchristos struct sha1_ctx ctx; 19498b9484cSchristos 19598b9484cSchristos /* Initialize the computation context. */ 19698b9484cSchristos sha1_init_ctx (&ctx); 19798b9484cSchristos 19898b9484cSchristos /* Process whole buffer but last len % 64 bytes. */ 19998b9484cSchristos sha1_process_bytes (buffer, len, &ctx); 20098b9484cSchristos 20198b9484cSchristos /* Put result in desired memory area. */ 20298b9484cSchristos return sha1_finish_ctx (&ctx, resblock); 20398b9484cSchristos } 20498b9484cSchristos 20598b9484cSchristos void 20698b9484cSchristos sha1_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx) 20798b9484cSchristos { 20898b9484cSchristos /* When we already have some bits in our internal buffer concatenate 20998b9484cSchristos both inputs first. */ 21098b9484cSchristos if (ctx->buflen != 0) 21198b9484cSchristos { 21298b9484cSchristos size_t left_over = ctx->buflen; 21398b9484cSchristos size_t add = 128 - left_over > len ? len : 128 - left_over; 21498b9484cSchristos 21598b9484cSchristos memcpy (&((char *) ctx->buffer)[left_over], buffer, add); 21698b9484cSchristos ctx->buflen += add; 21798b9484cSchristos 21898b9484cSchristos if (ctx->buflen > 64) 21998b9484cSchristos { 22098b9484cSchristos sha1_process_block (ctx->buffer, ctx->buflen & ~63, ctx); 22198b9484cSchristos 22298b9484cSchristos ctx->buflen &= 63; 22398b9484cSchristos /* The regions in the following copy operation cannot overlap. */ 22498b9484cSchristos memcpy (ctx->buffer, 22598b9484cSchristos &((char *) ctx->buffer)[(left_over + add) & ~63], 22698b9484cSchristos ctx->buflen); 22798b9484cSchristos } 22898b9484cSchristos 22998b9484cSchristos buffer = (const char *) buffer + add; 23098b9484cSchristos len -= add; 23198b9484cSchristos } 23298b9484cSchristos 23398b9484cSchristos /* Process available complete blocks. */ 23498b9484cSchristos if (len >= 64) 23598b9484cSchristos { 23698b9484cSchristos #if !_STRING_ARCH_unaligned 23798b9484cSchristos # define alignof(type) offsetof (struct { char c; type x; }, x) 23898b9484cSchristos # define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0) 23998b9484cSchristos if (UNALIGNED_P (buffer)) 24098b9484cSchristos while (len > 64) 24198b9484cSchristos { 24298b9484cSchristos sha1_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx); 24398b9484cSchristos buffer = (const char *) buffer + 64; 24498b9484cSchristos len -= 64; 24598b9484cSchristos } 24698b9484cSchristos else 24798b9484cSchristos #endif 24898b9484cSchristos { 24998b9484cSchristos sha1_process_block (buffer, len & ~63, ctx); 25098b9484cSchristos buffer = (const char *) buffer + (len & ~63); 25198b9484cSchristos len &= 63; 25298b9484cSchristos } 25398b9484cSchristos } 25498b9484cSchristos 25598b9484cSchristos /* Move remaining bytes in internal buffer. */ 25698b9484cSchristos if (len > 0) 25798b9484cSchristos { 25898b9484cSchristos size_t left_over = ctx->buflen; 25998b9484cSchristos 26098b9484cSchristos memcpy (&((char *) ctx->buffer)[left_over], buffer, len); 26198b9484cSchristos left_over += len; 26298b9484cSchristos if (left_over >= 64) 26398b9484cSchristos { 26498b9484cSchristos sha1_process_block (ctx->buffer, 64, ctx); 26598b9484cSchristos left_over -= 64; 2664b169a6bSchristos memmove (ctx->buffer, &ctx->buffer[16], left_over); 26798b9484cSchristos } 26898b9484cSchristos ctx->buflen = left_over; 26998b9484cSchristos } 27098b9484cSchristos } 27198b9484cSchristos 27298b9484cSchristos /* --- Code below is the primary difference between md5.c and sha1.c --- */ 27398b9484cSchristos 27498b9484cSchristos /* SHA1 round constants */ 27598b9484cSchristos #define K1 0x5a827999 27698b9484cSchristos #define K2 0x6ed9eba1 27798b9484cSchristos #define K3 0x8f1bbcdc 27898b9484cSchristos #define K4 0xca62c1d6 27998b9484cSchristos 28098b9484cSchristos /* Round functions. Note that F2 is the same as F4. */ 28198b9484cSchristos #define F1(B,C,D) ( D ^ ( B & ( C ^ D ) ) ) 28298b9484cSchristos #define F2(B,C,D) (B ^ C ^ D) 28398b9484cSchristos #define F3(B,C,D) ( ( B & C ) | ( D & ( B | C ) ) ) 28498b9484cSchristos #define F4(B,C,D) (B ^ C ^ D) 28598b9484cSchristos 28698b9484cSchristos /* Process LEN bytes of BUFFER, accumulating context into CTX. 28798b9484cSchristos It is assumed that LEN % 64 == 0. 28898b9484cSchristos Most of this code comes from GnuPG's cipher/sha1.c. */ 28998b9484cSchristos 29098b9484cSchristos void 29198b9484cSchristos sha1_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx) 29298b9484cSchristos { 29398b9484cSchristos const sha1_uint32 *words = (const sha1_uint32*) buffer; 29498b9484cSchristos size_t nwords = len / sizeof (sha1_uint32); 29598b9484cSchristos const sha1_uint32 *endp = words + nwords; 29698b9484cSchristos sha1_uint32 x[16]; 29798b9484cSchristos sha1_uint32 a = ctx->A; 29898b9484cSchristos sha1_uint32 b = ctx->B; 29998b9484cSchristos sha1_uint32 c = ctx->C; 30098b9484cSchristos sha1_uint32 d = ctx->D; 30198b9484cSchristos sha1_uint32 e = ctx->E; 30298b9484cSchristos 30398b9484cSchristos /* First increment the byte count. RFC 1321 specifies the possible 30498b9484cSchristos length of the file up to 2^64 bits. Here we only compute the 30598b9484cSchristos number of bytes. Do a double word increment. */ 30698b9484cSchristos ctx->total[0] += len; 307a2e2270fSchristos ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < len); 30898b9484cSchristos 30998b9484cSchristos #define rol(x, n) (((x) << (n)) | ((sha1_uint32) (x) >> (32 - (n)))) 31098b9484cSchristos 31198b9484cSchristos #define M(I) ( tm = x[I&0x0f] ^ x[(I-14)&0x0f] \ 31298b9484cSchristos ^ x[(I-8)&0x0f] ^ x[(I-3)&0x0f] \ 31398b9484cSchristos , (x[I&0x0f] = rol(tm, 1)) ) 31498b9484cSchristos 31598b9484cSchristos #define R(A,B,C,D,E,F,K,M) do { E += rol( A, 5 ) \ 31698b9484cSchristos + F( B, C, D ) \ 31798b9484cSchristos + K \ 31898b9484cSchristos + M; \ 31998b9484cSchristos B = rol( B, 30 ); \ 32098b9484cSchristos } while(0) 32198b9484cSchristos 32298b9484cSchristos while (words < endp) 32398b9484cSchristos { 32498b9484cSchristos sha1_uint32 tm; 32598b9484cSchristos int t; 32698b9484cSchristos for (t = 0; t < 16; t++) 32798b9484cSchristos { 32898b9484cSchristos x[t] = SWAP (*words); 32998b9484cSchristos words++; 33098b9484cSchristos } 33198b9484cSchristos 33298b9484cSchristos R( a, b, c, d, e, F1, K1, x[ 0] ); 33398b9484cSchristos R( e, a, b, c, d, F1, K1, x[ 1] ); 33498b9484cSchristos R( d, e, a, b, c, F1, K1, x[ 2] ); 33598b9484cSchristos R( c, d, e, a, b, F1, K1, x[ 3] ); 33698b9484cSchristos R( b, c, d, e, a, F1, K1, x[ 4] ); 33798b9484cSchristos R( a, b, c, d, e, F1, K1, x[ 5] ); 33898b9484cSchristos R( e, a, b, c, d, F1, K1, x[ 6] ); 33998b9484cSchristos R( d, e, a, b, c, F1, K1, x[ 7] ); 34098b9484cSchristos R( c, d, e, a, b, F1, K1, x[ 8] ); 34198b9484cSchristos R( b, c, d, e, a, F1, K1, x[ 9] ); 34298b9484cSchristos R( a, b, c, d, e, F1, K1, x[10] ); 34398b9484cSchristos R( e, a, b, c, d, F1, K1, x[11] ); 34498b9484cSchristos R( d, e, a, b, c, F1, K1, x[12] ); 34598b9484cSchristos R( c, d, e, a, b, F1, K1, x[13] ); 34698b9484cSchristos R( b, c, d, e, a, F1, K1, x[14] ); 34798b9484cSchristos R( a, b, c, d, e, F1, K1, x[15] ); 34898b9484cSchristos R( e, a, b, c, d, F1, K1, M(16) ); 34998b9484cSchristos R( d, e, a, b, c, F1, K1, M(17) ); 35098b9484cSchristos R( c, d, e, a, b, F1, K1, M(18) ); 35198b9484cSchristos R( b, c, d, e, a, F1, K1, M(19) ); 35298b9484cSchristos R( a, b, c, d, e, F2, K2, M(20) ); 35398b9484cSchristos R( e, a, b, c, d, F2, K2, M(21) ); 35498b9484cSchristos R( d, e, a, b, c, F2, K2, M(22) ); 35598b9484cSchristos R( c, d, e, a, b, F2, K2, M(23) ); 35698b9484cSchristos R( b, c, d, e, a, F2, K2, M(24) ); 35798b9484cSchristos R( a, b, c, d, e, F2, K2, M(25) ); 35898b9484cSchristos R( e, a, b, c, d, F2, K2, M(26) ); 35998b9484cSchristos R( d, e, a, b, c, F2, K2, M(27) ); 36098b9484cSchristos R( c, d, e, a, b, F2, K2, M(28) ); 36198b9484cSchristos R( b, c, d, e, a, F2, K2, M(29) ); 36298b9484cSchristos R( a, b, c, d, e, F2, K2, M(30) ); 36398b9484cSchristos R( e, a, b, c, d, F2, K2, M(31) ); 36498b9484cSchristos R( d, e, a, b, c, F2, K2, M(32) ); 36598b9484cSchristos R( c, d, e, a, b, F2, K2, M(33) ); 36698b9484cSchristos R( b, c, d, e, a, F2, K2, M(34) ); 36798b9484cSchristos R( a, b, c, d, e, F2, K2, M(35) ); 36898b9484cSchristos R( e, a, b, c, d, F2, K2, M(36) ); 36998b9484cSchristos R( d, e, a, b, c, F2, K2, M(37) ); 37098b9484cSchristos R( c, d, e, a, b, F2, K2, M(38) ); 37198b9484cSchristos R( b, c, d, e, a, F2, K2, M(39) ); 37298b9484cSchristos R( a, b, c, d, e, F3, K3, M(40) ); 37398b9484cSchristos R( e, a, b, c, d, F3, K3, M(41) ); 37498b9484cSchristos R( d, e, a, b, c, F3, K3, M(42) ); 37598b9484cSchristos R( c, d, e, a, b, F3, K3, M(43) ); 37698b9484cSchristos R( b, c, d, e, a, F3, K3, M(44) ); 37798b9484cSchristos R( a, b, c, d, e, F3, K3, M(45) ); 37898b9484cSchristos R( e, a, b, c, d, F3, K3, M(46) ); 37998b9484cSchristos R( d, e, a, b, c, F3, K3, M(47) ); 38098b9484cSchristos R( c, d, e, a, b, F3, K3, M(48) ); 38198b9484cSchristos R( b, c, d, e, a, F3, K3, M(49) ); 38298b9484cSchristos R( a, b, c, d, e, F3, K3, M(50) ); 38398b9484cSchristos R( e, a, b, c, d, F3, K3, M(51) ); 38498b9484cSchristos R( d, e, a, b, c, F3, K3, M(52) ); 38598b9484cSchristos R( c, d, e, a, b, F3, K3, M(53) ); 38698b9484cSchristos R( b, c, d, e, a, F3, K3, M(54) ); 38798b9484cSchristos R( a, b, c, d, e, F3, K3, M(55) ); 38898b9484cSchristos R( e, a, b, c, d, F3, K3, M(56) ); 38998b9484cSchristos R( d, e, a, b, c, F3, K3, M(57) ); 39098b9484cSchristos R( c, d, e, a, b, F3, K3, M(58) ); 39198b9484cSchristos R( b, c, d, e, a, F3, K3, M(59) ); 39298b9484cSchristos R( a, b, c, d, e, F4, K4, M(60) ); 39398b9484cSchristos R( e, a, b, c, d, F4, K4, M(61) ); 39498b9484cSchristos R( d, e, a, b, c, F4, K4, M(62) ); 39598b9484cSchristos R( c, d, e, a, b, F4, K4, M(63) ); 39698b9484cSchristos R( b, c, d, e, a, F4, K4, M(64) ); 39798b9484cSchristos R( a, b, c, d, e, F4, K4, M(65) ); 39898b9484cSchristos R( e, a, b, c, d, F4, K4, M(66) ); 39998b9484cSchristos R( d, e, a, b, c, F4, K4, M(67) ); 40098b9484cSchristos R( c, d, e, a, b, F4, K4, M(68) ); 40198b9484cSchristos R( b, c, d, e, a, F4, K4, M(69) ); 40298b9484cSchristos R( a, b, c, d, e, F4, K4, M(70) ); 40398b9484cSchristos R( e, a, b, c, d, F4, K4, M(71) ); 40498b9484cSchristos R( d, e, a, b, c, F4, K4, M(72) ); 40598b9484cSchristos R( c, d, e, a, b, F4, K4, M(73) ); 40698b9484cSchristos R( b, c, d, e, a, F4, K4, M(74) ); 40798b9484cSchristos R( a, b, c, d, e, F4, K4, M(75) ); 40898b9484cSchristos R( e, a, b, c, d, F4, K4, M(76) ); 40998b9484cSchristos R( d, e, a, b, c, F4, K4, M(77) ); 41098b9484cSchristos R( c, d, e, a, b, F4, K4, M(78) ); 41198b9484cSchristos R( b, c, d, e, a, F4, K4, M(79) ); 41298b9484cSchristos 41398b9484cSchristos a = ctx->A += a; 41498b9484cSchristos b = ctx->B += b; 41598b9484cSchristos c = ctx->C += c; 41698b9484cSchristos d = ctx->D += d; 41798b9484cSchristos e = ctx->E += e; 41898b9484cSchristos } 41998b9484cSchristos } 420*5173eb0aSchristos 421*5173eb0aSchristos #if defined(HAVE_X86_SHA1_HW_SUPPORT) 422*5173eb0aSchristos /* HW specific version of sha1_process_bytes. */ 423*5173eb0aSchristos 424*5173eb0aSchristos static void sha1_hw_process_block (const void *, size_t, struct sha1_ctx *); 425*5173eb0aSchristos 426*5173eb0aSchristos static void 427*5173eb0aSchristos sha1_hw_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx) 428*5173eb0aSchristos { 429*5173eb0aSchristos /* When we already have some bits in our internal buffer concatenate 430*5173eb0aSchristos both inputs first. */ 431*5173eb0aSchristos if (ctx->buflen != 0) 432*5173eb0aSchristos { 433*5173eb0aSchristos size_t left_over = ctx->buflen; 434*5173eb0aSchristos size_t add = 128 - left_over > len ? len : 128 - left_over; 435*5173eb0aSchristos 436*5173eb0aSchristos memcpy (&((char *) ctx->buffer)[left_over], buffer, add); 437*5173eb0aSchristos ctx->buflen += add; 438*5173eb0aSchristos 439*5173eb0aSchristos if (ctx->buflen > 64) 440*5173eb0aSchristos { 441*5173eb0aSchristos sha1_hw_process_block (ctx->buffer, ctx->buflen & ~63, ctx); 442*5173eb0aSchristos 443*5173eb0aSchristos ctx->buflen &= 63; 444*5173eb0aSchristos /* The regions in the following copy operation cannot overlap. */ 445*5173eb0aSchristos memcpy (ctx->buffer, 446*5173eb0aSchristos &((char *) ctx->buffer)[(left_over + add) & ~63], 447*5173eb0aSchristos ctx->buflen); 448*5173eb0aSchristos } 449*5173eb0aSchristos 450*5173eb0aSchristos buffer = (const char *) buffer + add; 451*5173eb0aSchristos len -= add; 452*5173eb0aSchristos } 453*5173eb0aSchristos 454*5173eb0aSchristos /* Process available complete blocks. */ 455*5173eb0aSchristos if (len >= 64) 456*5173eb0aSchristos { 457*5173eb0aSchristos #if !_STRING_ARCH_unaligned 458*5173eb0aSchristos # define alignof(type) offsetof (struct { char c; type x; }, x) 459*5173eb0aSchristos # define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0) 460*5173eb0aSchristos if (UNALIGNED_P (buffer)) 461*5173eb0aSchristos while (len > 64) 462*5173eb0aSchristos { 463*5173eb0aSchristos sha1_hw_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx); 464*5173eb0aSchristos buffer = (const char *) buffer + 64; 465*5173eb0aSchristos len -= 64; 466*5173eb0aSchristos } 467*5173eb0aSchristos else 468*5173eb0aSchristos #endif 469*5173eb0aSchristos { 470*5173eb0aSchristos sha1_hw_process_block (buffer, len & ~63, ctx); 471*5173eb0aSchristos buffer = (const char *) buffer + (len & ~63); 472*5173eb0aSchristos len &= 63; 473*5173eb0aSchristos } 474*5173eb0aSchristos } 475*5173eb0aSchristos 476*5173eb0aSchristos /* Move remaining bytes in internal buffer. */ 477*5173eb0aSchristos if (len > 0) 478*5173eb0aSchristos { 479*5173eb0aSchristos size_t left_over = ctx->buflen; 480*5173eb0aSchristos 481*5173eb0aSchristos memcpy (&((char *) ctx->buffer)[left_over], buffer, len); 482*5173eb0aSchristos left_over += len; 483*5173eb0aSchristos if (left_over >= 64) 484*5173eb0aSchristos { 485*5173eb0aSchristos sha1_hw_process_block (ctx->buffer, 64, ctx); 486*5173eb0aSchristos left_over -= 64; 487*5173eb0aSchristos memmove (ctx->buffer, &ctx->buffer[16], left_over); 488*5173eb0aSchristos } 489*5173eb0aSchristos ctx->buflen = left_over; 490*5173eb0aSchristos } 491*5173eb0aSchristos } 492*5173eb0aSchristos 493*5173eb0aSchristos /* Process LEN bytes of BUFFER, accumulating context into CTX. 494*5173eb0aSchristos Using CPU specific intrinsics. */ 495*5173eb0aSchristos 496*5173eb0aSchristos #ifdef HAVE_X86_SHA1_HW_SUPPORT 497*5173eb0aSchristos __attribute__((__target__ ("sse4.1,sha"))) 498*5173eb0aSchristos #endif 499*5173eb0aSchristos static void 500*5173eb0aSchristos sha1_hw_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx) 501*5173eb0aSchristos { 502*5173eb0aSchristos #ifdef HAVE_X86_SHA1_HW_SUPPORT 503*5173eb0aSchristos /* Implemented from 504*5173eb0aSchristos https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html */ 505*5173eb0aSchristos const __m128i *words = (const __m128i *) buffer; 506*5173eb0aSchristos const __m128i *endp = (const __m128i *) ((const char *) buffer + len); 507*5173eb0aSchristos __m128i abcd, abcd_save, e0, e0_save, e1, msg0, msg1, msg2, msg3; 508*5173eb0aSchristos const __m128i shuf_mask 509*5173eb0aSchristos = _mm_set_epi64x (0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL); 510*5173eb0aSchristos char check[((offsetof (struct sha1_ctx, B) 511*5173eb0aSchristos == offsetof (struct sha1_ctx, A) + sizeof (ctx->A)) 512*5173eb0aSchristos && (offsetof (struct sha1_ctx, C) 513*5173eb0aSchristos == offsetof (struct sha1_ctx, A) + 2 * sizeof (ctx->A)) 514*5173eb0aSchristos && (offsetof (struct sha1_ctx, D) 515*5173eb0aSchristos == offsetof (struct sha1_ctx, A) + 3 * sizeof (ctx->A))) 516*5173eb0aSchristos ? 1 : -1]; 517*5173eb0aSchristos 518*5173eb0aSchristos /* First increment the byte count. RFC 1321 specifies the possible 519*5173eb0aSchristos length of the file up to 2^64 bits. Here we only compute the 520*5173eb0aSchristos number of bytes. Do a double word increment. */ 521*5173eb0aSchristos ctx->total[0] += len; 522*5173eb0aSchristos ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < len); 523*5173eb0aSchristos 524*5173eb0aSchristos (void) &check[0]; 525*5173eb0aSchristos abcd = _mm_loadu_si128 ((const __m128i *) &ctx->A); 526*5173eb0aSchristos e0 = _mm_set_epi32 (ctx->E, 0, 0, 0); 527*5173eb0aSchristos abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */ 528*5173eb0aSchristos 529*5173eb0aSchristos while (words < endp) 530*5173eb0aSchristos { 531*5173eb0aSchristos abcd_save = abcd; 532*5173eb0aSchristos e0_save = e0; 533*5173eb0aSchristos 534*5173eb0aSchristos /* 0..3 */ 535*5173eb0aSchristos msg0 = _mm_loadu_si128 (words); 536*5173eb0aSchristos msg0 = _mm_shuffle_epi8 (msg0, shuf_mask); 537*5173eb0aSchristos e0 = _mm_add_epi32 (e0, msg0); 538*5173eb0aSchristos e1 = abcd; 539*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0); 540*5173eb0aSchristos 541*5173eb0aSchristos /* 4..7 */ 542*5173eb0aSchristos msg1 = _mm_loadu_si128 (words + 1); 543*5173eb0aSchristos msg1 = _mm_shuffle_epi8 (msg1, shuf_mask); 544*5173eb0aSchristos e1 = _mm_sha1nexte_epu32 (e1, msg1); 545*5173eb0aSchristos e0 = abcd; 546*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0); 547*5173eb0aSchristos msg0 = _mm_sha1msg1_epu32 (msg0, msg1); 548*5173eb0aSchristos 549*5173eb0aSchristos /* 8..11 */ 550*5173eb0aSchristos msg2 = _mm_loadu_si128 (words + 2); 551*5173eb0aSchristos msg2 = _mm_shuffle_epi8 (msg2, shuf_mask); 552*5173eb0aSchristos e0 = _mm_sha1nexte_epu32 (e0, msg2); 553*5173eb0aSchristos e1 = abcd; 554*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0); 555*5173eb0aSchristos msg1 = _mm_sha1msg1_epu32 (msg1, msg2); 556*5173eb0aSchristos msg0 = _mm_xor_si128 (msg0, msg2); 557*5173eb0aSchristos 558*5173eb0aSchristos /* 12..15 */ 559*5173eb0aSchristos msg3 = _mm_loadu_si128 (words + 3); 560*5173eb0aSchristos msg3 = _mm_shuffle_epi8 (msg3, shuf_mask); 561*5173eb0aSchristos e1 = _mm_sha1nexte_epu32 (e1, msg3); 562*5173eb0aSchristos e0 = abcd; 563*5173eb0aSchristos msg0 = _mm_sha1msg2_epu32 (msg0, msg3); 564*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0); 565*5173eb0aSchristos msg2 = _mm_sha1msg1_epu32 (msg2, msg3); 566*5173eb0aSchristos msg1 = _mm_xor_si128 (msg1, msg3); 567*5173eb0aSchristos 568*5173eb0aSchristos /* 16..19 */ 569*5173eb0aSchristos e0 = _mm_sha1nexte_epu32 (e0, msg0); 570*5173eb0aSchristos e1 = abcd; 571*5173eb0aSchristos msg1 = _mm_sha1msg2_epu32 (msg1, msg0); 572*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0); 573*5173eb0aSchristos msg3 = _mm_sha1msg1_epu32 (msg3, msg0); 574*5173eb0aSchristos msg2 = _mm_xor_si128 (msg2, msg0); 575*5173eb0aSchristos 576*5173eb0aSchristos /* 20..23 */ 577*5173eb0aSchristos e1 = _mm_sha1nexte_epu32 (e1, msg1); 578*5173eb0aSchristos e0 = abcd; 579*5173eb0aSchristos msg2 = _mm_sha1msg2_epu32 (msg2, msg1); 580*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1); 581*5173eb0aSchristos msg0 = _mm_sha1msg1_epu32 (msg0, msg1); 582*5173eb0aSchristos msg3 = _mm_xor_si128 (msg3, msg1); 583*5173eb0aSchristos 584*5173eb0aSchristos /* 24..27 */ 585*5173eb0aSchristos e0 = _mm_sha1nexte_epu32 (e0, msg2); 586*5173eb0aSchristos e1 = abcd; 587*5173eb0aSchristos msg3 = _mm_sha1msg2_epu32 (msg3, msg2); 588*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1); 589*5173eb0aSchristos msg1 = _mm_sha1msg1_epu32 (msg1, msg2); 590*5173eb0aSchristos msg0 = _mm_xor_si128 (msg0, msg2); 591*5173eb0aSchristos 592*5173eb0aSchristos /* 28..31 */ 593*5173eb0aSchristos e1 = _mm_sha1nexte_epu32 (e1, msg3); 594*5173eb0aSchristos e0 = abcd; 595*5173eb0aSchristos msg0 = _mm_sha1msg2_epu32 (msg0, msg3); 596*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1); 597*5173eb0aSchristos msg2 = _mm_sha1msg1_epu32 (msg2, msg3); 598*5173eb0aSchristos msg1 = _mm_xor_si128 (msg1, msg3); 599*5173eb0aSchristos 600*5173eb0aSchristos /* 32..35 */ 601*5173eb0aSchristos e0 = _mm_sha1nexte_epu32 (e0, msg0); 602*5173eb0aSchristos e1 = abcd; 603*5173eb0aSchristos msg1 = _mm_sha1msg2_epu32 (msg1, msg0); 604*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1); 605*5173eb0aSchristos msg3 = _mm_sha1msg1_epu32 (msg3, msg0); 606*5173eb0aSchristos msg2 = _mm_xor_si128 (msg2, msg0); 607*5173eb0aSchristos 608*5173eb0aSchristos /* 36..39 */ 609*5173eb0aSchristos e1 = _mm_sha1nexte_epu32 (e1, msg1); 610*5173eb0aSchristos e0 = abcd; 611*5173eb0aSchristos msg2 = _mm_sha1msg2_epu32 (msg2, msg1); 612*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1); 613*5173eb0aSchristos msg0 = _mm_sha1msg1_epu32 (msg0, msg1); 614*5173eb0aSchristos msg3 = _mm_xor_si128 (msg3, msg1); 615*5173eb0aSchristos 616*5173eb0aSchristos /* 40..43 */ 617*5173eb0aSchristos e0 = _mm_sha1nexte_epu32 (e0, msg2); 618*5173eb0aSchristos e1 = abcd; 619*5173eb0aSchristos msg3 = _mm_sha1msg2_epu32 (msg3, msg2); 620*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2); 621*5173eb0aSchristos msg1 = _mm_sha1msg1_epu32 (msg1, msg2); 622*5173eb0aSchristos msg0 = _mm_xor_si128 (msg0, msg2); 623*5173eb0aSchristos 624*5173eb0aSchristos /* 44..47 */ 625*5173eb0aSchristos e1 = _mm_sha1nexte_epu32 (e1, msg3); 626*5173eb0aSchristos e0 = abcd; 627*5173eb0aSchristos msg0 = _mm_sha1msg2_epu32 (msg0, msg3); 628*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2); 629*5173eb0aSchristos msg2 = _mm_sha1msg1_epu32 (msg2, msg3); 630*5173eb0aSchristos msg1 = _mm_xor_si128 (msg1, msg3); 631*5173eb0aSchristos 632*5173eb0aSchristos /* 48..51 */ 633*5173eb0aSchristos e0 = _mm_sha1nexte_epu32 (e0, msg0); 634*5173eb0aSchristos e1 = abcd; 635*5173eb0aSchristos msg1 = _mm_sha1msg2_epu32 (msg1, msg0); 636*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2); 637*5173eb0aSchristos msg3 = _mm_sha1msg1_epu32 (msg3, msg0); 638*5173eb0aSchristos msg2 = _mm_xor_si128 (msg2, msg0); 639*5173eb0aSchristos 640*5173eb0aSchristos /* 52..55 */ 641*5173eb0aSchristos e1 = _mm_sha1nexte_epu32 (e1, msg1); 642*5173eb0aSchristos e0 = abcd; 643*5173eb0aSchristos msg2 = _mm_sha1msg2_epu32 (msg2, msg1); 644*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2); 645*5173eb0aSchristos msg0 = _mm_sha1msg1_epu32 (msg0, msg1); 646*5173eb0aSchristos msg3 = _mm_xor_si128 (msg3, msg1); 647*5173eb0aSchristos 648*5173eb0aSchristos /* 56..59 */ 649*5173eb0aSchristos e0 = _mm_sha1nexte_epu32 (e0, msg2); 650*5173eb0aSchristos e1 = abcd; 651*5173eb0aSchristos msg3 = _mm_sha1msg2_epu32 (msg3, msg2); 652*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2); 653*5173eb0aSchristos msg1 = _mm_sha1msg1_epu32 (msg1, msg2); 654*5173eb0aSchristos msg0 = _mm_xor_si128 (msg0, msg2); 655*5173eb0aSchristos 656*5173eb0aSchristos /* 60..63 */ 657*5173eb0aSchristos e1 = _mm_sha1nexte_epu32 (e1, msg3); 658*5173eb0aSchristos e0 = abcd; 659*5173eb0aSchristos msg0 = _mm_sha1msg2_epu32 (msg0, msg3); 660*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3); 661*5173eb0aSchristos msg2 = _mm_sha1msg1_epu32 (msg2, msg3); 662*5173eb0aSchristos msg1 = _mm_xor_si128 (msg1, msg3); 663*5173eb0aSchristos 664*5173eb0aSchristos /* 64..67 */ 665*5173eb0aSchristos e0 = _mm_sha1nexte_epu32 (e0, msg0); 666*5173eb0aSchristos e1 = abcd; 667*5173eb0aSchristos msg1 = _mm_sha1msg2_epu32 (msg1, msg0); 668*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3); 669*5173eb0aSchristos msg3 = _mm_sha1msg1_epu32 (msg3, msg0); 670*5173eb0aSchristos msg2 = _mm_xor_si128 (msg2, msg0); 671*5173eb0aSchristos 672*5173eb0aSchristos /* 68..71 */ 673*5173eb0aSchristos e1 = _mm_sha1nexte_epu32 (e1, msg1); 674*5173eb0aSchristos e0 = abcd; 675*5173eb0aSchristos msg2 = _mm_sha1msg2_epu32 (msg2, msg1); 676*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3); 677*5173eb0aSchristos msg3 = _mm_xor_si128 (msg3, msg1); 678*5173eb0aSchristos 679*5173eb0aSchristos /* 72..75 */ 680*5173eb0aSchristos e0 = _mm_sha1nexte_epu32 (e0, msg2); 681*5173eb0aSchristos e1 = abcd; 682*5173eb0aSchristos msg3 = _mm_sha1msg2_epu32 (msg3, msg2); 683*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3); 684*5173eb0aSchristos 685*5173eb0aSchristos /* 76..79 */ 686*5173eb0aSchristos e1 = _mm_sha1nexte_epu32 (e1, msg3); 687*5173eb0aSchristos e0 = abcd; 688*5173eb0aSchristos abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3); 689*5173eb0aSchristos 690*5173eb0aSchristos /* Finalize. */ 691*5173eb0aSchristos e0 = _mm_sha1nexte_epu32 (e0, e0_save); 692*5173eb0aSchristos abcd = _mm_add_epi32 (abcd, abcd_save); 693*5173eb0aSchristos 694*5173eb0aSchristos words = words + 4; 695*5173eb0aSchristos } 696*5173eb0aSchristos 697*5173eb0aSchristos abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */ 698*5173eb0aSchristos _mm_storeu_si128 ((__m128i *) &ctx->A, abcd); 699*5173eb0aSchristos ctx->E = _mm_extract_epi32 (e0, 3); 700*5173eb0aSchristos #endif 701*5173eb0aSchristos } 702*5173eb0aSchristos #endif 703*5173eb0aSchristos 704*5173eb0aSchristos /* Return sha1_process_bytes or some hardware optimized version thereof 705*5173eb0aSchristos depending on current CPU. */ 706*5173eb0aSchristos 707*5173eb0aSchristos sha1_process_bytes_fn 708*5173eb0aSchristos sha1_choose_process_bytes (void) 709*5173eb0aSchristos { 710*5173eb0aSchristos #ifdef HAVE_X86_SHA1_HW_SUPPORT 711*5173eb0aSchristos unsigned int eax, ebx, ecx, edx; 712*5173eb0aSchristos if (__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx) 713*5173eb0aSchristos && (ebx & bit_SHA) != 0 714*5173eb0aSchristos && __get_cpuid (1, &eax, &ebx, &ecx, &edx) 715*5173eb0aSchristos && (ecx & bit_SSE4_1) != 0) 716*5173eb0aSchristos return sha1_hw_process_bytes; 717*5173eb0aSchristos #endif 718*5173eb0aSchristos return sha1_process_bytes; 719*5173eb0aSchristos } 720