xref: /isa-l_crypto/sm3_mb/sm3_ctx_avx512.c (revision d28f1034f736e3eb791c3cf6bff3e2fa81fb5331)
1374d57feSjiaqizho /**********************************************************************
2374d57feSjiaqizho   Copyright(c) 2011-2020 Intel Corporation All rights reserved.
3374d57feSjiaqizho 
4374d57feSjiaqizho   Redistribution and use in source and binary forms, with or without
5374d57feSjiaqizho   modification, are permitted provided that the following conditions
6374d57feSjiaqizho   are met:
7374d57feSjiaqizho     * Redistributions of source code must retain the above copyright
8374d57feSjiaqizho       notice, this list of conditions and the following disclaimer.
9374d57feSjiaqizho     * Redistributions in binary form must reproduce the above copyright
10374d57feSjiaqizho       notice, this list of conditions and the following disclaimer in
11374d57feSjiaqizho       the documentation and/or other materials provided with the
12374d57feSjiaqizho       distribution.
13374d57feSjiaqizho     * Neither the name of Intel Corporation nor the names of its
14374d57feSjiaqizho       contributors may be used to endorse or promote products derived
15374d57feSjiaqizho       from this software without specific prior written permission.
16374d57feSjiaqizho 
17374d57feSjiaqizho   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18374d57feSjiaqizho   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19374d57feSjiaqizho   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20374d57feSjiaqizho   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21374d57feSjiaqizho   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22374d57feSjiaqizho   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23374d57feSjiaqizho   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24374d57feSjiaqizho   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25374d57feSjiaqizho   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26374d57feSjiaqizho   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27374d57feSjiaqizho   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28374d57feSjiaqizho **********************************************************************/
29374d57feSjiaqizho 
30a6dc8696SGreg Tucker #if defined(__clang__)
31a6dc8696SGreg Tucker #pragma clang attribute push(__attribute__((target("avx2"))), apply_to = function)
32a6dc8696SGreg Tucker #elif defined(__ICC)
33a6dc8696SGreg Tucker #pragma intel optimization_parameter target_arch = AVX2
34a6dc8696SGreg Tucker #elif defined(__ICL)
35a6dc8696SGreg Tucker #pragma[intel] optimization_parameter target_arch = AVX2
36a6dc8696SGreg Tucker #elif (__GNUC__ >= 5)
37a6dc8696SGreg Tucker #pragma GCC target("avx2")
38a6dc8696SGreg Tucker #endif
39a6dc8696SGreg Tucker 
40374d57feSjiaqizho #include "sm3_mb.h"
41*3080abdaSTomasz Kantecki #include "sm3_mb_internal.h"
42374d57feSjiaqizho #include "memcpy_inline.h"
4392aa5aa4SGreg Tucker #include "endian_helper.h"
44374d57feSjiaqizho 
45374d57feSjiaqizho #ifdef _MSC_VER
46374d57feSjiaqizho #include <intrin.h>
47374d57feSjiaqizho #define inline __inline
48374d57feSjiaqizho #endif
49374d57feSjiaqizho 
50b923697dSMarcel Cornu static inline void
516801b27bSTomasz Kantecki hash_init_digest(ISAL_SM3_WORD_T *digest);
52b923697dSMarcel Cornu static inline uint32_t
536801b27bSTomasz Kantecki hash_pad(uint8_t padblock[ISAL_SM3_BLOCK_SIZE * 2], uint64_t total_len);
546801b27bSTomasz Kantecki static ISAL_SM3_HASH_CTX *
556801b27bSTomasz Kantecki sm3_ctx_mgr_resubmit(ISAL_SM3_HASH_CTX_MGR *mgr, ISAL_SM3_HASH_CTX *ctx);
56374d57feSjiaqizho 
57b923697dSMarcel Cornu void
586801b27bSTomasz Kantecki _sm3_mb_mgr_init_avx512(ISAL_SM3_MB_JOB_MGR *state);
596801b27bSTomasz Kantecki ISAL_SM3_JOB *
606801b27bSTomasz Kantecki _sm3_mb_mgr_submit_avx512(ISAL_SM3_MB_JOB_MGR *state, ISAL_SM3_JOB *job);
616801b27bSTomasz Kantecki ISAL_SM3_JOB *
626801b27bSTomasz Kantecki _sm3_mb_mgr_flush_avx512(ISAL_SM3_MB_JOB_MGR *state);
63374d57feSjiaqizho 
64b923697dSMarcel Cornu void
_sm3_mb_mgr_init_avx512(ISAL_SM3_MB_JOB_MGR * state)656801b27bSTomasz Kantecki _sm3_mb_mgr_init_avx512(ISAL_SM3_MB_JOB_MGR *state)
66374d57feSjiaqizho {
67374d57feSjiaqizho         unsigned int j;
68fabd0c36SMarcel Cornu 
69fabd0c36SMarcel Cornu         memset(state, 0, sizeof(*state));
70374d57feSjiaqizho         state->unused_lanes = 0xfedcba9876543210;
71374d57feSjiaqizho         state->num_lanes_inuse = 0;
726801b27bSTomasz Kantecki         for (j = 0; j < ISAL_SM3_MAX_LANES; j++) {
73374d57feSjiaqizho                 state->lens[j] = 0;
74374d57feSjiaqizho                 state->ldata[j].job_in_lane = 0;
75374d57feSjiaqizho         }
76374d57feSjiaqizho }
77374d57feSjiaqizho 
78b923697dSMarcel Cornu void
_sm3_ctx_mgr_init_avx512(ISAL_SM3_HASH_CTX_MGR * mgr)796801b27bSTomasz Kantecki _sm3_ctx_mgr_init_avx512(ISAL_SM3_HASH_CTX_MGR *mgr)
80374d57feSjiaqizho {
816801b27bSTomasz Kantecki         _sm3_mb_mgr_init_avx512(&mgr->mgr);
82374d57feSjiaqizho }
83374d57feSjiaqizho 
846801b27bSTomasz Kantecki ISAL_SM3_HASH_CTX *
_sm3_ctx_mgr_submit_avx512(ISAL_SM3_HASH_CTX_MGR * mgr,ISAL_SM3_HASH_CTX * ctx,const void * buffer,uint32_t len,ISAL_HASH_CTX_FLAG flags)856801b27bSTomasz Kantecki _sm3_ctx_mgr_submit_avx512(ISAL_SM3_HASH_CTX_MGR *mgr, ISAL_SM3_HASH_CTX *ctx, const void *buffer,
868cb7fe78SPablo de Lara                            uint32_t len, ISAL_HASH_CTX_FLAG flags)
87374d57feSjiaqizho {
888cb7fe78SPablo de Lara         if (flags & (~ISAL_HASH_ENTIRE)) {
89374d57feSjiaqizho                 // User should not pass anything other than FIRST, UPDATE, or LAST
908cb7fe78SPablo de Lara                 ctx->error = ISAL_HASH_CTX_ERROR_INVALID_FLAGS;
91374d57feSjiaqizho                 return ctx;
92374d57feSjiaqizho         }
93374d57feSjiaqizho 
948cb7fe78SPablo de Lara         if (ctx->status & ISAL_HASH_CTX_STS_PROCESSING) {
95374d57feSjiaqizho                 // Cannot submit to a currently processing job.
968cb7fe78SPablo de Lara                 ctx->error = ISAL_HASH_CTX_ERROR_ALREADY_PROCESSING;
97374d57feSjiaqizho                 return ctx;
98374d57feSjiaqizho         }
99374d57feSjiaqizho 
1008cb7fe78SPablo de Lara         if ((ctx->status & ISAL_HASH_CTX_STS_COMPLETE) && !(flags & ISAL_HASH_FIRST)) {
101374d57feSjiaqizho                 // Cannot update a finished job.
1028cb7fe78SPablo de Lara                 ctx->error = ISAL_HASH_CTX_ERROR_ALREADY_COMPLETED;
103374d57feSjiaqizho                 return ctx;
104374d57feSjiaqizho         }
105374d57feSjiaqizho 
1068cb7fe78SPablo de Lara         if (flags & ISAL_HASH_FIRST) {
107374d57feSjiaqizho                 // Init digest
108374d57feSjiaqizho                 hash_init_digest(ctx->job.result_digest);
109374d57feSjiaqizho 
110374d57feSjiaqizho                 // Reset byte counter
111374d57feSjiaqizho                 ctx->total_length = 0;
112374d57feSjiaqizho 
113374d57feSjiaqizho                 // Clear extra blocks
114374d57feSjiaqizho                 ctx->partial_block_buffer_length = 0;
115374d57feSjiaqizho         }
1168cb7fe78SPablo de Lara         ctx->error = ISAL_HASH_CTX_ERROR_NONE;
117374d57feSjiaqizho 
118374d57feSjiaqizho         // Store buffer ptr info from user
119374d57feSjiaqizho         ctx->incoming_buffer = buffer;
120374d57feSjiaqizho         ctx->incoming_buffer_length = len;
121374d57feSjiaqizho 
1228cb7fe78SPablo de Lara         ctx->status = (flags & ISAL_HASH_LAST) ? (ISAL_HASH_CTX_STS) (ISAL_HASH_CTX_STS_PROCESSING |
1238cb7fe78SPablo de Lara                                                                       ISAL_HASH_CTX_STS_LAST)
1248cb7fe78SPablo de Lara                                                : ISAL_HASH_CTX_STS_PROCESSING;
125374d57feSjiaqizho 
126374d57feSjiaqizho         // Advance byte counter
127374d57feSjiaqizho         ctx->total_length += len;
128374d57feSjiaqizho 
129374d57feSjiaqizho         // if partial_block_buffer_length != 0 means ctx get extra data
1306801b27bSTomasz Kantecki         // len < ISAL_SM3_BLOCK_SIZE means data len < ISAL_SM3_BLOCK_SIZE
1316801b27bSTomasz Kantecki         if ((ctx->partial_block_buffer_length) | (len < ISAL_SM3_BLOCK_SIZE)) {
132374d57feSjiaqizho                 // Compute how many bytes to copy from user buffer into extra block
1336801b27bSTomasz Kantecki                 uint32_t copy_len = ISAL_SM3_BLOCK_SIZE - ctx->partial_block_buffer_length;
134374d57feSjiaqizho                 if (len < copy_len)
135374d57feSjiaqizho                         copy_len = len;
136374d57feSjiaqizho 
137374d57feSjiaqizho                 if (copy_len) {
138374d57feSjiaqizho                         // Copy and update relevant pointers and counters
139b923697dSMarcel Cornu                         memcpy_varlen(&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
140b923697dSMarcel Cornu                                       buffer, copy_len);
141374d57feSjiaqizho 
142374d57feSjiaqizho                         ctx->partial_block_buffer_length += copy_len;
143374d57feSjiaqizho                         ctx->incoming_buffer = (const void *) ((const char *) buffer + copy_len);
144374d57feSjiaqizho                         ctx->incoming_buffer_length = len - copy_len;
145374d57feSjiaqizho                 }
146374d57feSjiaqizho                 // The extra block should never contain more than 1 block here
1476801b27bSTomasz Kantecki                 assert(ctx->partial_block_buffer_length <= ISAL_SM3_BLOCK_SIZE);
148374d57feSjiaqizho 
149374d57feSjiaqizho                 // If the extra block buffer contains exactly 1 block, it can be hashed.
1506801b27bSTomasz Kantecki                 if (ctx->partial_block_buffer_length >= ISAL_SM3_BLOCK_SIZE) {
151374d57feSjiaqizho 
152374d57feSjiaqizho                         ctx->partial_block_buffer_length = 0;
153374d57feSjiaqizho                         ctx->job.buffer = ctx->partial_block_buffer;
154374d57feSjiaqizho 
155374d57feSjiaqizho                         ctx->job.len = 1;
1566801b27bSTomasz Kantecki                         ctx = (ISAL_SM3_HASH_CTX *) _sm3_mb_mgr_submit_avx512(&mgr->mgr, &ctx->job);
157374d57feSjiaqizho                 }
158374d57feSjiaqizho         }
159374d57feSjiaqizho 
160374d57feSjiaqizho         return sm3_ctx_mgr_resubmit(mgr, ctx);
161374d57feSjiaqizho }
162374d57feSjiaqizho 
1636801b27bSTomasz Kantecki static ISAL_SM3_HASH_CTX *
sm3_ctx_mgr_resubmit(ISAL_SM3_HASH_CTX_MGR * mgr,ISAL_SM3_HASH_CTX * ctx)1646801b27bSTomasz Kantecki sm3_ctx_mgr_resubmit(ISAL_SM3_HASH_CTX_MGR *mgr, ISAL_SM3_HASH_CTX *ctx)
165374d57feSjiaqizho {
166374d57feSjiaqizho         while (ctx) {
1678cb7fe78SPablo de Lara                 if (ctx->status & ISAL_HASH_CTX_STS_COMPLETE) {
168374d57feSjiaqizho                         unsigned int j;
1698cb7fe78SPablo de Lara                         ctx->status = ISAL_HASH_CTX_STS_COMPLETE; // Clear PROCESSING bit
1706801b27bSTomasz Kantecki                         for (j = 0; j < ISAL_SM3_DIGEST_NWORDS; j++) {
171b923697dSMarcel Cornu                                 ctx->job.result_digest[j] = byteswap32(ctx->job.result_digest[j]);
172374d57feSjiaqizho                         }
173374d57feSjiaqizho                         return ctx;
174374d57feSjiaqizho                 }
175b923697dSMarcel Cornu                 // partial_block_buffer_length must be 0 that means incoming_buffer_length have not
176b923697dSMarcel Cornu                 // be init.
177374d57feSjiaqizho                 if (ctx->partial_block_buffer_length == 0 && ctx->incoming_buffer_length) {
178374d57feSjiaqizho                         const void *buffer = ctx->incoming_buffer;
179374d57feSjiaqizho                         uint32_t len = ctx->incoming_buffer_length;
180374d57feSjiaqizho 
1816801b27bSTomasz Kantecki                         // copy_len will check len % ISAL_SM3_BLOCK_SIZE ?= 0
1826801b27bSTomasz Kantecki                         uint32_t copy_len = len & (ISAL_SM3_BLOCK_SIZE - 1);
183374d57feSjiaqizho 
1846801b27bSTomasz Kantecki                         // if mod ISAL_SM3_BLOCK_SIZE != 0
185374d57feSjiaqizho                         if (copy_len) {
186374d57feSjiaqizho                                 len -= copy_len;
187374d57feSjiaqizho                                 memcpy_varlen(ctx->partial_block_buffer,
188374d57feSjiaqizho                                               ((const char *) buffer + len), copy_len);
189374d57feSjiaqizho                                 // store the extra data
190374d57feSjiaqizho                                 ctx->partial_block_buffer_length = copy_len;
191374d57feSjiaqizho                         }
192374d57feSjiaqizho 
193374d57feSjiaqizho                         ctx->incoming_buffer_length = 0;
194374d57feSjiaqizho                         // after len -= copy_len or copy_len == 0
1956801b27bSTomasz Kantecki                         assert((len % ISAL_SM3_BLOCK_SIZE) == 0);
196374d57feSjiaqizho                         // get the block size , eq len = len / 64
1976801b27bSTomasz Kantecki                         len >>= ISAL_SM3_LOG2_BLOCK_SIZE;
198374d57feSjiaqizho 
199374d57feSjiaqizho                         if (len) {
200374d57feSjiaqizho                                 ctx->job.buffer = (uint8_t *) buffer;
201374d57feSjiaqizho                                 ctx->job.len = len;
2026801b27bSTomasz Kantecki                                 ctx = (ISAL_SM3_HASH_CTX *) _sm3_mb_mgr_submit_avx512(&mgr->mgr,
203374d57feSjiaqizho                                                                                       &ctx->job);
204374d57feSjiaqizho                                 continue;
205374d57feSjiaqizho                         }
206374d57feSjiaqizho                 }
207374d57feSjiaqizho                 // If the extra blocks are not empty, then we are either on the last block(s)
208374d57feSjiaqizho                 // or we need more user input before continuing.
2098cb7fe78SPablo de Lara                 if (ctx->status & ISAL_HASH_CTX_STS_LAST) {
210374d57feSjiaqizho                         uint8_t *buf = ctx->partial_block_buffer;
211374d57feSjiaqizho                         uint32_t n_extra_blocks = hash_pad(buf, ctx->total_length);
212374d57feSjiaqizho 
2138cb7fe78SPablo de Lara                         ctx->status = (ISAL_HASH_CTX_STS) (ISAL_HASH_CTX_STS_PROCESSING |
2148cb7fe78SPablo de Lara                                                            ISAL_HASH_CTX_STS_COMPLETE);
215374d57feSjiaqizho                         ctx->job.buffer = buf;
216374d57feSjiaqizho                         ctx->job.len = (uint32_t) n_extra_blocks;
2176801b27bSTomasz Kantecki                         ctx = (ISAL_SM3_HASH_CTX *) _sm3_mb_mgr_submit_avx512(&mgr->mgr, &ctx->job);
218374d57feSjiaqizho                         // todo make sure should return ?
219374d57feSjiaqizho                         continue;
220374d57feSjiaqizho                 }
221374d57feSjiaqizho 
222374d57feSjiaqizho                 if (ctx)
2238cb7fe78SPablo de Lara                         ctx->status = ISAL_HASH_CTX_STS_IDLE;
224374d57feSjiaqizho                 return ctx;
225374d57feSjiaqizho         }
226374d57feSjiaqizho 
227374d57feSjiaqizho         return NULL;
228374d57feSjiaqizho }
229374d57feSjiaqizho 
230b923697dSMarcel Cornu static inline uint32_t
hash_pad(uint8_t padblock[ISAL_SM3_BLOCK_SIZE * 2],uint64_t total_len)2316801b27bSTomasz Kantecki hash_pad(uint8_t padblock[ISAL_SM3_BLOCK_SIZE * 2], uint64_t total_len)
232374d57feSjiaqizho {
2336801b27bSTomasz Kantecki         uint32_t i = (uint32_t) (total_len & (ISAL_SM3_BLOCK_SIZE - 1));
234374d57feSjiaqizho 
2356801b27bSTomasz Kantecki         memclr_fixedlen(&padblock[i], ISAL_SM3_BLOCK_SIZE);
236374d57feSjiaqizho         padblock[i] = 0x80;
237374d57feSjiaqizho 
238374d57feSjiaqizho         // Move i to the end of either 1st or 2nd extra block depending on length
2396801b27bSTomasz Kantecki         i += ((ISAL_SM3_BLOCK_SIZE - 1) & (0 - (total_len + ISAL_SM3_PADLENGTHFIELD_SIZE + 1))) +
2406801b27bSTomasz Kantecki              1 + ISAL_SM3_PADLENGTHFIELD_SIZE;
241374d57feSjiaqizho 
2426801b27bSTomasz Kantecki #if ISAL_SM3_PADLENGTHFIELD_SIZE == 16
243374d57feSjiaqizho         *((uint64_t *) &padblock[i - 16]) = 0;
244374d57feSjiaqizho #endif
245374d57feSjiaqizho 
246e3f7d4fbSUlrich Weigand         *((uint64_t *) &padblock[i - 8]) = to_be64((uint64_t) total_len << 3);
247374d57feSjiaqizho 
2486801b27bSTomasz Kantecki         return i >> ISAL_SM3_LOG2_BLOCK_SIZE; // Number of extra blocks to hash
249374d57feSjiaqizho }
250374d57feSjiaqizho 
2516801b27bSTomasz Kantecki ISAL_SM3_HASH_CTX *
_sm3_ctx_mgr_flush_avx512(ISAL_SM3_HASH_CTX_MGR * mgr)2526801b27bSTomasz Kantecki _sm3_ctx_mgr_flush_avx512(ISAL_SM3_HASH_CTX_MGR *mgr)
253374d57feSjiaqizho {
254374d57feSjiaqizho 
2556801b27bSTomasz Kantecki         ISAL_SM3_HASH_CTX *ctx;
256374d57feSjiaqizho 
257374d57feSjiaqizho         while (1) {
2586801b27bSTomasz Kantecki                 ctx = (ISAL_SM3_HASH_CTX *) _sm3_mb_mgr_flush_avx512(&mgr->mgr);
259374d57feSjiaqizho 
260374d57feSjiaqizho                 // If flush returned 0, there are no more jobs in flight.
261374d57feSjiaqizho                 if (!ctx)
262374d57feSjiaqizho                         return NULL;
263374d57feSjiaqizho 
264374d57feSjiaqizho                 // If flush returned a job, verify that it is safe to return to the user.
265374d57feSjiaqizho                 // If it is not ready, resubmit the job to finish processing.
266374d57feSjiaqizho                 ctx = sm3_ctx_mgr_resubmit(mgr, ctx);
267374d57feSjiaqizho 
268374d57feSjiaqizho                 // If sha256_ctx_mgr_resubmit returned a job, it is ready to be returned.
269374d57feSjiaqizho                 if (ctx)
270374d57feSjiaqizho                         return ctx;
271374d57feSjiaqizho 
272b923697dSMarcel Cornu                 // Otherwise, all jobs currently being managed by the SHA256_HASH_CTX_MGR still need
273b923697dSMarcel Cornu                 // processing. Loop.
274b923697dSMarcel Cornu         }
275374d57feSjiaqizho }
276374d57feSjiaqizho 
277b923697dSMarcel Cornu static inline void
hash_init_digest(ISAL_SM3_WORD_T * digest)2786801b27bSTomasz Kantecki hash_init_digest(ISAL_SM3_WORD_T *digest)
279374d57feSjiaqizho {
2806801b27bSTomasz Kantecki         static const ISAL_SM3_WORD_T hash_initial_digest[ISAL_SM3_DIGEST_NWORDS] = {
2816801b27bSTomasz Kantecki                 ISAL_SM3_INITIAL_DIGEST
2826801b27bSTomasz Kantecki         };
283374d57feSjiaqizho         memcpy_fixedlen(digest, hash_initial_digest, sizeof(hash_initial_digest));
284374d57feSjiaqizho }
285374d57feSjiaqizho 
// Version record attached to each exported entry point of this file.
struct slver {
        uint16_t snum; // serial number of the function
        uint8_t ver;   // version field
        uint8_t core;  // core field
};

// For each entry point: a zero-initialised "_slver_0000" symbol and the
// populated "_slver" record.
struct slver _sm3_ctx_mgr_init_avx512_slver_0000;
struct slver _sm3_ctx_mgr_init_avx512_slver = { .snum = 0x2306, .ver = 0x00, .core = 0x00 };

struct slver _sm3_ctx_mgr_submit_avx512_slver_0000;
struct slver _sm3_ctx_mgr_submit_avx512_slver = { .snum = 0x2307, .ver = 0x00, .core = 0x00 };

struct slver _sm3_ctx_mgr_flush_avx512_slver_0000;
struct slver _sm3_ctx_mgr_flush_avx512_slver = { .snum = 0x2308, .ver = 0x00, .core = 0x00 };
300374d57feSjiaqizho 
301a6dc8696SGreg Tucker #if defined(__clang__)
302a6dc8696SGreg Tucker #pragma clang attribute pop
303a6dc8696SGreg Tucker #endif
304