xref: /isa-l_crypto/sm3_mb/sm3_ctx_avx512.c (revision d28f1034f736e3eb791c3cf6bff3e2fa81fb5331)
1 /**********************************************************************
2   Copyright(c) 2011-2020 Intel Corporation All rights reserved.
3 
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions
6   are met:
7     * Redistributions of source code must retain the above copyright
8       notice, this list of conditions and the following disclaimer.
9     * Redistributions in binary form must reproduce the above copyright
10       notice, this list of conditions and the following disclaimer in
11       the documentation and/or other materials provided with the
12       distribution.
13     * Neither the name of Intel Corporation nor the names of its
14       contributors may be used to endorse or promote products derived
15       from this software without specific prior written permission.
16 
17   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 **********************************************************************/
29 
30 #if defined(__clang__)
31 #pragma clang attribute push(__attribute__((target("avx2"))), apply_to = function)
32 #elif defined(__ICC)
33 #pragma intel optimization_parameter target_arch = AVX2
34 #elif defined(__ICL)
35 #pragma[intel] optimization_parameter target_arch = AVX2
36 #elif (__GNUC__ >= 5)
37 #pragma GCC target("avx2")
38 #endif
39 
40 #include "sm3_mb.h"
41 #include "sm3_mb_internal.h"
42 #include "memcpy_inline.h"
43 #include "endian_helper.h"
44 
45 #ifdef _MSC_VER
46 #include <intrin.h>
47 #define inline __inline
48 #endif
49 
50 static inline void
51 hash_init_digest(ISAL_SM3_WORD_T *digest);
52 static inline uint32_t
53 hash_pad(uint8_t padblock[ISAL_SM3_BLOCK_SIZE * 2], uint64_t total_len);
54 static ISAL_SM3_HASH_CTX *
55 sm3_ctx_mgr_resubmit(ISAL_SM3_HASH_CTX_MGR *mgr, ISAL_SM3_HASH_CTX *ctx);
56 
57 void
58 _sm3_mb_mgr_init_avx512(ISAL_SM3_MB_JOB_MGR *state);
59 ISAL_SM3_JOB *
60 _sm3_mb_mgr_submit_avx512(ISAL_SM3_MB_JOB_MGR *state, ISAL_SM3_JOB *job);
61 ISAL_SM3_JOB *
62 _sm3_mb_mgr_flush_avx512(ISAL_SM3_MB_JOB_MGR *state);
63 
64 void
_sm3_mb_mgr_init_avx512(ISAL_SM3_MB_JOB_MGR * state)65 _sm3_mb_mgr_init_avx512(ISAL_SM3_MB_JOB_MGR *state)
66 {
67         unsigned int j;
68 
69         memset(state, 0, sizeof(*state));
70         state->unused_lanes = 0xfedcba9876543210;
71         state->num_lanes_inuse = 0;
72         for (j = 0; j < ISAL_SM3_MAX_LANES; j++) {
73                 state->lens[j] = 0;
74                 state->ldata[j].job_in_lane = 0;
75         }
76 }
77 
78 void
_sm3_ctx_mgr_init_avx512(ISAL_SM3_HASH_CTX_MGR * mgr)79 _sm3_ctx_mgr_init_avx512(ISAL_SM3_HASH_CTX_MGR *mgr)
80 {
81         _sm3_mb_mgr_init_avx512(&mgr->mgr);
82 }
83 
84 ISAL_SM3_HASH_CTX *
_sm3_ctx_mgr_submit_avx512(ISAL_SM3_HASH_CTX_MGR * mgr,ISAL_SM3_HASH_CTX * ctx,const void * buffer,uint32_t len,ISAL_HASH_CTX_FLAG flags)85 _sm3_ctx_mgr_submit_avx512(ISAL_SM3_HASH_CTX_MGR *mgr, ISAL_SM3_HASH_CTX *ctx, const void *buffer,
86                            uint32_t len, ISAL_HASH_CTX_FLAG flags)
87 {
88         if (flags & (~ISAL_HASH_ENTIRE)) {
89                 // User should not pass anything other than FIRST, UPDATE, or LAST
90                 ctx->error = ISAL_HASH_CTX_ERROR_INVALID_FLAGS;
91                 return ctx;
92         }
93 
94         if (ctx->status & ISAL_HASH_CTX_STS_PROCESSING) {
95                 // Cannot submit to a currently processing job.
96                 ctx->error = ISAL_HASH_CTX_ERROR_ALREADY_PROCESSING;
97                 return ctx;
98         }
99 
100         if ((ctx->status & ISAL_HASH_CTX_STS_COMPLETE) && !(flags & ISAL_HASH_FIRST)) {
101                 // Cannot update a finished job.
102                 ctx->error = ISAL_HASH_CTX_ERROR_ALREADY_COMPLETED;
103                 return ctx;
104         }
105 
106         if (flags & ISAL_HASH_FIRST) {
107                 // Init digest
108                 hash_init_digest(ctx->job.result_digest);
109 
110                 // Reset byte counter
111                 ctx->total_length = 0;
112 
113                 // Clear extra blocks
114                 ctx->partial_block_buffer_length = 0;
115         }
116         ctx->error = ISAL_HASH_CTX_ERROR_NONE;
117 
118         // Store buffer ptr info from user
119         ctx->incoming_buffer = buffer;
120         ctx->incoming_buffer_length = len;
121 
122         ctx->status = (flags & ISAL_HASH_LAST) ? (ISAL_HASH_CTX_STS) (ISAL_HASH_CTX_STS_PROCESSING |
123                                                                       ISAL_HASH_CTX_STS_LAST)
124                                                : ISAL_HASH_CTX_STS_PROCESSING;
125 
126         // Advance byte counter
127         ctx->total_length += len;
128 
129         // if partial_block_buffer_length != 0 means ctx get extra data
130         // len < ISAL_SM3_BLOCK_SIZE means data len < ISAL_SM3_BLOCK_SIZE
131         if ((ctx->partial_block_buffer_length) | (len < ISAL_SM3_BLOCK_SIZE)) {
132                 // Compute how many bytes to copy from user buffer into extra block
133                 uint32_t copy_len = ISAL_SM3_BLOCK_SIZE - ctx->partial_block_buffer_length;
134                 if (len < copy_len)
135                         copy_len = len;
136 
137                 if (copy_len) {
138                         // Copy and update relevant pointers and counters
139                         memcpy_varlen(&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
140                                       buffer, copy_len);
141 
142                         ctx->partial_block_buffer_length += copy_len;
143                         ctx->incoming_buffer = (const void *) ((const char *) buffer + copy_len);
144                         ctx->incoming_buffer_length = len - copy_len;
145                 }
146                 // The extra block should never contain more than 1 block here
147                 assert(ctx->partial_block_buffer_length <= ISAL_SM3_BLOCK_SIZE);
148 
149                 // If the extra block buffer contains exactly 1 block, it can be hashed.
150                 if (ctx->partial_block_buffer_length >= ISAL_SM3_BLOCK_SIZE) {
151 
152                         ctx->partial_block_buffer_length = 0;
153                         ctx->job.buffer = ctx->partial_block_buffer;
154 
155                         ctx->job.len = 1;
156                         ctx = (ISAL_SM3_HASH_CTX *) _sm3_mb_mgr_submit_avx512(&mgr->mgr, &ctx->job);
157                 }
158         }
159 
160         return sm3_ctx_mgr_resubmit(mgr, ctx);
161 }
162 
163 static ISAL_SM3_HASH_CTX *
sm3_ctx_mgr_resubmit(ISAL_SM3_HASH_CTX_MGR * mgr,ISAL_SM3_HASH_CTX * ctx)164 sm3_ctx_mgr_resubmit(ISAL_SM3_HASH_CTX_MGR *mgr, ISAL_SM3_HASH_CTX *ctx)
165 {
166         while (ctx) {
167                 if (ctx->status & ISAL_HASH_CTX_STS_COMPLETE) {
168                         unsigned int j;
169                         ctx->status = ISAL_HASH_CTX_STS_COMPLETE; // Clear PROCESSING bit
170                         for (j = 0; j < ISAL_SM3_DIGEST_NWORDS; j++) {
171                                 ctx->job.result_digest[j] = byteswap32(ctx->job.result_digest[j]);
172                         }
173                         return ctx;
174                 }
175                 // partial_block_buffer_length must be 0 that means incoming_buffer_length have not
176                 // be init.
177                 if (ctx->partial_block_buffer_length == 0 && ctx->incoming_buffer_length) {
178                         const void *buffer = ctx->incoming_buffer;
179                         uint32_t len = ctx->incoming_buffer_length;
180 
181                         // copy_len will check len % ISAL_SM3_BLOCK_SIZE ?= 0
182                         uint32_t copy_len = len & (ISAL_SM3_BLOCK_SIZE - 1);
183 
184                         // if mod ISAL_SM3_BLOCK_SIZE != 0
185                         if (copy_len) {
186                                 len -= copy_len;
187                                 memcpy_varlen(ctx->partial_block_buffer,
188                                               ((const char *) buffer + len), copy_len);
189                                 // store the extra data
190                                 ctx->partial_block_buffer_length = copy_len;
191                         }
192 
193                         ctx->incoming_buffer_length = 0;
194                         // after len -= copy_len or copy_len == 0
195                         assert((len % ISAL_SM3_BLOCK_SIZE) == 0);
196                         // get the block size , eq len = len / 64
197                         len >>= ISAL_SM3_LOG2_BLOCK_SIZE;
198 
199                         if (len) {
200                                 ctx->job.buffer = (uint8_t *) buffer;
201                                 ctx->job.len = len;
202                                 ctx = (ISAL_SM3_HASH_CTX *) _sm3_mb_mgr_submit_avx512(&mgr->mgr,
203                                                                                       &ctx->job);
204                                 continue;
205                         }
206                 }
207                 // If the extra blocks are not empty, then we are either on the last block(s)
208                 // or we need more user input before continuing.
209                 if (ctx->status & ISAL_HASH_CTX_STS_LAST) {
210                         uint8_t *buf = ctx->partial_block_buffer;
211                         uint32_t n_extra_blocks = hash_pad(buf, ctx->total_length);
212 
213                         ctx->status = (ISAL_HASH_CTX_STS) (ISAL_HASH_CTX_STS_PROCESSING |
214                                                            ISAL_HASH_CTX_STS_COMPLETE);
215                         ctx->job.buffer = buf;
216                         ctx->job.len = (uint32_t) n_extra_blocks;
217                         ctx = (ISAL_SM3_HASH_CTX *) _sm3_mb_mgr_submit_avx512(&mgr->mgr, &ctx->job);
218                         // todo make sure should return ?
219                         continue;
220                 }
221 
222                 if (ctx)
223                         ctx->status = ISAL_HASH_CTX_STS_IDLE;
224                 return ctx;
225         }
226 
227         return NULL;
228 }
229 
230 static inline uint32_t
hash_pad(uint8_t padblock[ISAL_SM3_BLOCK_SIZE * 2],uint64_t total_len)231 hash_pad(uint8_t padblock[ISAL_SM3_BLOCK_SIZE * 2], uint64_t total_len)
232 {
233         uint32_t i = (uint32_t) (total_len & (ISAL_SM3_BLOCK_SIZE - 1));
234 
235         memclr_fixedlen(&padblock[i], ISAL_SM3_BLOCK_SIZE);
236         padblock[i] = 0x80;
237 
238         // Move i to the end of either 1st or 2nd extra block depending on length
239         i += ((ISAL_SM3_BLOCK_SIZE - 1) & (0 - (total_len + ISAL_SM3_PADLENGTHFIELD_SIZE + 1))) +
240              1 + ISAL_SM3_PADLENGTHFIELD_SIZE;
241 
242 #if ISAL_SM3_PADLENGTHFIELD_SIZE == 16
243         *((uint64_t *) &padblock[i - 16]) = 0;
244 #endif
245 
246         *((uint64_t *) &padblock[i - 8]) = to_be64((uint64_t) total_len << 3);
247 
248         return i >> ISAL_SM3_LOG2_BLOCK_SIZE; // Number of extra blocks to hash
249 }
250 
251 ISAL_SM3_HASH_CTX *
_sm3_ctx_mgr_flush_avx512(ISAL_SM3_HASH_CTX_MGR * mgr)252 _sm3_ctx_mgr_flush_avx512(ISAL_SM3_HASH_CTX_MGR *mgr)
253 {
254 
255         ISAL_SM3_HASH_CTX *ctx;
256 
257         while (1) {
258                 ctx = (ISAL_SM3_HASH_CTX *) _sm3_mb_mgr_flush_avx512(&mgr->mgr);
259 
260                 // If flush returned 0, there are no more jobs in flight.
261                 if (!ctx)
262                         return NULL;
263 
264                 // If flush returned a job, verify that it is safe to return to the user.
265                 // If it is not ready, resubmit the job to finish processing.
266                 ctx = sm3_ctx_mgr_resubmit(mgr, ctx);
267 
268                 // If sha256_ctx_mgr_resubmit returned a job, it is ready to be returned.
269                 if (ctx)
270                         return ctx;
271 
272                 // Otherwise, all jobs currently being managed by the SHA256_HASH_CTX_MGR still need
273                 // processing. Loop.
274         }
275 }
276 
277 static inline void
hash_init_digest(ISAL_SM3_WORD_T * digest)278 hash_init_digest(ISAL_SM3_WORD_T *digest)
279 {
280         static const ISAL_SM3_WORD_T hash_initial_digest[ISAL_SM3_DIGEST_NWORDS] = {
281                 ISAL_SM3_INITIAL_DIGEST
282         };
283         memcpy_fixedlen(digest, hash_initial_digest, sizeof(hash_initial_digest));
284 }
285 
// Version record attached to each exported entry point of this file.
// NOTE(review): field meanings are inferred from their names (serial number,
// version, core) -- confirm against the project's versioning convention.
struct slver {
        uint16_t snum;
        uint8_t ver;
        uint8_t core;
};

// For every entry point: a zero-initialised "_0000" record plus the populated one.
struct slver _sm3_ctx_mgr_init_avx512_slver_0000;
struct slver _sm3_ctx_mgr_init_avx512_slver = { .snum = 0x2306, .ver = 0x00, .core = 0x00 };

struct slver _sm3_ctx_mgr_submit_avx512_slver_0000;
struct slver _sm3_ctx_mgr_submit_avx512_slver = { .snum = 0x2307, .ver = 0x00, .core = 0x00 };

struct slver _sm3_ctx_mgr_flush_avx512_slver_0000;
struct slver _sm3_ctx_mgr_flush_avx512_slver = { .snum = 0x2308, .ver = 0x00, .core = 0x00 };
300 
301 #if defined(__clang__)
302 #pragma clang attribute pop
303 #endif
304