1 /**********************************************************************
2 Copyright(c) 2011-2020 Intel Corporation All rights reserved.
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright
10 notice, this list of conditions and the following disclaimer in
11 the documentation and/or other materials provided with the
12 distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 **********************************************************************/
29
30 #if defined(__clang__)
31 #pragma clang attribute push(__attribute__((target("avx2"))), apply_to = function)
32 #elif defined(__ICC)
33 #pragma intel optimization_parameter target_arch = AVX2
34 #elif defined(__ICL)
35 #pragma[intel] optimization_parameter target_arch = AVX2
36 #elif (__GNUC__ >= 5)
37 #pragma GCC target("avx2")
38 #endif
39
40 #include "sm3_mb.h"
41 #include "sm3_mb_internal.h"
42 #include "memcpy_inline.h"
43 #include "endian_helper.h"
44
45 #ifdef _MSC_VER
46 #include <intrin.h>
47 #define inline __inline
48 #endif
49
50 static inline void
51 hash_init_digest(ISAL_SM3_WORD_T *digest);
52 static inline uint32_t
53 hash_pad(uint8_t padblock[ISAL_SM3_BLOCK_SIZE * 2], uint64_t total_len);
54 static ISAL_SM3_HASH_CTX *
55 sm3_ctx_mgr_resubmit(ISAL_SM3_HASH_CTX_MGR *mgr, ISAL_SM3_HASH_CTX *ctx);
56
57 void
58 _sm3_mb_mgr_init_avx512(ISAL_SM3_MB_JOB_MGR *state);
59 ISAL_SM3_JOB *
60 _sm3_mb_mgr_submit_avx512(ISAL_SM3_MB_JOB_MGR *state, ISAL_SM3_JOB *job);
61 ISAL_SM3_JOB *
62 _sm3_mb_mgr_flush_avx512(ISAL_SM3_MB_JOB_MGR *state);
63
64 void
_sm3_mb_mgr_init_avx512(ISAL_SM3_MB_JOB_MGR * state)65 _sm3_mb_mgr_init_avx512(ISAL_SM3_MB_JOB_MGR *state)
66 {
67 unsigned int j;
68
69 memset(state, 0, sizeof(*state));
70 state->unused_lanes = 0xfedcba9876543210;
71 state->num_lanes_inuse = 0;
72 for (j = 0; j < ISAL_SM3_MAX_LANES; j++) {
73 state->lens[j] = 0;
74 state->ldata[j].job_in_lane = 0;
75 }
76 }
77
78 void
_sm3_ctx_mgr_init_avx512(ISAL_SM3_HASH_CTX_MGR * mgr)79 _sm3_ctx_mgr_init_avx512(ISAL_SM3_HASH_CTX_MGR *mgr)
80 {
81 _sm3_mb_mgr_init_avx512(&mgr->mgr);
82 }
83
84 ISAL_SM3_HASH_CTX *
_sm3_ctx_mgr_submit_avx512(ISAL_SM3_HASH_CTX_MGR * mgr,ISAL_SM3_HASH_CTX * ctx,const void * buffer,uint32_t len,ISAL_HASH_CTX_FLAG flags)85 _sm3_ctx_mgr_submit_avx512(ISAL_SM3_HASH_CTX_MGR *mgr, ISAL_SM3_HASH_CTX *ctx, const void *buffer,
86 uint32_t len, ISAL_HASH_CTX_FLAG flags)
87 {
88 if (flags & (~ISAL_HASH_ENTIRE)) {
89 // User should not pass anything other than FIRST, UPDATE, or LAST
90 ctx->error = ISAL_HASH_CTX_ERROR_INVALID_FLAGS;
91 return ctx;
92 }
93
94 if (ctx->status & ISAL_HASH_CTX_STS_PROCESSING) {
95 // Cannot submit to a currently processing job.
96 ctx->error = ISAL_HASH_CTX_ERROR_ALREADY_PROCESSING;
97 return ctx;
98 }
99
100 if ((ctx->status & ISAL_HASH_CTX_STS_COMPLETE) && !(flags & ISAL_HASH_FIRST)) {
101 // Cannot update a finished job.
102 ctx->error = ISAL_HASH_CTX_ERROR_ALREADY_COMPLETED;
103 return ctx;
104 }
105
106 if (flags & ISAL_HASH_FIRST) {
107 // Init digest
108 hash_init_digest(ctx->job.result_digest);
109
110 // Reset byte counter
111 ctx->total_length = 0;
112
113 // Clear extra blocks
114 ctx->partial_block_buffer_length = 0;
115 }
116 ctx->error = ISAL_HASH_CTX_ERROR_NONE;
117
118 // Store buffer ptr info from user
119 ctx->incoming_buffer = buffer;
120 ctx->incoming_buffer_length = len;
121
122 ctx->status = (flags & ISAL_HASH_LAST) ? (ISAL_HASH_CTX_STS) (ISAL_HASH_CTX_STS_PROCESSING |
123 ISAL_HASH_CTX_STS_LAST)
124 : ISAL_HASH_CTX_STS_PROCESSING;
125
126 // Advance byte counter
127 ctx->total_length += len;
128
129 // if partial_block_buffer_length != 0 means ctx get extra data
130 // len < ISAL_SM3_BLOCK_SIZE means data len < ISAL_SM3_BLOCK_SIZE
131 if ((ctx->partial_block_buffer_length) | (len < ISAL_SM3_BLOCK_SIZE)) {
132 // Compute how many bytes to copy from user buffer into extra block
133 uint32_t copy_len = ISAL_SM3_BLOCK_SIZE - ctx->partial_block_buffer_length;
134 if (len < copy_len)
135 copy_len = len;
136
137 if (copy_len) {
138 // Copy and update relevant pointers and counters
139 memcpy_varlen(&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
140 buffer, copy_len);
141
142 ctx->partial_block_buffer_length += copy_len;
143 ctx->incoming_buffer = (const void *) ((const char *) buffer + copy_len);
144 ctx->incoming_buffer_length = len - copy_len;
145 }
146 // The extra block should never contain more than 1 block here
147 assert(ctx->partial_block_buffer_length <= ISAL_SM3_BLOCK_SIZE);
148
149 // If the extra block buffer contains exactly 1 block, it can be hashed.
150 if (ctx->partial_block_buffer_length >= ISAL_SM3_BLOCK_SIZE) {
151
152 ctx->partial_block_buffer_length = 0;
153 ctx->job.buffer = ctx->partial_block_buffer;
154
155 ctx->job.len = 1;
156 ctx = (ISAL_SM3_HASH_CTX *) _sm3_mb_mgr_submit_avx512(&mgr->mgr, &ctx->job);
157 }
158 }
159
160 return sm3_ctx_mgr_resubmit(mgr, ctx);
161 }
162
163 static ISAL_SM3_HASH_CTX *
sm3_ctx_mgr_resubmit(ISAL_SM3_HASH_CTX_MGR * mgr,ISAL_SM3_HASH_CTX * ctx)164 sm3_ctx_mgr_resubmit(ISAL_SM3_HASH_CTX_MGR *mgr, ISAL_SM3_HASH_CTX *ctx)
165 {
166 while (ctx) {
167 if (ctx->status & ISAL_HASH_CTX_STS_COMPLETE) {
168 unsigned int j;
169 ctx->status = ISAL_HASH_CTX_STS_COMPLETE; // Clear PROCESSING bit
170 for (j = 0; j < ISAL_SM3_DIGEST_NWORDS; j++) {
171 ctx->job.result_digest[j] = byteswap32(ctx->job.result_digest[j]);
172 }
173 return ctx;
174 }
175 // partial_block_buffer_length must be 0 that means incoming_buffer_length have not
176 // be init.
177 if (ctx->partial_block_buffer_length == 0 && ctx->incoming_buffer_length) {
178 const void *buffer = ctx->incoming_buffer;
179 uint32_t len = ctx->incoming_buffer_length;
180
181 // copy_len will check len % ISAL_SM3_BLOCK_SIZE ?= 0
182 uint32_t copy_len = len & (ISAL_SM3_BLOCK_SIZE - 1);
183
184 // if mod ISAL_SM3_BLOCK_SIZE != 0
185 if (copy_len) {
186 len -= copy_len;
187 memcpy_varlen(ctx->partial_block_buffer,
188 ((const char *) buffer + len), copy_len);
189 // store the extra data
190 ctx->partial_block_buffer_length = copy_len;
191 }
192
193 ctx->incoming_buffer_length = 0;
194 // after len -= copy_len or copy_len == 0
195 assert((len % ISAL_SM3_BLOCK_SIZE) == 0);
196 // get the block size , eq len = len / 64
197 len >>= ISAL_SM3_LOG2_BLOCK_SIZE;
198
199 if (len) {
200 ctx->job.buffer = (uint8_t *) buffer;
201 ctx->job.len = len;
202 ctx = (ISAL_SM3_HASH_CTX *) _sm3_mb_mgr_submit_avx512(&mgr->mgr,
203 &ctx->job);
204 continue;
205 }
206 }
207 // If the extra blocks are not empty, then we are either on the last block(s)
208 // or we need more user input before continuing.
209 if (ctx->status & ISAL_HASH_CTX_STS_LAST) {
210 uint8_t *buf = ctx->partial_block_buffer;
211 uint32_t n_extra_blocks = hash_pad(buf, ctx->total_length);
212
213 ctx->status = (ISAL_HASH_CTX_STS) (ISAL_HASH_CTX_STS_PROCESSING |
214 ISAL_HASH_CTX_STS_COMPLETE);
215 ctx->job.buffer = buf;
216 ctx->job.len = (uint32_t) n_extra_blocks;
217 ctx = (ISAL_SM3_HASH_CTX *) _sm3_mb_mgr_submit_avx512(&mgr->mgr, &ctx->job);
218 // todo make sure should return ?
219 continue;
220 }
221
222 if (ctx)
223 ctx->status = ISAL_HASH_CTX_STS_IDLE;
224 return ctx;
225 }
226
227 return NULL;
228 }
229
230 static inline uint32_t
hash_pad(uint8_t padblock[ISAL_SM3_BLOCK_SIZE * 2],uint64_t total_len)231 hash_pad(uint8_t padblock[ISAL_SM3_BLOCK_SIZE * 2], uint64_t total_len)
232 {
233 uint32_t i = (uint32_t) (total_len & (ISAL_SM3_BLOCK_SIZE - 1));
234
235 memclr_fixedlen(&padblock[i], ISAL_SM3_BLOCK_SIZE);
236 padblock[i] = 0x80;
237
238 // Move i to the end of either 1st or 2nd extra block depending on length
239 i += ((ISAL_SM3_BLOCK_SIZE - 1) & (0 - (total_len + ISAL_SM3_PADLENGTHFIELD_SIZE + 1))) +
240 1 + ISAL_SM3_PADLENGTHFIELD_SIZE;
241
242 #if ISAL_SM3_PADLENGTHFIELD_SIZE == 16
243 *((uint64_t *) &padblock[i - 16]) = 0;
244 #endif
245
246 *((uint64_t *) &padblock[i - 8]) = to_be64((uint64_t) total_len << 3);
247
248 return i >> ISAL_SM3_LOG2_BLOCK_SIZE; // Number of extra blocks to hash
249 }
250
251 ISAL_SM3_HASH_CTX *
_sm3_ctx_mgr_flush_avx512(ISAL_SM3_HASH_CTX_MGR * mgr)252 _sm3_ctx_mgr_flush_avx512(ISAL_SM3_HASH_CTX_MGR *mgr)
253 {
254
255 ISAL_SM3_HASH_CTX *ctx;
256
257 while (1) {
258 ctx = (ISAL_SM3_HASH_CTX *) _sm3_mb_mgr_flush_avx512(&mgr->mgr);
259
260 // If flush returned 0, there are no more jobs in flight.
261 if (!ctx)
262 return NULL;
263
264 // If flush returned a job, verify that it is safe to return to the user.
265 // If it is not ready, resubmit the job to finish processing.
266 ctx = sm3_ctx_mgr_resubmit(mgr, ctx);
267
268 // If sha256_ctx_mgr_resubmit returned a job, it is ready to be returned.
269 if (ctx)
270 return ctx;
271
272 // Otherwise, all jobs currently being managed by the SHA256_HASH_CTX_MGR still need
273 // processing. Loop.
274 }
275 }
276
277 static inline void
hash_init_digest(ISAL_SM3_WORD_T * digest)278 hash_init_digest(ISAL_SM3_WORD_T *digest)
279 {
280 static const ISAL_SM3_WORD_T hash_initial_digest[ISAL_SM3_DIGEST_NWORDS] = {
281 ISAL_SM3_INITIAL_DIGEST
282 };
283 memcpy_fixedlen(digest, hash_initial_digest, sizeof(hash_initial_digest));
284 }
285
/* Version record emitted once per exported entry point of this file. */
struct slver {
        uint16_t snum;
        uint8_t ver;
        uint8_t core;
};

/* _sm3_ctx_mgr_init_avx512 */
struct slver _sm3_ctx_mgr_init_avx512_slver_0000;
struct slver _sm3_ctx_mgr_init_avx512_slver = { .snum = 0x2306, .ver = 0x00, .core = 0x00 };

/* _sm3_ctx_mgr_submit_avx512 */
struct slver _sm3_ctx_mgr_submit_avx512_slver_0000;
struct slver _sm3_ctx_mgr_submit_avx512_slver = { .snum = 0x2307, .ver = 0x00, .core = 0x00 };

/* _sm3_ctx_mgr_flush_avx512 */
struct slver _sm3_ctx_mgr_flush_avx512_slver_0000;
struct slver _sm3_ctx_mgr_flush_avx512_slver = { .snum = 0x2308, .ver = 0x00, .core = 0x00 };
300
301 #if defined(__clang__)
302 #pragma clang attribute pop
303 #endif
304