1 /**********************************************************************
2 Copyright(c) 2011-2019 Intel Corporation All rights reserved.
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright
10 notice, this list of conditions and the following disclaimer in
11 the documentation and/or other materials provided with the
12 distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 **********************************************************************/
#include <limits.h>
#include <string.h> /* for memcpy */
#include "erasure_code.h"
#include "ec_base.h" /* for GF tables */
32
33 #if __x86_64__ || __i386__ || _M_X64 || _M_IX86
/*
 * SSE front end that selects a gf_Nvect_dot_prod_sse kernel from the row count.
 *
 * len, k and data are passed through to every kernel call. rows decides how many
 * calls are made; coding and g_tbls are advanced between calls.
 */
void
ec_encode_data_sse(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                   unsigned char **coding)
{
        /* Lengths below 16 are delegated entirely to the base implementation. */
        if (len < 16) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        /*
         * While at least six rows remain, call the 6-vector kernel, then advance
         * coding by six pointers and g_tbls by 6 * k * 32 bytes.
         */
        for (; rows >= 6; rows -= 6) {
                gf_6vect_dot_prod_sse(len, k, g_tbls, data, coding);
                coding += 6;
                g_tbls += 6 * k * 32;
        }

        /* Dispatch the remainder; a value outside 1..5 makes no further call. */
        if (rows == 5) {
                gf_5vect_dot_prod_sse(len, k, g_tbls, data, coding);
        } else if (rows == 4) {
                gf_4vect_dot_prod_sse(len, k, g_tbls, data, coding);
        } else if (rows == 3) {
                gf_3vect_dot_prod_sse(len, k, g_tbls, data, coding);
        } else if (rows == 2) {
                gf_2vect_dot_prod_sse(len, k, g_tbls, data, coding);
        } else if (rows == 1) {
                /* The single-vector kernel takes the destination buffer itself. */
                gf_vect_dot_prod_sse(len, k, g_tbls, data, *coding);
        }
}
70
/*
 * AVX front end that selects a gf_Nvect_dot_prod_avx kernel from the row count.
 *
 * Requests with len below 16 go to ec_encode_data_base(). Otherwise rows are
 * handled six at a time, and the remaining 1..5 rows by the matching kernel.
 */
void
ec_encode_data_avx(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                   unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        while (rows >= 6) {
                gf_6vect_dot_prod_avx(len, k, g_tbls, data, coding);
                rows -= 6;
                /* Step to the next six coding pointers and the next 6 * k * 32 table bytes. */
                coding += 6;
                g_tbls += 6 * k * 32;
        }

        switch (rows) {
        case 1:
                /* The single-vector kernel takes the destination buffer itself. */
                gf_vect_dot_prod_avx(len, k, g_tbls, data, *coding);
                break;
        case 2:
                gf_2vect_dot_prod_avx(len, k, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_dot_prod_avx(len, k, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_dot_prod_avx(len, k, g_tbls, data, coding);
                break;
        case 5:
                gf_5vect_dot_prod_avx(len, k, g_tbls, data, coding);
                break;
        default:
                /* Zero (or any other value): nothing left to do. */
                break;
        }
}
106
/*
 * AVX2 front end that selects a gf_Nvect_dot_prod_avx2 kernel from the row count.
 *
 * len, k and data are passed through to every kernel call. rows decides how many
 * calls are made; coding and g_tbls are advanced between calls.
 */
void
ec_encode_data_avx2(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                    unsigned char **coding)
{
        /* Lengths below 32 are delegated entirely to the base implementation. */
        if (len < 32) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        /*
         * While at least six rows remain, call the 6-vector kernel, then advance
         * coding by six pointers and g_tbls by 6 * k * 32 bytes.
         */
        for (; rows >= 6; rows -= 6) {
                gf_6vect_dot_prod_avx2(len, k, g_tbls, data, coding);
                coding += 6;
                g_tbls += 6 * k * 32;
        }

        /* Dispatch the remainder; a value outside 1..5 makes no further call. */
        if (rows == 5) {
                gf_5vect_dot_prod_avx2(len, k, g_tbls, data, coding);
        } else if (rows == 4) {
                gf_4vect_dot_prod_avx2(len, k, g_tbls, data, coding);
        } else if (rows == 3) {
                gf_3vect_dot_prod_avx2(len, k, g_tbls, data, coding);
        } else if (rows == 2) {
                gf_2vect_dot_prod_avx2(len, k, g_tbls, data, coding);
        } else if (rows == 1) {
                /* The single-vector kernel takes the destination buffer itself. */
                gf_vect_dot_prod_avx2(len, k, g_tbls, data, *coding);
        }
}
143
144 #ifdef HAVE_AS_KNOWS_AVX512
145
/* AVX512 dot-product kernels called by ec_encode_data_avx512(). */
extern int gf_vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
                                   unsigned char **data, unsigned char *dest);
extern int gf_2vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
                                    unsigned char **data, unsigned char **coding);
extern int gf_3vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
                                    unsigned char **data, unsigned char **coding);
extern int gf_4vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
                                    unsigned char **data, unsigned char **coding);
extern int gf_5vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
                                    unsigned char **data, unsigned char **coding);
extern int gf_6vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
                                    unsigned char **data, unsigned char **coding);

/* AVX512 multiply-add kernels called by ec_encode_data_update_avx512(). */
extern void gf_vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
                               unsigned char *src, unsigned char *dest);
extern void gf_2vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
                                unsigned char *src, unsigned char **dest);
extern void gf_3vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
                                unsigned char *src, unsigned char **dest);
extern void gf_4vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
                                unsigned char *src, unsigned char **dest);
extern void gf_5vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
                                unsigned char *src, unsigned char **dest);
extern void gf_6vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
                                unsigned char *src, unsigned char **dest);
182
/*
 * AVX512 front end that selects a gf_Nvect_dot_prod_avx512 kernel from the row count.
 *
 * Requests with len below 64 go to ec_encode_data_base(). Otherwise rows are
 * handled six at a time, and the remaining 1..5 rows by the matching kernel.
 * Kernel return values are not inspected.
 */
void
ec_encode_data_avx512(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                      unsigned char **coding)
{
        if (len < 64) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        while (rows >= 6) {
                gf_6vect_dot_prod_avx512(len, k, g_tbls, data, coding);
                rows -= 6;
                /* Step to the next six coding pointers and the next 6 * k * 32 table bytes. */
                coding += 6;
                g_tbls += 6 * k * 32;
        }

        switch (rows) {
        case 1:
                /* The single-vector kernel takes the destination buffer itself. */
                gf_vect_dot_prod_avx512(len, k, g_tbls, data, *coding);
                break;
        case 2:
                gf_2vect_dot_prod_avx512(len, k, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_dot_prod_avx512(len, k, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_dot_prod_avx512(len, k, g_tbls, data, coding);
                break;
        case 5:
                gf_5vect_dot_prod_avx512(len, k, g_tbls, data, coding);
                break;
        default:
                /* Zero (or any other value): nothing left to do. */
                break;
        }
}
219
/*
 * AVX512 front end that selects a gf_Nvect_mad_avx512 kernel from the row count.
 *
 * len, k, vec_i and data are passed through to every kernel call. rows decides
 * how many calls are made; coding and g_tbls are advanced between calls.
 */
void
ec_encode_data_update_avx512(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                             unsigned char *data, unsigned char **coding)
{
        /* Lengths below 64 are delegated entirely to the base implementation. */
        if (len < 64) {
                ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }

        /*
         * While at least six rows remain, call the 6-vector kernel, then advance
         * coding by six pointers and g_tbls by 6 * k * 32 bytes.
         */
        for (; rows >= 6; rows -= 6) {
                gf_6vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
                coding += 6;
                g_tbls += 6 * k * 32;
        }

        /* Dispatch the remainder; a value outside 1..5 makes no further call. */
        if (rows == 5) {
                gf_5vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 4) {
                gf_4vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 3) {
                gf_3vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 2) {
                gf_2vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 1) {
                /* The single-vector kernel takes the destination buffer itself. */
                gf_vect_mad_avx512(len, k, vec_i, g_tbls, data, *coding);
        }
}
255
256 #if AS_FEATURE_LEVEL >= 10
257
/* AVX512+GFNI dot-product kernels called by ec_encode_data_avx512_gfni(). */
extern void gf_vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
                                         unsigned char **data, unsigned char *dest);
extern void gf_2vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
                                          unsigned char **data, unsigned char **coding);
extern void gf_3vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
                                          unsigned char **data, unsigned char **coding);
extern void gf_4vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
                                          unsigned char **data, unsigned char **coding);
extern void gf_5vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
                                          unsigned char **data, unsigned char **coding);
extern void gf_6vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
                                          unsigned char **data, unsigned char **coding);

/* AVX512+GFNI multiply-add kernels called by ec_encode_data_update_avx512_gfni(). */
extern void gf_vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
                                    unsigned char *src, unsigned char *dest);
extern void gf_2vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
                                     unsigned char *src, unsigned char **dest);
extern void gf_3vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
                                     unsigned char *src, unsigned char **dest);
extern void gf_4vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
                                     unsigned char *src, unsigned char **dest);
extern void gf_5vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
                                     unsigned char *src, unsigned char **dest);
extern void gf_6vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
                                     unsigned char *src, unsigned char **dest);

/* AVX2+GFNI dot-product kernels called by ec_encode_data_avx2_gfni(). */
extern void gf_vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls,
                                       unsigned char **data, unsigned char *dest);
extern void gf_2vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls,
                                        unsigned char **data, unsigned char **coding);
extern void gf_3vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls,
                                        unsigned char **data, unsigned char **coding);

/* AVX2+GFNI multiply-add kernels called by ec_encode_data_update_avx2_gfni(). */
extern void gf_vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
                                  unsigned char *src, unsigned char *dest);
extern void gf_2vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
                                   unsigned char *src, unsigned char **dest);
extern void gf_3vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
                                   unsigned char *src, unsigned char **dest);
extern void gf_4vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
                                   unsigned char *src, unsigned char **dest);
extern void gf_5vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
                                   unsigned char *src, unsigned char **dest);
320
321 void
ec_init_tables_gfni(int k,int rows,unsigned char * a,unsigned char * g_tbls)322 ec_init_tables_gfni(int k, int rows, unsigned char *a, unsigned char *g_tbls)
323 {
324 int i, j;
325
326 uint64_t *g64 = (uint64_t *) g_tbls;
327
328 for (i = 0; i < rows; i++)
329 for (j = 0; j < k; j++)
330 *(g64++) = gf_table_gfni[*a++];
331 }
332
/*
 * AVX512+GFNI front end that selects a gf_Nvect_dot_prod_avx512_gfni kernel from
 * the row count.
 *
 * No length check is made here; len is passed straight to the kernels.
 */
void
ec_encode_data_avx512_gfni(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                           unsigned char **coding)
{
        /*
         * While at least six rows remain, call the 6-vector kernel, then advance
         * coding by six pointers and g_tbls by 6 * k * 8 bytes.
         */
        for (; rows >= 6; rows -= 6) {
                gf_6vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
                coding += 6;
                g_tbls += 6 * k * 8;
        }

        /* Dispatch the remainder; a value outside 1..5 makes no further call. */
        if (rows == 5) {
                gf_5vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
        } else if (rows == 4) {
                gf_4vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
        } else if (rows == 3) {
                gf_3vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
        } else if (rows == 2) {
                gf_2vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
        } else if (rows == 1) {
                /* The single-vector kernel takes the destination buffer itself. */
                gf_vect_dot_prod_avx512_gfni(len, k, g_tbls, data, *coding);
        }
}
365
/*
 * AVX2+GFNI front end that selects a gf_Nvect_dot_prod_avx2_gfni kernel from the
 * row count.
 *
 * Rows are handled three at a time, and the remaining 1..2 rows by the matching
 * kernel. No length check is made here; len is passed straight to the kernels.
 */
void
ec_encode_data_avx2_gfni(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                         unsigned char **coding)
{
        /*
         * While at least three rows remain, call the 3-vector kernel, then advance
         * coding by three pointers and g_tbls by 3 * k * 8 bytes.
         */
        for (; rows >= 3; rows -= 3) {
                gf_3vect_dot_prod_avx2_gfni(len, k, g_tbls, data, coding);
                coding += 3;
                g_tbls += 3 * k * 8;
        }

        /* Dispatch the remainder; a value outside 1..2 makes no further call. */
        if (rows == 2) {
                gf_2vect_dot_prod_avx2_gfni(len, k, g_tbls, data, coding);
        } else if (rows == 1) {
                /* The single-vector kernel takes the destination buffer itself. */
                gf_vect_dot_prod_avx2_gfni(len, k, g_tbls, data, *coding);
        }
}
388
/*
 * AVX512+GFNI front end that selects a gf_Nvect_mad_avx512_gfni kernel from the
 * row count.
 *
 * No length check is made here; len, k, vec_i and data are passed straight to
 * the kernels.
 */
void
ec_encode_data_update_avx512_gfni(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                                  unsigned char *data, unsigned char **coding)
{
        /*
         * While at least six rows remain, call the 6-vector kernel, then advance
         * coding by six pointers and g_tbls by 6 * k * 8 bytes.
         */
        for (; rows >= 6; rows -= 6) {
                gf_6vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
                coding += 6;
                g_tbls += 6 * k * 8;
        }

        /* Dispatch the remainder; a value outside 1..5 makes no further call. */
        if (rows == 5) {
                gf_5vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 4) {
                gf_4vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 3) {
                gf_3vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 2) {
                gf_2vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 1) {
                /* The single-vector kernel takes the destination buffer itself. */
                gf_vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, *coding);
        }
}
420
/*
 * AVX2+GFNI front end that selects a gf_Nvect_mad_avx2_gfni kernel from the row
 * count.
 *
 * Rows are handled five at a time, and the remaining 1..4 rows by the matching
 * kernel. No length check is made here; len is passed straight to the kernels.
 */
void
ec_encode_data_update_avx2_gfni(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                                unsigned char *data, unsigned char **coding)
{
        /*
         * While at least five rows remain, call the 5-vector kernel, then advance
         * coding by five pointers and g_tbls by 5 * k * 8 bytes.
         */
        for (; rows >= 5; rows -= 5) {
                gf_5vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
                coding += 5;
                g_tbls += 5 * k * 8;
        }

        /* Dispatch the remainder; a value outside 1..4 makes no further call. */
        if (rows == 4) {
                gf_4vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 3) {
                gf_3vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 2) {
                gf_2vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 1) {
                /* The single-vector kernel takes the destination buffer itself. */
                gf_vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, *coding);
        }
}
449
450 #endif // AS_FEATURE_LEVEL >= 10
451 #endif // HAVE_AS_KNOWS_AVX512
452
453 #if __WORDSIZE == 64 || _WIN64 || __x86_64__
454
/*
 * SSE front end that selects a gf_Nvect_mad_sse kernel from the row count.
 *
 * len, k, vec_i and data are passed through to every kernel call. rows decides
 * how many calls are made; coding and g_tbls are advanced between calls.
 */
void
ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                          unsigned char *data, unsigned char **coding)
{
        /* Lengths below 16 are delegated entirely to the base implementation. */
        if (len < 16) {
                ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }

        /*
         * The loop runs only while MORE than six rows remain, so a final full group
         * of six is left for the dispatch below rather than handled here.
         */
        for (; rows > 6; rows -= 6) {
                gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
                coding += 6;
                g_tbls += 6 * k * 32;
        }

        /* Dispatch the remainder; a value outside 1..6 makes no further call. */
        if (rows == 6) {
                gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 5) {
                gf_5vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 4) {
                gf_4vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 3) {
                gf_3vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 2) {
                gf_2vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 1) {
                /* The single-vector kernel takes the destination buffer itself. */
                gf_vect_mad_sse(len, k, vec_i, g_tbls, data, *coding);
        }
}
493
/*
 * AVX front end that selects a gf_Nvect_mad_avx kernel from the row count.
 *
 * Requests with len below 16 go to ec_encode_data_update_base(). Otherwise the
 * 6-vector kernel is called while more than six rows remain, and the remaining
 * 1..6 rows are handled by the matching kernel.
 */
void
ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                          unsigned char *data, unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }

        /* Strictly greater than six: a final full group of six goes through the switch. */
        while (rows > 6) {
                gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
                rows -= 6;
                /* Step to the next six coding pointers and the next 6 * k * 32 table bytes. */
                coding += 6;
                g_tbls += 6 * k * 32;
        }

        switch (rows) {
        case 1:
                /* The single-vector kernel takes the destination buffer itself. */
                gf_vect_mad_avx(len, k, vec_i, g_tbls, data, *coding);
                break;
        case 2:
                gf_2vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
                break;
        case 5:
                gf_5vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
                break;
        case 6:
                gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
                break;
        default:
                /* Zero (or any other value): nothing left to do. */
                break;
        }
}
531
/*
 * AVX2 front end that selects a gf_Nvect_mad_avx2 kernel from the row count.
 *
 * len, k, vec_i and data are passed through to every kernel call. rows decides
 * how many calls are made; coding and g_tbls are advanced between calls.
 */
void
ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                           unsigned char *data, unsigned char **coding)
{
        /* Lengths below 32 are delegated entirely to the base implementation. */
        if (len < 32) {
                ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }

        /*
         * The loop runs only while MORE than six rows remain, so a final full group
         * of six is left for the dispatch below rather than handled here.
         */
        for (; rows > 6; rows -= 6) {
                gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
                coding += 6;
                g_tbls += 6 * k * 32;
        }

        /* Dispatch the remainder; a value outside 1..6 makes no further call. */
        if (rows == 6) {
                gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 5) {
                gf_5vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 4) {
                gf_4vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 3) {
                gf_3vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 2) {
                gf_2vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
        } else if (rows == 1) {
                /* The single-vector kernel takes the destination buffer itself. */
                gf_vect_mad_avx2(len, k, vec_i, g_tbls, data, *coding);
        }
}
569
570 #endif //__WORDSIZE == 64 || _WIN64 || __x86_64__
571 #endif //__x86_64__ || __i386__ || _M_X64 || _M_IX86
572